mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Fix alignment and vector checks in debug data
* Update the token alignment check to use the Example alignment
* Further update the missing-vector check to reflect changes in v3
This commit is contained in:
parent
8656a08777
commit
20e18cc246
|
@ -504,13 +504,18 @@ def _compile_gold(
|
|||
for eg in examples:
|
||||
gold = eg.reference
|
||||
doc = eg.predicted
|
||||
valid_words = [x for x in gold if x is not None]
|
||||
valid_words = [x.text for x in gold]
|
||||
data["words"].update(valid_words)
|
||||
data["n_words"] += len(valid_words)
|
||||
data["n_misaligned_words"] += len(gold) - len(valid_words)
|
||||
align = eg.alignment
|
||||
for token in doc:
|
||||
if token.orth_.isspace():
|
||||
continue
|
||||
if align.x2y.lengths[token.i] != 1:
|
||||
data["n_misaligned_words"] += 1
|
||||
data["texts"].add(doc.text)
|
||||
if len(nlp.vocab.vectors):
|
||||
for word in valid_words:
|
||||
for word in [t.text for t in doc]:
|
||||
if nlp.vocab.strings[word] not in nlp.vocab.vectors:
|
||||
data["words_missing_vectors"].update([word])
|
||||
if "ner" in factory_names:
|
||||
|
|
Loading…
Reference in New Issue
Block a user