mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #6571 from adrianeboyd/bugfix/debug-data-missing-vectors
Fix alignment and vector checks in debug data
This commit is contained in:
		
						commit
						3f90bffa27
					
				| 
						 | 
					@ -504,13 +504,18 @@ def _compile_gold(
 | 
				
			||||||
    for eg in examples:
 | 
					    for eg in examples:
 | 
				
			||||||
        gold = eg.reference
 | 
					        gold = eg.reference
 | 
				
			||||||
        doc = eg.predicted
 | 
					        doc = eg.predicted
 | 
				
			||||||
        valid_words = [x for x in gold if x is not None]
 | 
					        valid_words = [x.text for x in gold]
 | 
				
			||||||
        data["words"].update(valid_words)
 | 
					        data["words"].update(valid_words)
 | 
				
			||||||
        data["n_words"] += len(valid_words)
 | 
					        data["n_words"] += len(valid_words)
 | 
				
			||||||
        data["n_misaligned_words"] += len(gold) - len(valid_words)
 | 
					        align = eg.alignment
 | 
				
			||||||
 | 
					        for token in doc:
 | 
				
			||||||
 | 
					            if token.orth_.isspace():
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					            if align.x2y.lengths[token.i] != 1:
 | 
				
			||||||
 | 
					                data["n_misaligned_words"] += 1
 | 
				
			||||||
        data["texts"].add(doc.text)
 | 
					        data["texts"].add(doc.text)
 | 
				
			||||||
        if len(nlp.vocab.vectors):
 | 
					        if len(nlp.vocab.vectors):
 | 
				
			||||||
            for word in valid_words:
 | 
					            for word in [t.text for t in doc]:
 | 
				
			||||||
                if nlp.vocab.strings[word] not in nlp.vocab.vectors:
 | 
					                if nlp.vocab.strings[word] not in nlp.vocab.vectors:
 | 
				
			||||||
                    data["words_missing_vectors"].update([word])
 | 
					                    data["words_missing_vectors"].update([word])
 | 
				
			||||||
        if "ner" in factory_names:
 | 
					        if "ner" in factory_names:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user