mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #5693 from svlandeg/bugfix/nel-v3
This commit is contained in:
		
						commit
						8a5b9a6d5f
					
				| 
						 | 
					@ -556,8 +556,8 @@ class Errors(object):
 | 
				
			||||||
    E979 = ("Cannot convert {type} to an Example object.")
 | 
					    E979 = ("Cannot convert {type} to an Example object.")
 | 
				
			||||||
    E980 = ("Each link annotation should refer to a dictionary with at most one "
 | 
					    E980 = ("Each link annotation should refer to a dictionary with at most one "
 | 
				
			||||||
            "identifier mapping to 1.0, and all others to 0.0.")
 | 
					            "identifier mapping to 1.0, and all others to 0.0.")
 | 
				
			||||||
    E981 = ("The offsets of the annotations for 'links' need to refer exactly "
 | 
					    E981 = ("The offsets of the annotations for 'links' could not be aligned "
 | 
				
			||||||
            "to the offsets of the 'entities' annotations.")
 | 
					            "to token boundaries.")
 | 
				
			||||||
    E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
 | 
					    E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
 | 
				
			||||||
            "into {values}, but found {value}.")
 | 
					            "into {values}, but found {value}.")
 | 
				
			||||||
    E983 = ("Invalid key for '{dict}': {key}. Available keys: "
 | 
					    E983 = ("Invalid key for '{dict}': {key}. Available keys: "
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -235,10 +235,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
 | 
				
			||||||
            if key == "entities":
 | 
					            if key == "entities":
 | 
				
			||||||
                pass
 | 
					                pass
 | 
				
			||||||
            elif key == "links":
 | 
					            elif key == "links":
 | 
				
			||||||
                entities = doc_annot.get("entities", {})
 | 
					                ent_kb_ids = _parse_links(vocab, tok_annot["ORTH"], tok_annot["SPACY"], value)
 | 
				
			||||||
                if not entities:
 | 
					 | 
				
			||||||
                    raise ValueError(Errors.E981)
 | 
					 | 
				
			||||||
                ent_kb_ids = _parse_links(vocab, tok_annot["ORTH"], value, entities)
 | 
					 | 
				
			||||||
                tok_annot["ENT_KB_ID"] = ent_kb_ids
 | 
					                tok_annot["ENT_KB_ID"] = ent_kb_ids
 | 
				
			||||||
            elif key == "cats":
 | 
					            elif key == "cats":
 | 
				
			||||||
                pass
 | 
					                pass
 | 
				
			||||||
| 
						 | 
					@ -381,18 +378,11 @@ def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces):
 | 
				
			||||||
                ent_types.append("")
 | 
					                ent_types.append("")
 | 
				
			||||||
    return ent_iobs, ent_types
 | 
					    return ent_iobs, ent_types
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _parse_links(vocab, words, links, entities):
 | 
					def _parse_links(vocab, words, spaces, links):
 | 
				
			||||||
    reference = Doc(vocab, words=words)
 | 
					    reference = Doc(vocab, words=words, spaces=spaces)
 | 
				
			||||||
    starts = {token.idx: token.i for token in reference}
 | 
					    starts = {token.idx: token.i for token in reference}
 | 
				
			||||||
    ends = {token.idx + len(token): token.i for token in reference}
 | 
					    ends = {token.idx + len(token): token.i for token in reference}
 | 
				
			||||||
    ent_kb_ids = ["" for _ in reference]
 | 
					    ent_kb_ids = ["" for _ in reference]
 | 
				
			||||||
    entity_map = [(ent[0], ent[1]) for ent in entities]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # links annotations need to refer 1-1 to entity annotations - throw error otherwise
 | 
					 | 
				
			||||||
    for index, annot_dict in links.items():
 | 
					 | 
				
			||||||
        start_char, end_char = index
 | 
					 | 
				
			||||||
        if (start_char, end_char) not in entity_map:
 | 
					 | 
				
			||||||
            raise ValueError(Errors.E981)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for index, annot_dict in links.items():
 | 
					    for index, annot_dict in links.items():
 | 
				
			||||||
        true_kb_ids = []
 | 
					        true_kb_ids = []
 | 
				
			||||||
| 
						 | 
					@ -406,6 +396,8 @@ def _parse_links(vocab, words, links, entities):
 | 
				
			||||||
            start_char, end_char = index
 | 
					            start_char, end_char = index
 | 
				
			||||||
            start_token = starts.get(start_char)
 | 
					            start_token = starts.get(start_char)
 | 
				
			||||||
            end_token = ends.get(end_char)
 | 
					            end_token = ends.get(end_char)
 | 
				
			||||||
 | 
					            if start_token is None or end_token is None:
 | 
				
			||||||
 | 
					                raise ValueError(Errors.E981)
 | 
				
			||||||
            for i in range(start_token, end_token+1):
 | 
					            for i in range(start_token, end_token+1):
 | 
				
			||||||
                ent_kb_ids[i] = true_kb_ids[0]
 | 
					                ent_kb_ids[i] = true_kb_ids[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -230,8 +230,7 @@ def test_Example_from_dict_with_links(annots):
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "words": ["I", "like", "New", "York", "and", "Berlin", "."],
 | 
					            "words": ["I", "like", "New", "York", "and", "Berlin", "."],
 | 
				
			||||||
            "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
 | 
					            "links": {(7, 14): {"Q7381115": 1.0, "Q2146908": 0.0}},
 | 
				
			||||||
            "links": {(0, 1): {"Q7381115": 1.0, "Q2146908": 0.0}},
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user