mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
fix parsing entity links in new gold format
This commit is contained in:
parent
a4cfe9fc33
commit
f503817623
|
@ -556,8 +556,6 @@ class Errors(object):
|
||||||
E979 = ("Cannot convert {type} to an Example object.")
|
E979 = ("Cannot convert {type} to an Example object.")
|
||||||
E980 = ("Each link annotation should refer to a dictionary with at most one "
|
E980 = ("Each link annotation should refer to a dictionary with at most one "
|
||||||
"identifier mapping to 1.0, and all others to 0.0.")
|
"identifier mapping to 1.0, and all others to 0.0.")
|
||||||
E981 = ("The offsets of the annotations for 'links' need to refer exactly "
|
|
||||||
"to the offsets of the 'entities' annotations.")
|
|
||||||
E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
|
E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
|
||||||
"into {values}, but found {value}.")
|
"into {values}, but found {value}.")
|
||||||
E983 = ("Invalid key for '{dict}': {key}. Available keys: "
|
E983 = ("Invalid key for '{dict}': {key}. Available keys: "
|
||||||
|
|
|
@ -235,10 +235,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
|
||||||
if key == "entities":
|
if key == "entities":
|
||||||
pass
|
pass
|
||||||
elif key == "links":
|
elif key == "links":
|
||||||
entities = doc_annot.get("entities", {})
|
ent_kb_ids = _parse_links(vocab, tok_annot["ORTH"], tok_annot["SPACY"], value)
|
||||||
if not entities:
|
|
||||||
raise ValueError(Errors.E981)
|
|
||||||
ent_kb_ids = _parse_links(vocab, tok_annot["ORTH"], value, entities)
|
|
||||||
tok_annot["ENT_KB_ID"] = ent_kb_ids
|
tok_annot["ENT_KB_ID"] = ent_kb_ids
|
||||||
elif key == "cats":
|
elif key == "cats":
|
||||||
pass
|
pass
|
||||||
|
@ -381,18 +378,11 @@ def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces):
|
||||||
ent_types.append("")
|
ent_types.append("")
|
||||||
return ent_iobs, ent_types
|
return ent_iobs, ent_types
|
||||||
|
|
||||||
def _parse_links(vocab, words, links, entities):
|
def _parse_links(vocab, words, spaces, links):
|
||||||
reference = Doc(vocab, words=words)
|
reference = Doc(vocab, words=words, spaces=spaces)
|
||||||
starts = {token.idx: token.i for token in reference}
|
starts = {token.idx: token.i for token in reference}
|
||||||
ends = {token.idx + len(token): token.i for token in reference}
|
ends = {token.idx + len(token): token.i for token in reference}
|
||||||
ent_kb_ids = ["" for _ in reference]
|
ent_kb_ids = ["" for _ in reference]
|
||||||
entity_map = [(ent[0], ent[1]) for ent in entities]
|
|
||||||
|
|
||||||
# links annotations need to refer 1-1 to entity annotations - throw error otherwise
|
|
||||||
for index, annot_dict in links.items():
|
|
||||||
start_char, end_char = index
|
|
||||||
if (start_char, end_char) not in entity_map:
|
|
||||||
raise ValueError(Errors.E981)
|
|
||||||
|
|
||||||
for index, annot_dict in links.items():
|
for index, annot_dict in links.items():
|
||||||
true_kb_ids = []
|
true_kb_ids = []
|
||||||
|
|
Loading…
Reference in New Issue
Block a user