diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 9a2e51d84..48dcb9483 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -142,6 +142,7 @@ def read_json_file(loc, docs_filter=None): heads = [] labels = [] ner = [] + wsd = [] for i, token in enumerate(sent['tokens']): words.append(token['orth']) ids.append(i) @@ -152,8 +153,9 @@ def read_json_file(loc, docs_filter=None): if labels[-1].lower() == 'root': labels[-1] = 'ROOT' ner.append(token.get('ner', '-')) + wsd.append(token.get('senses', [])) sents.append(( - (ids, words, tags, heads, labels, ner), + (ids, words, tags, heads, labels, ner, wsd), sent.get('brackets', []))) if sents: yield (paragraph.get('raw', None), sents) diff --git a/spacy/scorer.py b/spacy/scorer.py index 7172b93a4..f1341418e 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -73,8 +73,8 @@ class Scorer(object): gold_deps = set() gold_tags = set() - gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot])) - for id_, word, tag, head, dep, ner in gold.orig_annot: + gold_ents = set(tags_to_entities([annot[5] for annot in gold.orig_annot])) + for id_, word, tag, head, dep, ner, wsd in gold.orig_annot: gold_tags.add((id_, tag)) if dep.lower() not in ('p', 'punct'): gold_deps.add((id_, head, dep.lower()))