* Read in new WSD gold data

This commit is contained in:
Matthew Honnibal 2015-07-03 04:43:23 +02:00
parent 333e414e9f
commit dbcef2b76e
2 changed files with 5 additions and 3 deletions

View File

@@ -142,6 +142,7 @@ def read_json_file(loc, docs_filter=None):
heads = []
labels = []
ner = []
wsd = []
for i, token in enumerate(sent['tokens']):
words.append(token['orth'])
ids.append(i)
@@ -152,8 +153,9 @@ def read_json_file(loc, docs_filter=None):
if labels[-1].lower() == 'root':
labels[-1] = 'ROOT'
ner.append(token.get('ner', '-'))
wsd.append(token.get('senses', []))
sents.append((
(ids, words, tags, heads, labels, ner),
(ids, words, tags, heads, labels, ner, wsd),
sent.get('brackets', [])))
if sents:
yield (paragraph.get('raw', None), sents)

View File

@@ -73,8 +73,8 @@ class Scorer(object):
gold_deps = set()
gold_tags = set()
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
for id_, word, tag, head, dep, ner in gold.orig_annot:
gold_ents = set(tags_to_entities([annot[5] for annot in gold.orig_annot]))
for id_, word, tag, head, dep, ner, wsd in gold.orig_annot:
gold_tags.add((id_, tag))
if dep.lower() not in ('p', 'punct'):
gold_deps.add((id_, head, dep.lower()))