mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 21:57:15 +03:00
* Read in new WSD gold data
This commit is contained in:
parent
333e414e9f
commit
dbcef2b76e
|
@ -142,6 +142,7 @@ def read_json_file(loc, docs_filter=None):
|
||||||
heads = []
|
heads = []
|
||||||
labels = []
|
labels = []
|
||||||
ner = []
|
ner = []
|
||||||
|
wsd = []
|
||||||
for i, token in enumerate(sent['tokens']):
|
for i, token in enumerate(sent['tokens']):
|
||||||
words.append(token['orth'])
|
words.append(token['orth'])
|
||||||
ids.append(i)
|
ids.append(i)
|
||||||
|
@ -152,8 +153,9 @@ def read_json_file(loc, docs_filter=None):
|
||||||
if labels[-1].lower() == 'root':
|
if labels[-1].lower() == 'root':
|
||||||
labels[-1] = 'ROOT'
|
labels[-1] = 'ROOT'
|
||||||
ner.append(token.get('ner', '-'))
|
ner.append(token.get('ner', '-'))
|
||||||
|
wsd.append(token.get('senses', []))
|
||||||
sents.append((
|
sents.append((
|
||||||
(ids, words, tags, heads, labels, ner),
|
(ids, words, tags, heads, labels, ner, wsd),
|
||||||
sent.get('brackets', [])))
|
sent.get('brackets', [])))
|
||||||
if sents:
|
if sents:
|
||||||
yield (paragraph.get('raw', None), sents)
|
yield (paragraph.get('raw', None), sents)
|
||||||
|
|
|
@ -73,8 +73,8 @@ class Scorer(object):
|
||||||
|
|
||||||
gold_deps = set()
|
gold_deps = set()
|
||||||
gold_tags = set()
|
gold_tags = set()
|
||||||
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
gold_ents = set(tags_to_entities([annot[5] for annot in gold.orig_annot]))
|
||||||
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
for id_, word, tag, head, dep, ner, wsd in gold.orig_annot:
|
||||||
gold_tags.add((id_, tag))
|
gold_tags.add((id_, tag))
|
||||||
if dep.lower() not in ('p', 'punct'):
|
if dep.lower() not in ('p', 'punct'):
|
||||||
gold_deps.add((id_, head, dep.lower()))
|
gold_deps.add((id_, head, dep.lower()))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user