Rename morph to morphology in GoldParse

This commit is contained in:
Matthew Honnibal 2018-09-25 21:34:53 +02:00
parent 2ba10493f7
commit fb0abddd9e
3 changed files with 8 additions and 8 deletions

View File

@ -74,7 +74,7 @@ def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
head = int(head)-1 if head != '0' else id_
sent['words'].append(word)
sent['tags'].append(tag)
sent['morph'].append(_parse_morph_string(morph))
sent['morphology'].append(_parse_morph_string(morph))
sent['heads'].append(head)
sent['deps'].append('ROOT' if dep == 'root' else dep)
sent['spaces'].append(space_after == '_')

View File

@ -24,7 +24,7 @@ cdef class GoldParse:
cdef public int loss
cdef public list words
cdef public list tags
cdef public list morph
cdef public list morphology
cdef public list heads
cdef public list labels
cdef public dict orths

View File

@ -399,7 +399,7 @@ cdef class GoldParse:
return cls(doc, words=words, tags=tags, heads=heads, deps=deps,
entities=entities, make_projective=make_projective)
def __init__(self, doc, annot_tuples=None, words=None, tags=None, morph=None,
def __init__(self, doc, annot_tuples=None, words=None, tags=None, morphology=None,
heads=None, deps=None, entities=None, make_projective=False,
cats=None, **_):
"""Create a GoldParse.
@ -436,8 +436,8 @@ cdef class GoldParse:
deps = [None for _ in doc]
if entities is None:
entities = [None for _ in doc]
if morph is None:
morph = [None for _ in doc]
if morphology is None:
morphology = [None for _ in doc]
elif len(entities) == 0:
entities = ['O' for _ in doc]
elif not isinstance(entities[0], basestring):
@ -462,7 +462,7 @@ cdef class GoldParse:
self.heads = [None] * len(doc)
self.labels = [None] * len(doc)
self.ner = [None] * len(doc)
self.morph = [None] * len(doc)
self.morphology = [None] * len(doc)
# This needs to be done before we align the words
if make_projective and heads is not None and deps is not None:
@ -490,12 +490,12 @@ cdef class GoldParse:
self.heads[i] = None
self.labels[i] = None
self.ner[i] = 'O'
self.morph[i] = set()
self.morphology[i] = set()
if gold_i is None:
if i in i2j_multi:
self.words[i] = words[i2j_multi[i]]
self.tags[i] = tags[i2j_multi[i]]
self.morph[i] = morph[i2j_multi[i]]
self.morphology[i] = morphology[i2j_multi[i]]
is_last = i2j_multi[i] != i2j_multi.get(i+1)
is_first = i2j_multi[i] != i2j_multi.get(i-1)
# Set next word in multi-token span as head, until last