Mirror of https://github.com/explosion/spaCy.git (synced 2025-02-04 21:50:35 +03:00)
Rename `morph` to `morphology` in GoldParse
This commit is contained in:
Parent: 2ba10493f7
Commit: fb0abddd9e
|
@ -74,7 +74,7 @@ def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
|
||||||
head = int(head)-1 if head != '0' else id_
|
head = int(head)-1 if head != '0' else id_
|
||||||
sent['words'].append(word)
|
sent['words'].append(word)
|
||||||
sent['tags'].append(tag)
|
sent['tags'].append(tag)
|
||||||
sent['morph'].append(_parse_morph_string(morph))
|
sent['morphology'].append(_parse_morph_string(morph))
|
||||||
sent['heads'].append(head)
|
sent['heads'].append(head)
|
||||||
sent['deps'].append('ROOT' if dep == 'root' else dep)
|
sent['deps'].append('ROOT' if dep == 'root' else dep)
|
||||||
sent['spaces'].append(space_after == '_')
|
sent['spaces'].append(space_after == '_')
|
||||||
|
|
|
@ -24,7 +24,7 @@ cdef class GoldParse:
|
||||||
cdef public int loss
|
cdef public int loss
|
||||||
cdef public list words
|
cdef public list words
|
||||||
cdef public list tags
|
cdef public list tags
|
||||||
cdef public list morph
|
cdef public list morphology
|
||||||
cdef public list heads
|
cdef public list heads
|
||||||
cdef public list labels
|
cdef public list labels
|
||||||
cdef public dict orths
|
cdef public dict orths
|
||||||
|
|
|
@ -399,7 +399,7 @@ cdef class GoldParse:
|
||||||
return cls(doc, words=words, tags=tags, heads=heads, deps=deps,
|
return cls(doc, words=words, tags=tags, heads=heads, deps=deps,
|
||||||
entities=entities, make_projective=make_projective)
|
entities=entities, make_projective=make_projective)
|
||||||
|
|
||||||
def __init__(self, doc, annot_tuples=None, words=None, tags=None, morph=None,
|
def __init__(self, doc, annot_tuples=None, words=None, tags=None, morphology=None,
|
||||||
heads=None, deps=None, entities=None, make_projective=False,
|
heads=None, deps=None, entities=None, make_projective=False,
|
||||||
cats=None, **_):
|
cats=None, **_):
|
||||||
"""Create a GoldParse.
|
"""Create a GoldParse.
|
||||||
|
@ -436,8 +436,8 @@ cdef class GoldParse:
|
||||||
deps = [None for _ in doc]
|
deps = [None for _ in doc]
|
||||||
if entities is None:
|
if entities is None:
|
||||||
entities = [None for _ in doc]
|
entities = [None for _ in doc]
|
||||||
if morph is None:
|
if morphology is None:
|
||||||
morph = [None for _ in doc]
|
morphology = [None for _ in doc]
|
||||||
elif len(entities) == 0:
|
elif len(entities) == 0:
|
||||||
entities = ['O' for _ in doc]
|
entities = ['O' for _ in doc]
|
||||||
elif not isinstance(entities[0], basestring):
|
elif not isinstance(entities[0], basestring):
|
||||||
|
@ -462,7 +462,7 @@ cdef class GoldParse:
|
||||||
self.heads = [None] * len(doc)
|
self.heads = [None] * len(doc)
|
||||||
self.labels = [None] * len(doc)
|
self.labels = [None] * len(doc)
|
||||||
self.ner = [None] * len(doc)
|
self.ner = [None] * len(doc)
|
||||||
self.morph = [None] * len(doc)
|
self.morphology = [None] * len(doc)
|
||||||
|
|
||||||
# This needs to be done before we align the words
|
# This needs to be done before we align the words
|
||||||
if make_projective and heads is not None and deps is not None:
|
if make_projective and heads is not None and deps is not None:
|
||||||
|
@ -490,12 +490,12 @@ cdef class GoldParse:
|
||||||
self.heads[i] = None
|
self.heads[i] = None
|
||||||
self.labels[i] = None
|
self.labels[i] = None
|
||||||
self.ner[i] = 'O'
|
self.ner[i] = 'O'
|
||||||
self.morph[i] = set()
|
self.morphology[i] = set()
|
||||||
if gold_i is None:
|
if gold_i is None:
|
||||||
if i in i2j_multi:
|
if i in i2j_multi:
|
||||||
self.words[i] = words[i2j_multi[i]]
|
self.words[i] = words[i2j_multi[i]]
|
||||||
self.tags[i] = tags[i2j_multi[i]]
|
self.tags[i] = tags[i2j_multi[i]]
|
||||||
self.morph[i] = morph[i2j_multi[i]]
|
self.morphology[i] = morphology[i2j_multi[i]]
|
||||||
is_last = i2j_multi[i] != i2j_multi.get(i+1)
|
is_last = i2j_multi[i] != i2j_multi.get(i+1)
|
||||||
is_first = i2j_multi[i] != i2j_multi.get(i-1)
|
is_first = i2j_multi[i] != i2j_multi.get(i-1)
|
||||||
# Set next word in multi-token span as head, until last
|
# Set next word in multi-token span as head, until last
|
||||||
|
|
Loading…
Reference in New Issue
Block a user