mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Improve attribute handling in doc.merge(). Still unsatisfying
This commit is contained in:
parent
cfff4e0f61
commit
9720103428
|
@ -668,12 +668,15 @@ cdef class Doc:
|
|||
attributes[LEMMA] = self.vocab.strings[lemma]
|
||||
attributes[ENT_TYPE] = self.vocab.strings[ent_type]
|
||||
elif not args:
|
||||
if "label" in attributes and ENT_TYPE not in attributes:
|
||||
# TODO: This code makes little sense overall. We're still
|
||||
# ignoring most of the attributes?
|
||||
if "label" in attributes and 'ent_type' not in attributes:
|
||||
if type(attributes["label"]) == int:
|
||||
attributes[ENT_TYPE] = attributes["label"]
|
||||
else:
|
||||
attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
|
||||
|
||||
if 'ent_type' in attributes:
|
||||
attributes[ENT_TYPE] = attributes['ent_type']
|
||||
elif args:
|
||||
raise ValueError(
|
||||
"Doc.merge received %d non-keyword arguments. "
|
||||
|
@ -693,6 +696,9 @@ cdef class Doc:
|
|||
tag = self.vocab.strings[attributes.get(TAG, span.root.tag)]
|
||||
lemma = self.vocab.strings[attributes.get(LEMMA, span.root.lemma)]
|
||||
ent_type = self.vocab.strings[attributes.get(ENT_TYPE, span.root.ent_type)]
|
||||
ent_id = attributes.get('ent_id', span.root.ent_id)
|
||||
if not isinstance(ent_id, int):
|
||||
ent_id = self.vocab.strings[ent_id]
|
||||
|
||||
# Get LexemeC for newly merged token
|
||||
new_orth = ''.join([t.text_with_ws for t in span])
|
||||
|
@ -713,6 +719,7 @@ cdef class Doc:
|
|||
else:
|
||||
token.ent_iob = 3
|
||||
token.ent_type = self.vocab.strings[ent_type]
|
||||
token.ent_id = ent_id
|
||||
# Begin by setting all the head indices to absolute token positions
|
||||
# This is easier to work with for now than the offsets
|
||||
# Before thinking of something simpler, beware the case where a dependency
|
||||
|
|
Loading…
Reference in New Issue
Block a user