mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Improve attribute handling in doc.merge(). Still unsatisfying
This commit is contained in:
parent
cfff4e0f61
commit
9720103428
|
@ -668,12 +668,15 @@ cdef class Doc:
|
||||||
attributes[LEMMA] = self.vocab.strings[lemma]
|
attributes[LEMMA] = self.vocab.strings[lemma]
|
||||||
attributes[ENT_TYPE] = self.vocab.strings[ent_type]
|
attributes[ENT_TYPE] = self.vocab.strings[ent_type]
|
||||||
elif not args:
|
elif not args:
|
||||||
if "label" in attributes and ENT_TYPE not in attributes:
|
# TODO: This code makes little sense overall. We're still
|
||||||
|
# ignoring most of the attributes?
|
||||||
|
if "label" in attributes and 'ent_type' not in attributes:
|
||||||
if type(attributes["label"]) == int:
|
if type(attributes["label"]) == int:
|
||||||
attributes[ENT_TYPE] = attributes["label"]
|
attributes[ENT_TYPE] = attributes["label"]
|
||||||
else:
|
else:
|
||||||
attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
|
attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
|
||||||
|
if 'ent_type' in attributes:
|
||||||
|
attributes[ENT_TYPE] = attributes['ent_type']
|
||||||
elif args:
|
elif args:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Doc.merge received %d non-keyword arguments. "
|
"Doc.merge received %d non-keyword arguments. "
|
||||||
|
@ -693,6 +696,9 @@ cdef class Doc:
|
||||||
tag = self.vocab.strings[attributes.get(TAG, span.root.tag)]
|
tag = self.vocab.strings[attributes.get(TAG, span.root.tag)]
|
||||||
lemma = self.vocab.strings[attributes.get(LEMMA, span.root.lemma)]
|
lemma = self.vocab.strings[attributes.get(LEMMA, span.root.lemma)]
|
||||||
ent_type = self.vocab.strings[attributes.get(ENT_TYPE, span.root.ent_type)]
|
ent_type = self.vocab.strings[attributes.get(ENT_TYPE, span.root.ent_type)]
|
||||||
|
ent_id = attributes.get('ent_id', span.root.ent_id)
|
||||||
|
if not isinstance(ent_id, int):
|
||||||
|
ent_id = self.vocab.strings[ent_id]
|
||||||
|
|
||||||
# Get LexemeC for newly merged token
|
# Get LexemeC for newly merged token
|
||||||
new_orth = ''.join([t.text_with_ws for t in span])
|
new_orth = ''.join([t.text_with_ws for t in span])
|
||||||
|
@ -713,6 +719,7 @@ cdef class Doc:
|
||||||
else:
|
else:
|
||||||
token.ent_iob = 3
|
token.ent_iob = 3
|
||||||
token.ent_type = self.vocab.strings[ent_type]
|
token.ent_type = self.vocab.strings[ent_type]
|
||||||
|
token.ent_id = ent_id
|
||||||
# Begin by setting all the head indices to absolute token positions
|
# Begin by setting all the head indices to absolute token positions
|
||||||
# This is easier to work with for now than the offsets
|
# This is easier to work with for now than the offsets
|
||||||
# Before thinking of something simpler, beware the case where a dependency
|
# Before thinking of something simpler, beware the case where a dependency
|
||||||
|
|
Loading…
Reference in New Issue
Block a user