fix: use actual range in 'seen' instead of subtree

This commit is contained in:
svlandeg 2020-05-20 23:06:39 +02:00
parent 36a94c409a
commit b509a3e7fc
2 changed files with 3 additions and 3 deletions

View File

@@ -36,7 +36,7 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): if any(j in seen for j in range(word.left_edge.i, word.i + 1)):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label
@@ -46,7 +46,7 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): if any(j in seen for j in range(word.left_edge.i, word.i + 1)):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label

View File

@@ -418,7 +418,7 @@ class Language(object):
def __call__(self, text, disable=[], component_cfg=None): def __call__(self, text, disable=[], component_cfg=None):
"""Apply the pipeline to some text. The text can span multiple sentences, """Apply the pipeline to some text. The text can span multiple sentences,
and can contain arbtrary whitespace. Alignment into the original string and can contain arbitrary whitespace. Alignment into the original string
is preserved. is preserved.
text (unicode): The text to be processed. text (unicode): The text to be processed.