fix: use actual range in 'seen' instead of subtree

This commit is contained in:
svlandeg 2020-05-20 23:06:39 +02:00
parent 36a94c409a
commit b509a3e7fc
2 changed files with 3 additions and 3 deletions

View File

@@ -36,7 +36,7 @@ def noun_chunks(obj):
if word.i in seen:
continue
if word.dep in np_deps:
if any(w.i in seen for w in word.subtree):
if any(j in seen for j in range(word.left_edge.i, word.i + 1)):
continue
seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label
@@ -46,7 +46,7 @@ def noun_chunks(obj):
head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps:
if any(w.i in seen for w in word.subtree):
if any(j in seen for j in range(word.left_edge.i, word.i + 1)):
continue
seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label

View File

@@ -418,7 +418,7 @@ class Language(object):
def __call__(self, text, disable=[], component_cfg=None):
"""Apply the pipeline to some text. The text can span multiple sentences,
and can contain arbtrary whitespace. Alignment into the original string
and can contain arbitrary whitespace. Alignment into the original string
is preserved.
text (unicode): The text to be processed.