From b509a3e7fcadf84c257c1e5168b6dc926b8b2f3d Mon Sep 17 00:00:00 2001 From: svlandeg Date: Wed, 20 May 2020 23:06:39 +0200 Subject: [PATCH] fix: use actual range in 'seen' instead of subtree --- spacy/lang/en/syntax_iterators.py | 4 ++-- spacy/language.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py index 5ff848124..22f7fcf81 100644 --- a/spacy/lang/en/syntax_iterators.py +++ b/spacy/lang/en/syntax_iterators.py @@ -36,7 +36,7 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + if any(j in seen for j in range(word.left_edge.i, word.i + 1)): continue seen.update(j for j in range(word.left_edge.i, word.i + 1)) yield word.left_edge.i, word.i + 1, np_label @@ -46,7 +46,7 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + if any(j in seen for j in range(word.left_edge.i, word.i + 1)): continue seen.update(j for j in range(word.left_edge.i, word.i + 1)) yield word.left_edge.i, word.i + 1, np_label diff --git a/spacy/language.py b/spacy/language.py index 703806627..c4eb26bad 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -418,7 +418,7 @@ class Language(object): def __call__(self, text, disable=[], component_cfg=None): """Apply the pipeline to some text. The text can span multiple sentences, - and can contain arbtrary whitespace. Alignment into the original string + and can contain arbitrary whitespace. Alignment into the original string is preserved. text (unicode): The text to be processed.