diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py index 5ff848124..22f7fcf81 100644 --- a/spacy/lang/en/syntax_iterators.py +++ b/spacy/lang/en/syntax_iterators.py @@ -36,7 +36,7 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + if any(j in seen for j in range(word.left_edge.i, word.i + 1)): continue seen.update(j for j in range(word.left_edge.i, word.i + 1)) yield word.left_edge.i, word.i + 1, np_label @@ -46,7 +46,7 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + if any(j in seen for j in range(word.left_edge.i, word.i + 1)): continue seen.update(j for j in range(word.left_edge.i, word.i + 1)) yield word.left_edge.i, word.i + 1, np_label diff --git a/spacy/language.py b/spacy/language.py index 703806627..c4eb26bad 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -418,7 +418,7 @@ class Language(object): def __call__(self, text, disable=[], component_cfg=None): """Apply the pipeline to some text. The text can span multiple sentences, - and can contain arbtrary whitespace. Alignment into the original string + and can contain arbitrary whitespace. Alignment into the original string is preserved. text (unicode): The text to be processed.