diff --git a/spacy/lang/el/syntax_iterators.py b/spacy/lang/el/syntax_iterators.py index f02619ac9..5d6398aad 100644 --- a/spacy/lang/el/syntax_iterators.py +++ b/spacy/lang/el/syntax_iterators.py @@ -31,16 +31,15 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): - continue flag = False if word.pos == NOUN: # check for patterns such as γραμμή παραγωγής for potential_nmod in word.rights: if potential_nmod.dep == nmod: - seen.update( - j for j in range(word.left_edge.i, potential_nmod.i + 1) - ) + w_range = range(word.left_edge.i, potential_nmod.i + 1) + if any(j in seen for j in w_range): + continue + seen.update(j for j in w_range) yield word.left_edge.i, potential_nmod.i + 1, np_label flag = True break @@ -54,9 +53,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.i + 1, np_label diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py index 22f7fcf81..0d43ebf37 100644 --- a/spacy/lang/en/syntax_iterators.py +++ b/spacy/lang/en/syntax_iterators.py @@ -36,9 +36,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(j in seen for j in range(word.left_edge.i, word.i + 1)): + w_range = range(word.left_edge.i, word.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.i + 1, np_label elif word.dep == conj: head = word.head @@ -46,9 +47,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(j in seen for j in range(word.left_edge.i, word.i + 1)): + w_range = range(word.left_edge.i, word.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.i + 1, np_label diff --git a/spacy/lang/fa/syntax_iterators.py b/spacy/lang/fa/syntax_iterators.py index 5ff848124..0d43ebf37 100644 --- a/spacy/lang/fa/syntax_iterators.py +++ b/spacy/lang/fa/syntax_iterators.py @@ -36,9 +36,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.i + 1, np_label elif word.dep == conj: head = word.head @@ -46,9 +47,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.i + 1, np_label diff --git a/spacy/lang/fr/syntax_iterators.py b/spacy/lang/fr/syntax_iterators.py index 9495dcf1e..91b338eb3 100644 --- a/spacy/lang/fr/syntax_iterators.py +++ b/spacy/lang/fr/syntax_iterators.py @@ -35,9 +35,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label elif word.dep == conj: head = word.head @@ -45,9 +46,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label diff --git a/spacy/lang/id/syntax_iterators.py b/spacy/lang/id/syntax_iterators.py index 9495dcf1e..91b338eb3 100644 --- a/spacy/lang/id/syntax_iterators.py +++ b/spacy/lang/id/syntax_iterators.py @@ -35,9 +35,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label elif word.dep == conj: head = word.head @@ -45,9 +46,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label diff --git a/spacy/lang/nb/syntax_iterators.py b/spacy/lang/nb/syntax_iterators.py index 9495dcf1e..91b338eb3 100644 --- a/spacy/lang/nb/syntax_iterators.py +++ b/spacy/lang/nb/syntax_iterators.py @@ -35,9 +35,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label elif word.dep == conj: head = word.head @@ -45,9 +46,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label diff --git a/spacy/lang/sv/syntax_iterators.py b/spacy/lang/sv/syntax_iterators.py index 148884efe..31e3302e9 100644 --- a/spacy/lang/sv/syntax_iterators.py +++ b/spacy/lang/sv/syntax_iterators.py @@ -36,9 +36,10 @@ def noun_chunks(obj): if word.i in seen: continue if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label elif word.dep == conj: head = word.head @@ -46,9 +47,10 @@ def noun_chunks(obj): head = head.head # If the head is an NP, and we're coordinated to it, we're an NP if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): + w_range = range(word.left_edge.i, word.right_edge.i + 1) + if any(j in seen for j in w_range): continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) + seen.update(j for j in w_range) yield word.left_edge.i, word.right_edge.i + 1, np_label