Allow German noun chunks to work on Span

Update the German noun chunks iterator so that it also works on Span objects.
2026-01-01 22:43:10 +03:00 · 2016-11-24 23:30:15 +11:00 · 2016-11-24 23:30:15 +11:00 · b8c4f5ea76
commit b8c4f5ea76
parent 3e3bda142d
1 changed files with 6 additions and 3 deletions
--- a/spacy/syntax/iterators.pyx
+++ b/spacy/syntax/iterators.pyx
@ -2,9 +2,11 @@ from spacy.parts_of_speech cimport NOUN, PROPN, PRON


 def english_noun_chunks(obj):
+    '''Detect base noun phrases from a dependency parse.
+    Works on both Doc and Span.'''
    labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
              'attr', 'ROOT', 'root']
-    doc = obj.doc
+    doc = obj.doc # Ensure works on both Doc and Span.
    np_deps = [doc.vocab.strings[label] for label in labels]
    conj = doc.vocab.strings['conj']
    np_label = doc.vocab.strings['NP']
@ -26,14 +28,15 @@ def english_noun_chunks(obj):
 # extended to the right of the NOUN
 # example: "eine Tasse Tee" (a cup (of) tea) returns "eine Tasse Tee" and not
 # just "eine Tasse", same for "das Thema Familie"
-def german_noun_chunks(doc):
+def german_noun_chunks(obj):
    labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'ROOT', 'root', 'cj', 'pd', 'og', 'app']
+    doc = obj.doc # Ensure works on both Doc and Span.
    np_label = doc.vocab.strings['NP']
    np_deps = set(doc.vocab.strings[label] for label in labels)
    close_app = doc.vocab.strings['nk']

    rbracket = 0
-    for i, word in enumerate(doc):
+    for i, word in enumerate(obj):
        if i < rbracket:
            continue
        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps: