add fix for German noun chunk iterator (issue #365)

This commit is contained in:
Wolfgang Seeker 2016-05-06 01:41:26 +02:00
parent 8c0888d6cb
commit 7b78239436

View File

@@ -36,12 +36,12 @@ def german_noun_chunks(doc):
for i, word in enumerate(doc): for i, word in enumerate(doc):
if i < rbracket: if i < rbracket:
continue continue
if word.pos == (NOUN, PROPN, PRON) and word.dep in np_deps: if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
rbracket = word.i+1 rbracket = word.i+1
# try to extend the span to the right # try to extend the span to the right
# to capture close apposition/measurement constructions # to capture close apposition/measurement constructions
for rdep in doc[word.i].rights: for rdep in doc[word.i].rights:
if rdep.pos == NOUN and rdep.dep == close_app: if rdep.pos in (NOUN, PROPN) and rdep.dep == close_app:
rbracket = rdep.i+1 rbracket = rdep.i+1
yield word.left_edge.i, rbracket, np_label yield word.left_edge.i, rbracket, np_label