Fix span boundary handling in Spanish noun_chunks (#5860)

This commit is contained in:
Adriane Boyd 2020-08-03 13:53:15 +02:00 committed by GitHub
parent ac14ce7c30
commit cd59979ab4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -20,8 +20,7 @@ def noun_chunks(doclike):
np_left_deps = [doc.vocab.strings.add(label) for label in left_labels] np_left_deps = [doc.vocab.strings.add(label) for label in left_labels]
np_right_deps = [doc.vocab.strings.add(label) for label in right_labels] np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
stop_deps = [doc.vocab.strings.add(label) for label in stop_labels] stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
token = doc[0] for token in doclike:
while token and token.i < len(doclike):
if token.pos in [PROPN, NOUN, PRON]: if token.pos in [PROPN, NOUN, PRON]:
left, right = noun_bounds( left, right = noun_bounds(
doc, token, np_left_deps, np_right_deps, stop_deps doc, token, np_left_deps, np_right_deps, stop_deps