Mirror of https://github.com/explosion/spaCy.git
* Fix Issue #365: Error introduced during noun phrase chunking, due to use of corrected PRON/PROPN/etc tags.
parent 41342ca79b
commit bb94022975
@@ -1,4 +1,4 @@
-from spacy.parts_of_speech cimport NOUN
+from spacy.parts_of_speech cimport NOUN, PROPN, PRON


 def english_noun_chunks(doc):
@@ -9,7 +9,7 @@ def english_noun_chunks(doc):
     np_label = doc.vocab.strings['NP']
     for i in range(len(doc)):
         word = doc[i]
-        if word.pos == NOUN and word.dep in np_deps:
+        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
             yield word.left_edge.i, word.i+1, np_label
         elif word.pos == NOUN and word.dep == conj:
             head = word.head
@@ -36,7 +36,7 @@ def german_noun_chunks(doc):
     for i, word in enumerate(doc):
         if i < rbracket:
             continue
-        if word.pos == NOUN and word.dep in np_deps:
+        if word.pos == (NOUN, PROPN, PRON) and word.dep in np_deps:
             rbracket = word.i+1
             # try to extend the span to the right
             # to capture close apposition/measurement constructions
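For context, a minimal usage sketch of what this change restores, assuming a spaCy English model is installed (the model name, example sentence, and printed field below are illustrative, not taken from the commit): under the corrected tag scheme, pronouns such as "she" are tagged PRON and proper nouns such as "Google" are tagged PROPN rather than NOUN, so the old `word.pos == NOUN` test silently skipped those phrases; testing membership in (NOUN, PROPN, PRON) lets `doc.noun_chunks` yield them again.

import spacy

# Illustrative check of the fixed English chunker; 'en' is an assumed model name.
nlp = spacy.load('en')
doc = nlp(u'She bought shares in Google.')

# With `word.pos in (NOUN, PROPN, PRON)`, pronoun-headed ("She") and
# proper-noun-headed ("Google") phrases should be yielded as noun chunks
# alongside common-noun phrases ("shares").
for chunk in doc.noun_chunks:
    print(chunk.text)

Note that the German hunk tests `word.pos == (NOUN, PROPN, PRON)`, which compares a single integer tag against a tuple and can never be true; the membership test used in the English branch (`word.pos in (...)`) is presumably the intended form there as well.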