mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 13:40:34 +03:00
* Add Token.conjuncts property
This commit is contained in:
parent
4757899370
commit
f7ffd94e6a
|
@ -11,6 +11,7 @@ from .typedefs cimport LEMMA
|
||||||
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||||
from .typedefs cimport POS, LEMMA, TAG, DEP
|
from .typedefs cimport POS, LEMMA, TAG, DEP
|
||||||
from .parts_of_speech import UNIV_POS_NAMES
|
from .parts_of_speech import UNIV_POS_NAMES
|
||||||
|
from .parts_of_speech cimport CONJ, PUNCT
|
||||||
from .lexeme cimport check_flag
|
from .lexeme cimport check_flag
|
||||||
from .spans import Span
|
from .spans import Span
|
||||||
from .structs cimport UniStr
|
from .structs cimport UniStr
|
||||||
|
@ -539,6 +540,27 @@ cdef class Token:
|
||||||
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
||||||
self._seq)
|
self._seq)
|
||||||
|
|
||||||
|
property conjuncts:
    def __get__(self):
        """Get a list of conjoined words.

        The token's left children are scanned from nearest to farthest.
        Once a coordinating conjunction (CONJ) has been seen, every further
        left child sharing this token's part-of-speech is collected.

        Returns:
            list: If a conjunction was found among the left children, the
            collected words in document order followed by this token.
            Otherwise, the head's conjunct list when this token is a member
            of it. Otherwise an empty list.
        """
        cdef Token word
        conjs = []
        # Bound before the branch below: conjunctions and punctuation skip
        # the scan entirely, and the flag is still read afterwards.
        seen_conj = False
        if self.c.pos != CONJ and self.c.pos != PUNCT:
            # Walk leftwards from the token, i.e. nearest child first.
            for word in reversed(list(self.lefts)):
                if word.c.pos == CONJ:
                    seen_conj = True
                elif seen_conj and word.c.pos == self.c.pos:
                    conjs.append(word)
        if seen_conj:
            # Words were gathered right-to-left; restore document order
            # and close the list with this token itself.
            conjs.reverse()
            conjs.append(self)
            return conjs
        head = self.head
        if self is not head:
            # Evaluate the head's (recursive) property only once instead of
            # once for the membership test and again for the return value.
            head_conjs = head.conjuncts
            if self in head_conjs:
                return head_conjs
        return []
|
||||||
|
|
||||||
property ent_type:
|
property ent_type:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.ent_type
|
return self.c.ent_type
|
||||||
|
|
34
tests/test_conjuncts.py
Normal file
34
tests/test_conjuncts.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
"""Test the Token.conjuncts property"""
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from spacy.en import English
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Shared English pipeline, constructed once at import time and reused by
# every test in this module.
NLU = English()
|
||||||
|
|
||||||
|
def orths(tokens):
    """Collect the ``orth_`` string of every token, preserving order."""
    texts = []
    for token in tokens:
        texts.append(token.orth_)
    return texts
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_two():
    """Two nouns joined by 'and' report the same conjuncts from either one."""
    doc = NLU('I lost money and pride.')
    expected = ['money', 'pride']
    # Index 4 is 'pride' (the right-hand conjunct), index 2 is 'money'.
    assert orths(doc[4].conjuncts) == expected
    assert orths(doc[2].conjuncts) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_comma_three():
    """A three-item list using a comma and 'and' yields all three conjuncts."""
    doc = NLU('I found my wallet, phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # doc[-2] is 'keys' (the final period is doc[-1]); doc[3] is 'wallet'.
    last_item = doc[-2]
    assert orths(last_item.conjuncts) == expected
    first_item = doc[3]
    assert orths(first_item.conjuncts) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_and_three():
    """A three-item list chained with 'and' twice yields all three conjuncts."""
    doc = NLU('I found my wallet and phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # doc[-2] is 'keys' (the final period is doc[-1]); doc[3] is 'wallet'.
    last_item = doc[-2]
    assert orths(last_item.conjuncts) == expected
    first_item = doc[3]
    assert orths(first_item.conjuncts) == expected
|
Loading…
Reference in New Issue
Block a user