* Add Token.conjuncts property

This commit is contained in:
Matthew Honnibal 2015-04-17 01:40:53 +02:00
parent 4757899370
commit f7ffd94e6a
2 changed files with 56 additions and 0 deletions

View File

@ -11,6 +11,7 @@ from .typedefs cimport LEMMA
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
from .typedefs cimport POS, LEMMA, TAG, DEP from .typedefs cimport POS, LEMMA, TAG, DEP
from .parts_of_speech import UNIV_POS_NAMES from .parts_of_speech import UNIV_POS_NAMES
from .parts_of_speech cimport CONJ, PUNCT
from .lexeme cimport check_flag from .lexeme cimport check_flag
from .spans import Span from .spans import Span
from .structs cimport UniStr from .structs cimport UniStr
@ -539,6 +540,27 @@ cdef class Token:
self.c + self.c.head, self.i + self.c.head, self.array_len, self.c + self.c.head, self.i + self.c.head, self.array_len,
self._seq) self._seq)
property conjuncts:
    def __get__(self):
        """Get a list of the words conjoined with this token.

        The token's left children are scanned starting from the last one
        that `self.lefts` yields. Once a child tagged CONJ has been passed,
        every further child whose part-of-speech equals this token's is
        collected.

        Returns:
            * If a CONJ child was found: the collected children, in the
              order `self.lefts` yields them, followed by this token.
            * Otherwise, if this token is not its own head and appears in
              its head's conjuncts: the head's conjuncts list.
            * Otherwise: an empty list.

        Tokens tagged CONJ or PUNCT skip the scan of their left children
        and go straight to the head lookup.
        """
        cdef Token word
        # Initialise the flag *before* the part-of-speech guard: it is read
        # below even when the scan is skipped for CONJ/PUNCT tokens.
        seen_conj = False
        conjs = []
        if self.c.pos != CONJ and self.c.pos != PUNCT:
            for word in reversed(list(self.lefts)):
                if word.c.pos == CONJ:
                    seen_conj = True
                elif seen_conj and word.c.pos == self.c.pos:
                    conjs.append(word)
        if seen_conj:
            # Children were gathered nearest-first; flip them back and put
            # this token at the end of the group.
            conjs.reverse()
            conjs.append(self)
            return conjs
        head = self.head
        if self is not head:
            # Evaluate the head's conjuncts once: the property is recursive,
            # so asking for it twice doubles the work at every level.
            head_conjs = head.conjuncts
            if self in head_conjs:
                return head_conjs
        return []
property ent_type: property ent_type:
def __get__(self): def __get__(self):
return self.c.ent_type return self.c.ent_type

34
tests/test_conjuncts.py Normal file
View File

@ -0,0 +1,34 @@
"""Test the Token.conjuncts property"""
from __future__ import unicode_literals
from spacy.en import English
import pytest
# Module-level English() instance, constructed once on import and reused by
# every test function in this file.
NLU = English()
def orths(tokens):
    """Return a list holding the `orth_` attribute of each item in `tokens`.

    Items are read in iteration order; an empty iterable gives an empty list.
    """
    collected = []
    for token in tokens:
        collected.append(token.orth_)
    return collected
def test_simple_two():
    """Two words joined by 'and': both must report the same conjunct list."""
    doc = NLU('I lost money and pride.')
    expected = ['money', 'pride']
    # Index 4 is 'pride' (checked first), index 2 is 'money'.
    second_conjunct = doc[4]
    assert orths(second_conjunct.conjuncts) == expected
    first_conjunct = doc[2]
    assert orths(first_conjunct.conjuncts) == expected
def test_comma_three():
    """Three items written 'a, b and c': first and last must list all three."""
    doc = NLU('I found my wallet, phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # -2 is 'keys' (the token before the final period); 3 is 'wallet'.
    last_item = doc[-2]
    assert orths(last_item.conjuncts) == expected
    first_item = doc[3]
    assert orths(first_item.conjuncts) == expected
def test_and_three():
    """Three items written 'a and b and c': first and last must list all three."""
    doc = NLU('I found my wallet and phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # -2 is 'keys' (the token before the final period); 3 is 'wallet'.
    last_item = doc[-2]
    assert orths(last_item.conjuncts) == expected
    first_item = doc[3]
    assert orths(first_item.conjuncts) == expected