mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
* Add Token.conjuncts property
This commit is contained in:
parent
4757899370
commit
f7ffd94e6a
|
@ -11,6 +11,7 @@ from .typedefs cimport LEMMA
|
|||
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from .typedefs cimport POS, LEMMA, TAG, DEP
|
||||
from .parts_of_speech import UNIV_POS_NAMES
|
||||
from .parts_of_speech cimport CONJ, PUNCT
|
||||
from .lexeme cimport check_flag
|
||||
from .spans import Span
|
||||
from .structs cimport UniStr
|
||||
|
@ -538,6 +539,27 @@ cdef class Token:
|
|||
return Token.cinit(self.vocab, self._string,
|
||||
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
||||
self._seq)
|
||||
|
||||
property conjuncts:
    def __get__(self):
        """Get a list of conjoined words.

        The left children of this token are scanned from last to first.
        Once a child tagged CONJ has been seen, every further child whose
        part-of-speech equals this token's is collected.  If a CONJ child
        was found, the collected children (restored to the order in which
        `self.lefts` yields them) followed by this token are returned.

        Otherwise, if this token is not its own head and appears in its
        head's conjunct list, the head's list is returned.  In every other
        case, including tokens that are themselves CONJ or PUNCT with no
        qualifying head, the result is an empty list.
        """
        cdef Token word
        conjs = []
        # Bind the flag on every path.  It used to be assigned only inside
        # the branch below, so CONJ and PUNCT tokens read an unbound local.
        seen_conj = False
        if self.c.pos != CONJ and self.c.pos != PUNCT:
            for word in reversed(list(self.lefts)):
                if word.c.pos == CONJ:
                    seen_conj = True
                elif seen_conj and word.c.pos == self.c.pos:
                    conjs.append(word)
        if seen_conj:
            # Children were gathered last-to-first; undo that, then close
            # the list with this token.
            conjs.reverse()
            conjs.append(self)
            return conjs
        head = self.head
        if self is not head:
            # Evaluate the head's conjuncts once: the lookup recurses up
            # the head chain, so repeating it for the membership test and
            # again for the return value duplicated that whole walk.
            head_conjs = head.conjuncts
            if self in head_conjs:
                return head_conjs
        return []
|
||||
|
||||
property ent_type:
|
||||
def __get__(self):
|
||||
|
|
34
tests/test_conjuncts.py
Normal file
34
tests/test_conjuncts.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
"""Test the Token.conjuncts property"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.en import English
|
||||
import pytest
|
||||
|
||||
# Shared English pipeline, built once at import time and reused by every test below.
NLU = English()
|
||||
|
||||
def orths(tokens):
    """Return the ``orth_`` attribute of each item in *tokens*, in order."""
    texts = []
    for token in tokens:
        texts.append(token.orth_)
    return texts
|
||||
|
||||
|
||||
def test_simple_two():
    """Both members of a two-word coordination report the same conjuncts."""
    doc = NLU('I lost money and pride.')
    expected = ['money', 'pride']
    # Index 4 is "pride"; index 2 is "money".
    assert orths(doc[4].conjuncts) == expected
    assert orths(doc[2].conjuncts) == expected
|
||||
|
||||
|
||||
def test_comma_three():
    """A comma-separated list closed by "and" yields all three conjuncts."""
    doc = NLU('I found my wallet, phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # "keys" is the second-to-last token, just before the final period.
    assert orths(doc[-2].conjuncts) == expected
    # "wallet" sits at index 3.
    assert orths(doc[3].conjuncts) == expected
|
||||
|
||||
|
||||
def test_and_three():
    """A list joined by repeated "and" yields all three conjuncts."""
    doc = NLU('I found my wallet and phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # "keys" is the second-to-last token, just before the final period.
    assert orths(doc[-2].conjuncts) == expected
    # "wallet" sits at index 3.
    assert orths(doc[3].conjuncts) == expected
|
Loading…
Reference in New Issue
Block a user