Fix token.conjuncts (closes #795) (#3392)

* Implement conjuncts method

* Add span.conjuncts property

* Un-xfail token.conjuncts tests

* Update docs for token.conjuncts and span.conjuncts

* Fix merge error in token.conjuncts
This commit is contained in:
Matthew Honnibal 2019-03-11 17:05:45 +01:00 committed by Ines Montani
parent e2b9b523ce
commit b0b990e405
5 changed files with 47 additions and 14 deletions

View File

@ -201,7 +201,6 @@ def test_token0_has_sent_start_true():
assert not doc.is_sentenced
@pytest.mark.xfail
def test_token_api_conjuncts_chain(en_vocab):
words = "The boy and the girl and the man went .".split()
heads = [1, 7, -1, 1, -3, -1, 1, -3, 0, -1]
@ -212,7 +211,6 @@ def test_token_api_conjuncts_chain(en_vocab):
assert [w.text for w in doc[7].conjuncts] == ["boy", "girl"]
@pytest.mark.xfail
def test_token_api_conjuncts_simple(en_vocab):
words = "They came and went .".split()
heads = [1, 0, -1, -2, -1]
@ -222,7 +220,6 @@ def test_token_api_conjuncts_simple(en_vocab):
assert [w.text for w in doc[3].conjuncts] == ["came"]
@pytest.mark.xfail
def test_token_api_non_conjuncts(en_vocab):
words = "They came .".split()
heads = [1, 0, -1]

View File

@ -527,6 +527,16 @@ cdef class Span:
else:
return self.doc[root]
@property
def conjuncts(self):
"""Tokens that are conjoined to the span's root.
RETURNS (tuple): A tuple of Token objects.
DOCS: https://spacy.io/api/span#conjuncts
"""
return self.root.conjuncts
@property
def lefts(self):
"""Tokens that are to the left of the span, whose head is within the

View File

@ -689,19 +689,27 @@ cdef class Token:
def conjuncts(self):
"""A tuple of coordinated tokens, not including the token itself.
YIELDS (Token): A coordinated token.
RETURNS (tuple): The coordinated tokens.
DOCS: https://spacy.io/api/token#conjuncts
"""
cdef Token word
cdef Token word, child
if "conjuncts" in self.doc.user_token_hooks:
yield from self.doc.user_token_hooks["conjuncts"](self)
else:
if self.dep != conj:
for word in self.rights:
if word.dep == conj:
yield word
yield from word.conjuncts
return tuple(self.doc.user_token_hooks["conjuncts"](self))
start = self
while start.i != start.head.i:
if start.dep == conj:
start = start.head
else:
break
queue = [start]
output = [start]
for word in queue:
for child in word.rights:
if child.c.dep == conj:
output.append(child)
queue.append(child)
return tuple([w for w in output if w.i != self.i])
property ent_type:
"""RETURNS (uint64): Named entity type."""

View File

@ -316,6 +316,24 @@ taken.
| ----------- | ------- | --------------- |
| **RETURNS** | `Token` | The root token. |
## Span.conjuncts {#conjuncts tag="property" model="parser"}
A tuple of tokens coordinated to `span.root`.
> #### Example
>
> ```python
> doc = nlp(u"I like apples and oranges")
> apples_conjuncts = doc[2:3].conjuncts
> assert [t.text for t in apples_conjuncts] == [u"oranges"]
> ```
| Name | Type | Description |
| ---------- | ------- | -------------------- |
| **RETURNS** | `tuple` | The coordinated tokens. |
## Span.lefts {#lefts tag="property" model="parser"}
Tokens that are to the left of the span, whose heads are within the span.

View File

@ -211,7 +211,7 @@ The rightmost token of this token's syntactic descendants.
## Token.conjuncts {#conjuncts tag="property" model="parser"}
A sequence of coordinated tokens, including the token itself.
A tuple of coordinated tokens, not including the token itself.
> #### Example
>
@ -223,7 +223,7 @@ A sequence of coordinated tokens, including the token itself.
| Name | Type | Description |
| ---------- | ------- | -------------------- |
| **YIELDS** | `Token` | A coordinated token. |
| **RETURNS** | `tuple` | The coordinated tokens. |
## Token.children {#children tag="property" model="parser"}