Fix token.conjuncts (closes #795) (#3392)

* Implement conjuncts method

* Add span.conjuncts property

* Un-xfail token.conjuncts tests

* Update docs for token.conjuncts and span.conjuncts

* Fix merge error in token.conjuncts
This commit is contained in:
Matthew Honnibal 2019-03-11 17:05:45 +01:00 committed by Ines Montani
parent e2b9b523ce
commit b0b990e405
5 changed files with 47 additions and 14 deletions

View File

@ -201,7 +201,6 @@ def test_token0_has_sent_start_true():
assert not doc.is_sentenced assert not doc.is_sentenced
@pytest.mark.xfail
def test_token_api_conjuncts_chain(en_vocab): def test_token_api_conjuncts_chain(en_vocab):
words = "The boy and the girl and the man went .".split() words = "The boy and the girl and the man went .".split()
heads = [1, 7, -1, 1, -3, -1, 1, -3, 0, -1] heads = [1, 7, -1, 1, -3, -1, 1, -3, 0, -1]
@ -212,7 +211,6 @@ def test_token_api_conjuncts_chain(en_vocab):
assert [w.text for w in doc[7].conjuncts] == ["boy", "girl"] assert [w.text for w in doc[7].conjuncts] == ["boy", "girl"]
@pytest.mark.xfail
def test_token_api_conjuncts_simple(en_vocab): def test_token_api_conjuncts_simple(en_vocab):
words = "They came and went .".split() words = "They came and went .".split()
heads = [1, 0, -1, -2, -1] heads = [1, 0, -1, -2, -1]
@ -222,7 +220,6 @@ def test_token_api_conjuncts_simple(en_vocab):
assert [w.text for w in doc[3].conjuncts] == ["came"] assert [w.text for w in doc[3].conjuncts] == ["came"]
@pytest.mark.xfail
def test_token_api_non_conjuncts(en_vocab): def test_token_api_non_conjuncts(en_vocab):
words = "They came .".split() words = "They came .".split()
heads = [1, 0, -1] heads = [1, 0, -1]

View File

@ -527,6 +527,16 @@ cdef class Span:
else: else:
return self.doc[root] return self.doc[root]
@property
def conjuncts(self):
    """Tokens that are conjoined to the span's root.
    Delegates to the `conjuncts` attribute of `self.root`.
    RETURNS (tuple): A tuple of Token objects.
    DOCS: https://spacy.io/api/span#conjuncts
    """
    return self.root.conjuncts
@property @property
def lefts(self): def lefts(self):
"""Tokens that are to the left of the span, whose head is within the """Tokens that are to the left of the span, whose head is within the

View File

@ -689,19 +689,27 @@ cdef class Token:
def conjuncts(self): def conjuncts(self):
"""A sequence of coordinated tokens, including the token itself. """A tuple of coordinated tokens, not including the token itself.
YIELDS (Token): A coordinated token. RETURNS (tuple): The coordinated tokens.
DOCS: https://spacy.io/api/token#conjuncts DOCS: https://spacy.io/api/token#conjuncts
""" """
cdef Token word cdef Token word, child
if "conjuncts" in self.doc.user_token_hooks: if "conjuncts" in self.doc.user_token_hooks:
yield from self.doc.user_token_hooks["conjuncts"](self) return tuple(self.doc.user_token_hooks["conjuncts"](self))
start = self
while start.i != start.head.i:
if start.dep == conj:
start = start.head
else: else:
if self.dep != conj: break
for word in self.rights: queue = [start]
if word.dep == conj: output = [start]
yield word for word in queue:
yield from word.conjuncts for child in word.rights:
if child.c.dep == conj:
output.append(child)
queue.append(child)
return tuple([w for w in output if w.i != self.i])
property ent_type: property ent_type:
"""RETURNS (uint64): Named entity type.""" """RETURNS (uint64): Named entity type."""

View File

@ -316,6 +316,24 @@ taken.
| ----------- | ------- | --------------- | | ----------- | ------- | --------------- |
| **RETURNS** | `Token` | The root token. | | **RETURNS** | `Token` | The root token. |
## Span.conjuncts {#conjuncts tag="property" model="parser"}
A tuple of tokens coordinated to `span.root`.
> #### Example
>
> ```python
> doc = nlp(u"I like apples and oranges")
> apples_conjuncts = doc[2:3].conjuncts
> assert [t.text for t in apples_conjuncts] == [u"oranges"]
> ```
| Name | Type | Description |
| ---------- | ------- | -------------------- |
| **RETURNS** | `tuple` | The coordinated tokens. |
## Span.lefts {#lefts tag="property" model="parser"} ## Span.lefts {#lefts tag="property" model="parser"}
Tokens that are to the left of the span, whose heads are within the span. Tokens that are to the left of the span, whose heads are within the span.

View File

@ -211,7 +211,7 @@ The rightmost token of this token's syntactic descendants.
## Token.conjuncts {#conjuncts tag="property" model="parser"} ## Token.conjuncts {#conjuncts tag="property" model="parser"}
A sequence of coordinated tokens, including the token itself. A tuple of coordinated tokens, not including the token itself.
> #### Example > #### Example
> >
@ -223,7 +223,7 @@ A sequence of coordinated tokens, including the token itself.
| Name | Type | Description | | Name | Type | Description |
| ---------- | ------- | -------------------- | | ---------- | ------- | -------------------- |
| **YIELDS** | `Token` | A coordinated token. | | **RETURNS** | `tuple` | The coordinated tokens. |
## Token.children {#children tag="property" model="parser"} ## Token.children {#children tag="property" model="parser"}