From b0b990e4050a5d7dc4d21ad5092d199d000f256c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 11 Mar 2019 17:05:45 +0100 Subject: [PATCH] Fix token.conjuncts (closes #795) (#3392) * Implement conjuncts method * Add span.conjuncts property * Un-xfail token.conjuncts tests * Update docs for token.conjuncts and span.conjuncts * Fix merge error in token.conjuncts --- spacy/tests/doc/test_token_api.py | 3 --- spacy/tokens/span.pyx | 10 ++++++++++ spacy/tokens/token.pyx | 26 +++++++++++++++++--------- website/docs/api/span.md | 18 ++++++++++++++++++ website/docs/api/token.md | 4 ++-- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py index 9e3f88a48..bff2a95c6 100644 --- a/spacy/tests/doc/test_token_api.py +++ b/spacy/tests/doc/test_token_api.py @@ -201,7 +201,6 @@ def test_token0_has_sent_start_true(): assert not doc.is_sentenced -@pytest.mark.xfail def test_token_api_conjuncts_chain(en_vocab): words = "The boy and the girl and the man went .".split() heads = [1, 7, -1, 1, -3, -1, 1, -3, 0, -1] @@ -212,7 +211,6 @@ def test_token_api_conjuncts_chain(en_vocab): assert [w.text for w in doc[7].conjuncts] == ["boy", "girl"] -@pytest.mark.xfail def test_token_api_conjuncts_simple(en_vocab): words = "They came and went .".split() heads = [1, 0, -1, -2, -1] @@ -222,7 +220,6 @@ def test_token_api_conjuncts_simple(en_vocab): assert [w.text for w in doc[3].conjuncts] == ["came"] -@pytest.mark.xfail def test_token_api_non_conjuncts(en_vocab): words = "They came .".split() heads = [1, 0, -1] diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index d38d6e0fc..b51ca3e57 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -527,6 +527,16 @@ cdef class Span: else: return self.doc[root] + @property + def conjuncts(self): + """Tokens that are conjoined to the span's root. + + RETURNS (tuple): A tuple of Token objects. 
+ + DOCS: https://spacy.io/api/span#conjuncts + """ + return self.root.conjuncts + @property def lefts(self): """Tokens that are to the left of the span, whose head is within the diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 960679e9c..409b68290 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -689,19 +689,27 @@ cdef class Token: def conjuncts(self): """A sequence of coordinated tokens, including the token itself. - YIELDS (Token): A coordinated token. + RETURNS (tuple): The coordinated tokens. DOCS: https://spacy.io/api/token#conjuncts """ - cdef Token word + cdef Token word, child if "conjuncts" in self.doc.user_token_hooks: - yield from self.doc.user_token_hooks["conjuncts"](self) - else: - if self.dep != conj: - for word in self.rights: - if word.dep == conj: - yield word - yield from word.conjuncts + return tuple(self.doc.user_token_hooks["conjuncts"](self)) + start = self + while start.i != start.head.i: + if start.dep == conj: + start = start.head + else: + break + queue = [start] + output = [start] + for word in queue: + for child in word.rights: + if child.c.dep == conj: + output.append(child) + queue.append(child) + return tuple([w for w in output if w.i != self.i]) property ent_type: """RETURNS (uint64): Named entity type.""" diff --git a/website/docs/api/span.md b/website/docs/api/span.md index 033aa579c..5445f13df 100644 --- a/website/docs/api/span.md +++ b/website/docs/api/span.md @@ -316,6 +316,24 @@ taken. | ----------- | ------- | --------------- | | **RETURNS** | `Token` | The root token. | +## Span.conjuncts {#conjuncts tag="property" model="parser"} + +A tuple of tokens coordinated to `span.root`. + +> #### Example +> +> ```python +> doc = nlp(u"I like apples and oranges") +> apples_conjuncts = doc[2:3].conjuncts +> assert [t.text for t in apples_conjuncts] == [u"oranges"] +> ``` + +| Name | Type | Description | +| ---------- | ------- | -------------------- | +| **RETURNS** | `tuple` | The coordinated tokens. 
| + + + ## Span.lefts {#lefts tag="property" model="parser"} Tokens that are to the left of the span, whose heads are within the span. diff --git a/website/docs/api/token.md b/website/docs/api/token.md index f30fd4639..6981f0fc7 100644 --- a/website/docs/api/token.md +++ b/website/docs/api/token.md @@ -211,7 +211,7 @@ The rightmost token of this token's syntactic descendants. ## Token.conjuncts {#conjuncts tag="property" model="parser"} -A sequence of coordinated tokens, including the token itself. +A tuple of coordinated tokens, not including the token itself. > #### Example > @@ -223,7 +223,7 @@ A sequence of coordinated tokens, including the token itself. | Name | Type | Description | | ---------- | ------- | -------------------- | -| **YIELDS** | `Token` | A coordinated token. | +| **RETURNS** | `tuple` | The coordinated tokens. | ## Token.children {#children tag="property" model="parser"}