Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-26 17:24:41 +03:00
Commit fd6207426a: Merge branch 'master' into develop
File diff suppressed because it is too large
@@ -57,6 +57,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
     assert str(tokens[0]) == text.split(' ')[0]
     assert str(tokens[1]) == text.split(' ')[1]

+@pytest.fixture
+def doc(en_tokenizer):
+    text = "This is a sentence. This is another sentence. And a third."
+    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
+    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
+            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
+    tokens = en_tokenizer(text)
+    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)

 def test_doc_token_api_is_properties(en_vocab):
     text = ["Hi", ",", "my", "email", "is", "test@me.com"]
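The new doc fixture builds a three-sentence Doc with the tests' get_doc helper; the heads list appears to give each token's head as an offset relative to the token itself, with 0 marking a sentence root. A minimal sketch of how those offsets resolve, under that assumption (the names below are illustrative only):

words = "This is a sentence . This is another sentence . And a third .".split()
heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
# head_index = token_index + offset; an offset of 0 means the token heads itself
abs_heads = [i + offset for i, offset in enumerate(heads)]
assert abs_heads == [1, 1, 3, 1, 1, 6, 6, 8, 6, 6, 10, 12, 10, 12]
# The three self-headed tokens ("is", "is", "And") carry the 'ROOT' deps,
# which is why the fixture's Doc comes out as exactly three sentences.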
@@ -164,9 +172,19 @@ def test_is_sent_start(en_tokenizer):
     doc.is_parsed = True
     assert len(list(doc.sents)) == 2


 def test_set_pos():
     doc = Doc(Vocab(), words=['hello', 'world'])
     doc[0].pos_ = 'NOUN'
     assert doc[0].pos_ == 'NOUN'
     doc[1].pos = VERB
+    assert doc[1].pos_ == 'VERB'
+
+
+def test_tokens_sent(doc):
+    """Test token.sent property"""
+    assert len(list(doc.sents)) == 3
+    assert doc[1].sent.text == 'This is a sentence .'
+    assert doc[7].sent.text == 'This is another sentence .'
+    assert doc[1].sent.root.left_edge.text == 'This'
+    assert doc[7].sent.root.left_edge.text == 'This'
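test_set_pos exercises the paired integer/string attributes on Token: pos_ takes the string name, pos takes the integer ID. A self-contained version of the same check, assuming the VERB constant comes from spacy.symbols (the test module's own imports are not visible in this hunk):

# Minimal sketch; the spacy.symbols import location is an assumption.
from spacy.symbols import VERB
from spacy.tokens import Doc
from spacy.vocab import Vocab

doc = Doc(Vocab(), words=['hello', 'world'])
doc[0].pos_ = 'NOUN'   # write via the string alias
doc[1].pos = VERB      # write via the integer ID
assert doc[0].pos_ == 'NOUN'
assert doc[1].pos_ == 'VERB'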
@@ -7,7 +7,9 @@ import pytest
 @pytest.mark.parametrize('string,lemma', [('Abgehängten', 'Abgehängte'),
                                           ('engagierte', 'engagieren'),
                                           ('schließt', 'schließen'),
-                                          ('vorgebenden', 'vorgebend')])
+                                          ('vorgebenden', 'vorgebend'),
+                                          ('die', 'der'),
+                                          ('Die', 'der')])
 def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
     tokens = de_tokenizer(string)
     assert tokens[0].lemma_ == lemma
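The two new cases assert that both the lower-cased and the title-cased article ('die', 'Die') are lemmatized to 'der' by the German lookup lemmatizer. A quick way to reproduce this outside the test suite, assuming only the rule and lookup data bundled with spacy.lang.de in the spaCy version this commit targets (no trained model needed):

# Sketch under the assumption above; mirrors the de_tokenizer fixture,
# which uses the tokenizer only.
from spacy.lang.de import German

tokenizer = German().tokenizer
for string, lemma in [('die', 'der'), ('Die', 'der'), ('schließt', 'schließen')]:
    tokens = tokenizer(string)
    assert tokens[0].lemma_ == lemma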
@@ -361,6 +361,13 @@ cdef class Token:
         def __get__(self):
             return self.c.r_kids

+    property sent:
+        """RETURNS (Span): The sentence span that the token is a part of."""
+        def __get__(self):
+            if 'sent' in self.doc.user_token_hooks:
+                return self.doc.user_token_hooks['sent'](self)
+            return self.doc[self.i : self.i+1].sent
+
     property sent_start:
         def __get__(self):
             # Raising a deprecation warning here causes errors for autocomplete
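The new Token.sent property first defers to any 'sent' entry in doc.user_token_hooks, so a pipeline component can supply its own sentence segmentation, and otherwise returns the sentence of the one-token span around the token. A usage sketch with the public API; the model name is an assumption, and any pipeline with a dependency parser would do:

import spacy

# Assumes an installed English model with a parser; en_core_web_sm is an example.
nlp = spacy.load('en_core_web_sm')
doc = nlp("This is a sentence. This is another sentence.")
token = doc[7]   # "another", inside the second sentence
assert token.sent.text == "This is another sentence."
# Equivalent to the one-token slice the property falls back to:
assert token.sent.text == doc[token.i : token.i + 1].sent.text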