Merge branch 'master' into develop

ines 2018-07-09 18:05:10 +02:00
commit fd6207426a
4 changed files with 414 additions and 2 deletions

File diff suppressed because it is too large

View File

@@ -57,6 +57,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
    assert str(tokens[0]) == text.split(' ')[0]
    assert str(tokens[1]) == text.split(' ')[1]


@pytest.fixture
def doc(en_tokenizer):
    text = "This is a sentence. This is another sentence. And a third."
    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
    tokens = en_tokenizer(text)
    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)


def test_doc_token_api_is_properties(en_vocab):
    text = ["Hi", ",", "my", "email", "is", "test@me.com"]
@@ -164,9 +172,19 @@ def test_is_sent_start(en_tokenizer):
    doc.is_parsed = True
    assert len(list(doc.sents)) == 2


def test_set_pos():
    doc = Doc(Vocab(), words=['hello', 'world'])
    doc[0].pos_ = 'NOUN'
    assert doc[0].pos_ == 'NOUN'
    doc[1].pos = VERB
    assert doc[1].pos_ == 'VERB'


def test_tokens_sent(doc):
    """Test token.sent property"""
    assert len(list(doc.sents)) == 3
    assert doc[1].sent.text == 'This is a sentence .'
    assert doc[7].sent.text == 'This is another sentence .'
    assert doc[1].sent.root.left_edge.text == 'This'
    assert doc[7].sent.root.left_edge.text == 'This'
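
For reference, a minimal usage sketch of the token.sent behaviour these tests cover. It assumes a pipeline that assigns sentence boundaries (here the separately installed en_core_web_sm model, which is not part of this change):

import spacy

# token.sent needs sentence boundaries; the model's parser provides them.
nlp = spacy.load('en_core_web_sm')
doc = nlp("This is a sentence. This is another sentence.")
assert doc[1].sent.text == "This is a sentence."
assert doc[7].sent.text == "This is another sentence."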

View File

@@ -7,7 +7,9 @@ import pytest
@pytest.mark.parametrize('string,lemma', [('Abgehängten', 'Abgehängte'),
                                          ('engagierte', 'engagieren'),
                                          ('schließt', 'schließen'),
                                          ('vorgebenden', 'vorgebend'),
                                          ('die', 'der'),
                                          ('Die', 'der')])
def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
    tokens = de_tokenizer(string)
    assert tokens[0].lemma_ == lemma
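
For reference, a sketch of how these lookup lemmas surface through the public API. It assumes spaCy's bundled German language data and no statistical model; the example sentence is illustrative only:

from spacy.lang.de import German

# The tokenizer alone is enough here: with no tagger in the pipeline,
# .lemma_ falls back to the lookup table this test exercises.
nlp = German()
doc = nlp("Die Tür schließt.")
print(doc[0].lemma_)  # 'der', via the ('Die', 'der') entry added above
print(doc[2].lemma_)  # 'schließen', via the existing ('schließt', 'schließen') entry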

View File

@@ -361,6 +361,13 @@ cdef class Token:
        def __get__(self):
            return self.c.r_kids

    property sent:
        """RETURNS (Span): The sentence span that the token is a part of."""
        def __get__(self):
            if 'sent' in self.doc.user_token_hooks:
                return self.doc.user_token_hooks['sent'](self)
            return self.doc[self.i : self.i+1].sent

    property sent_start:
        def __get__(self):
            # Raising a deprecation warning here causes errors for autocomplete
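
For reference, a brief sketch of the user_token_hooks branch in the new sent property: anything that registers a 'sent' hook on the Doc takes precedence over the parse-based fallback. The hook here is purely illustrative:

from spacy.lang.en import English

nlp = English()
doc = nlp("This is a sentence. This is another sentence.")
# Illustrative hook: treat the whole doc as one sentence for every token.
doc.user_token_hooks['sent'] = lambda token: token.doc[:]
assert doc[3].sent.text == doc.text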