Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-26 17:24:41 +03:00
Commit fd6207426a: Merge branch 'master' into develop
File diff suppressed because it is too large
@@ -57,6 +57,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
     assert str(tokens[0]) == text.split(' ')[0]
     assert str(tokens[1]) == text.split(' ')[1]

+@pytest.fixture
+def doc(en_tokenizer):
+    text = "This is a sentence. This is another sentence. And a third."
+    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
+    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
+            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
+    tokens = en_tokenizer(text)
+    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)

 def test_doc_token_api_is_properties(en_vocab):
     text = ["Hi", ",", "my", "email", "is", "test@me.com"]
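The new doc fixture builds a three-sentence Doc with the tests' get_doc helper; the heads list appears to give each token's head as an offset relative to the token itself, with 0 marking a sentence root. A minimal sketch of how those offsets resolve, under that assumption (the names below are illustrative only):

words = "This is a sentence . This is another sentence . And a third .".split()
heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
# head_index = token_index + offset; an offset of 0 means the token heads itself
abs_heads = [i + offset for i, offset in enumerate(heads)]
assert abs_heads == [1, 1, 3, 1, 1, 6, 6, 8, 6, 6, 10, 12, 10, 12]
# The three self-headed tokens ("is", "is", "And") carry the 'ROOT' deps,
# which is why the fixture's Doc comes out as exactly three sentences.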
@@ -164,9 +172,19 @@ def test_is_sent_start(en_tokenizer):
     doc.is_parsed = True
     assert len(list(doc.sents)) == 2


 def test_set_pos():
     doc = Doc(Vocab(), words=['hello', 'world'])
     doc[0].pos_ = 'NOUN'
     assert doc[0].pos_ == 'NOUN'
     doc[1].pos = VERB
+    assert doc[1].pos_ == 'VERB'
+
+
+def test_tokens_sent(doc):
+    """Test token.sent property"""
+    assert len(list(doc.sents)) == 3
+    assert doc[1].sent.text == 'This is a sentence .'
+    assert doc[7].sent.text == 'This is another sentence .'
+    assert doc[1].sent.root.left_edge.text == 'This'
+    assert doc[7].sent.root.left_edge.text == 'This'
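test_set_pos exercises the paired integer/string attributes on Token: pos_ takes the string name, pos takes the integer ID. A self-contained version of the same check, assuming the VERB constant comes from spacy.symbols (the test module's own imports are not visible in this hunk):

# Minimal sketch; the spacy.symbols import location is an assumption.
from spacy.symbols import VERB
from spacy.tokens import Doc
from spacy.vocab import Vocab

doc = Doc(Vocab(), words=['hello', 'world'])
doc[0].pos_ = 'NOUN'   # write via the string alias
doc[1].pos = VERB      # write via the integer ID
assert doc[0].pos_ == 'NOUN'
assert doc[1].pos_ == 'VERB'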
@@ -7,7 +7,9 @@ import pytest
 @pytest.mark.parametrize('string,lemma', [('Abgehängten', 'Abgehängte'),
                                           ('engagierte', 'engagieren'),
                                           ('schließt', 'schließen'),
-                                          ('vorgebenden', 'vorgebend')])
+                                          ('vorgebenden', 'vorgebend'),
+                                          ('die', 'der'),
+                                          ('Die', 'der')])
 def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
     tokens = de_tokenizer(string)
     assert tokens[0].lemma_ == lemma
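The two new cases assert that both the lower-cased and the title-cased article ('die', 'Die') are lemmatized to 'der' by the German lookup lemmatizer. A quick way to reproduce this outside the test suite, assuming only the rule and lookup data bundled with spacy.lang.de in the spaCy version this commit targets (no trained model needed):

# Sketch under the assumption above; mirrors the de_tokenizer fixture,
# which uses the tokenizer only.
from spacy.lang.de import German

tokenizer = German().tokenizer
for string, lemma in [('die', 'der'), ('Die', 'der'), ('schließt', 'schließen')]:
    tokens = tokenizer(string)
    assert tokens[0].lemma_ == lemma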
@@ -361,6 +361,13 @@ cdef class Token:
         def __get__(self):
             return self.c.r_kids

+    property sent:
+        """RETURNS (Span): The sentence span that the token is a part of."""
+        def __get__(self):
+            if 'sent' in self.doc.user_token_hooks:
+                return self.doc.user_token_hooks['sent'](self)
+            return self.doc[self.i : self.i+1].sent
+
     property sent_start:
         def __get__(self):
             # Raising a deprecation warning here causes errors for autocomplete
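The new Token.sent property first defers to any 'sent' entry in doc.user_token_hooks, so a pipeline component can supply its own sentence segmentation, and otherwise returns the sentence of the one-token span around the token. A usage sketch with the public API; the model name is an assumption, and any pipeline with a dependency parser would do:

import spacy

# Assumes an installed English model with a parser; en_core_web_sm is an example.
nlp = spacy.load('en_core_web_sm')
doc = nlp("This is a sentence. This is another sentence.")
token = doc[7]   # "another", inside the second sentence
assert token.sent.text == "This is another sentence."
# Equivalent to the one-token slice the property falls back to:
assert token.sent.text == doc[token.i : token.i + 1].sent.text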