Mirror of https://github.com/explosion/spaCy.git

Commit fd6207426a: Merge branch 'master' into develop
(One file's diff is suppressed because it is too large.)
@@ -57,6 +57,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
     assert str(tokens[0]) == text.split(' ')[0]
     assert str(tokens[1]) == text.split(' ')[1]
 
+@pytest.fixture
+def doc(en_tokenizer):
+    text = "This is a sentence. This is another sentence. And a third."
+    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
+    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
+            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
+    tokens = en_tokenizer(text)
+    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
 
 def test_doc_token_api_is_properties(en_vocab):
     text = ["Hi", ",", "my", "email", "is", "test@me.com"]
@@ -164,9 +172,19 @@ def test_is_sent_start(en_tokenizer):
     doc.is_parsed = True
     assert len(list(doc.sents)) == 2
 
+
 def test_set_pos():
     doc = Doc(Vocab(), words=['hello', 'world'])
     doc[0].pos_ = 'NOUN'
     assert doc[0].pos_ == 'NOUN'
     doc[1].pos = VERB
     assert doc[1].pos_ == 'VERB'
+
+
+def test_tokens_sent(doc):
+    """Test token.sent property"""
+    assert len(list(doc.sents)) == 3
+    assert doc[1].sent.text == 'This is a sentence .'
+    assert doc[7].sent.text == 'This is another sentence .'
+    assert doc[1].sent.root.left_edge.text == 'This'
+    assert doc[7].sent.root.left_edge.text == 'This'
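`test_tokens_sent` exercises the `Token.sent` property introduced in this commit (see the `cdef class Token` hunk below). A hedged usage sketch, assuming a pipeline with a dependency parser such as the `en_core_web_sm` model is installed:

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'This is a sentence. This is another sentence.')
    token = doc[6]                # a token inside the second sentence
    print(token.sent.text)        # -> 'This is another sentence.'
    print(token.sent.root.text)   # the syntactic root of that sentence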
@@ -7,7 +7,9 @@ import pytest
 @pytest.mark.parametrize('string,lemma', [('Abgehängten', 'Abgehängte'),
                                           ('engagierte', 'engagieren'),
                                           ('schließt', 'schließen'),
-                                          ('vorgebenden', 'vorgebend')])
+                                          ('vorgebenden', 'vorgebend'),
+                                          ('die', 'der'),
+                                          ('Die', 'der')])
 def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
     tokens = de_tokenizer(string)
     assert tokens[0].lemma_ == lemma
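The two new cases cover the German definite article: both lowercase 'die' and sentence-initial 'Die' should resolve to the base form 'der'. A small sketch of what the parametrized test asserts, assuming spaCy's bundled German language data (the lookup lemmatizer is a plain table and needs no statistical model):

    from spacy.lang.de import German

    nlp = German()
    for word in [u'die', u'Die', u'schließt']:
        doc = nlp(word)
        print(word, '->', doc[0].lemma_)
    # Expected, per the parameters above:
    # die -> der, Die -> der, schließt -> schließen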
@@ -361,6 +361,13 @@ cdef class Token:
         def __get__(self):
             return self.c.r_kids
 
+    property sent:
+        """RETURNS (Span): The sentence span that the token is a part of."""
+        def __get__(self):
+            if 'sent' in self.doc.user_token_hooks:
+                return self.doc.user_token_hooks['sent'](self)
+            return self.doc[self.i : self.i+1].sent
+
     property sent_start:
         def __get__(self):
             # Raising a deprecation warning here causes errors for autocomplete
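The new `sent` property in `token.pyx` first consults `Doc.user_token_hooks`, so a custom component can override how a token resolves its sentence; otherwise it delegates to the `sent` of the single-token span `doc[i : i+1]`. A minimal sketch of the hook mechanism; the `whole_doc` hook here is invented for illustration:

    from spacy.lang.en import English

    nlp = English()
    doc = nlp(u'Hello world. Another sentence.')

    def whole_doc(token):
        # Hypothetical hook: report the entire Doc as the token's sentence.
        return token.doc[:]

    # Registering a callable under the 'sent' key is what the property checks.
    doc.user_token_hooks['sent'] = whole_doc
    print(doc[0].sent.text)   # -> 'Hello world. Another sentence.'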