Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-30 23:47:31 +03:00)
Merge branch 'master' into develop

Commit fd6207426a

(One file's diff is suppressed because it is too large.)
@@ -57,6 +57,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
    assert str(tokens[0]) == text.split(' ')[0]
    assert str(tokens[1]) == text.split(' ')[1]

@pytest.fixture
def doc(en_tokenizer):
    text = "This is a sentence. This is another sentence. And a third."
    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
    tokens = en_tokenizer(text)
    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)

def test_doc_token_api_is_properties(en_vocab):
    text = ["Hi", ",", "my", "email", "is", "test@me.com"]
@@ -164,9 +172,19 @@ def test_is_sent_start(en_tokenizer):
    doc.is_parsed = True
    assert len(list(doc.sents)) == 2


def test_set_pos():
    doc = Doc(Vocab(), words=['hello', 'world'])
    doc[0].pos_ = 'NOUN'
    assert doc[0].pos_ == 'NOUN'
    doc[1].pos = VERB
    assert doc[1].pos_ == 'VERB'


def test_tokens_sent(doc):
    """Test token.sent property"""
    assert len(list(doc.sents)) == 3
    assert doc[1].sent.text == 'This is a sentence .'
    assert doc[7].sent.text == 'This is another sentence .'
    assert doc[1].sent.root.left_edge.text == 'This'
    assert doc[7].sent.root.left_edge.text == 'This'
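test_set_pos covers both spellings of the same operation: pos_ assigns the coarse tag by string, pos by the integer constant from spacy.symbols, and the two views stay in sync. A minimal standalone sketch of that duality:

    # Both ways of setting a coarse POS tag, mirroring test_set_pos above:
    # `pos_` takes the string name, `pos` the integer symbol; either way,
    # the other view reflects the change.
    from spacy.symbols import VERB
    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    doc = Doc(Vocab(), words=['hello', 'world'])
    doc[0].pos_ = 'NOUN'  # set via string
    doc[1].pos = VERB     # set via integer symbol
    assert (doc[0].pos_, doc[1].pos_) == ('NOUN', 'VERB')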
@@ -7,7 +7,9 @@ import pytest
@pytest.mark.parametrize('string,lemma', [('Abgehängten', 'Abgehängte'),
                                          ('engagierte', 'engagieren'),
                                          ('schließt', 'schließen'),
                                          ('vorgebenden', 'vorgebend'),
                                          ('die', 'der'),
                                          ('Die', 'der')])
def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
    tokens = de_tokenizer(string)
    assert tokens[0].lemma_ == lemma
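The two new parameters pin down that the German lookup lemmatizer handles the article in both capitalizations, mapping 'die' and 'Die' to 'der'. Conceptually, lookup lemmatization is just a table from surface form to lemma with the form itself as fallback; a rough sketch (entries illustrative, not the shipped data):

    # Table-based lookup lemmatization in miniature. The entries below are
    # illustrative; spaCy's real German table is much larger.
    LOOKUP = {
        'die': 'der',
        'Die': 'der',
        'schließt': 'schließen',
        'engagierte': 'engagieren',
    }

    def lookup_lemma(string):
        return LOOKUP.get(string, string)

    assert lookup_lemma('Die') == 'der'
    assert lookup_lemma('Haus') == 'Haus'  # unknown forms pass through unchanged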
@@ -361,6 +361,13 @@ cdef class Token:
        def __get__(self):
            return self.c.r_kids

    property sent:
        """RETURNS (Span): The sentence span that the token is a part of."""
        def __get__(self):
            if 'sent' in self.doc.user_token_hooks:
                return self.doc.user_token_hooks['sent'](self)
            return self.doc[self.i : self.i+1].sent

    property sent_start:
        def __get__(self):
            # Raising a deprecation warning here causes errors for autocomplete