Mirror of https://github.com/explosion/spaCy.git
Reformat

parent 6c59f6c623
commit 685a386106
@@ -1,4 +1,5 @@
 """Test that longer and mixed texts are tokenized correctly."""
+
 import pytest


@@ -3,7 +3,13 @@ import pytest

 @pytest.mark.parametrize(
     "word,lemma",
-    [("新しく", "新しい"), ("赤く", "赤い"), ("すごく", "すごい"), ("いただきました", "いただく"), ("なった", "なる")],
+    [
+        ("新しく", "新しい"),
+        ("赤く", "赤い"),
+        ("すごく", "すごい"),
+        ("いただきました", "いただく"),
+        ("なった", "なる"),
+    ],
 )
 def test_ja_lemmatizer_assigns(ja_tokenizer, word, lemma):
     test_lemma = ja_tokenizer(word)[0].lemma_

@@ -143,7 +143,12 @@ def test_ja_tokenizer_sub_tokens(
     [
         (
             "取ってつけた",
-            (["五段-ラ行;連用形-促音便"], [], ["下一段-カ行;連用形-一般"], ["助動詞-タ;終止形-一般"]),
+            (
+                ["五段-ラ行;連用形-促音便"],
+                [],
+                ["下一段-カ行;連用形-一般"],
+                ["助動詞-タ;終止形-一般"],
+            ),
             (["トッ"], ["テ"], ["ツケ"], ["タ"]),
         ),
         ("2=3", ([], [], []), (["ニ"], ["_"], ["サン"])),

@@ -2,7 +2,14 @@ import pytest


 @pytest.mark.parametrize(
-    "word,lemma", [("새로운", "새롭"), ("빨간", "빨갛"), ("클수록", "크"), ("뭡니까", "뭣"), ("됐다", "되")]
+    "word,lemma",
+    [
+        ("새로운", "새롭"),
+        ("빨간", "빨갛"),
+        ("클수록", "크"),
+        ("뭡니까", "뭣"),
+        ("됐다", "되"),
+    ],
 )
 def test_ko_lemmatizer_assigns(ko_tokenizer, word, lemma):
     test_lemma = ko_tokenizer(word)[0].lemma_

@@ -1,4 +1,5 @@
 """Words like numbers are recognized correctly."""
+
 import pytest


@@ -265,7 +265,7 @@ def test_pretraining_tagger():


 # Try to debug segfault on windows
-#def test_pretraining_training():
+# def test_pretraining_training():
 #    """Test that training can use a pretrained Tok2Vec model"""
 #    config = Config().from_str(pretrain_string_internal)
 #    nlp = util.load_model_from_config(config, auto_fill=True, validate=False)