Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-30 23:47:31 +03:00
	Auto-format [ci skip]
commit 126268ce50
parent 1a554bdcb1

@@ -26,7 +26,9 @@ class Ukrainian(Language):
     default_config={"model": None, "mode": "pymorphy2"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,):
+def make_lemmatizer(
+    nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False
+):
     return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
 
 
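
For context, a minimal usage sketch of the factory reformatted above (not part of this commit; it assumes spaCy v3's add_pipe config API and requires the pymorphy2 package with Ukrainian dictionaries installed):

    import spacy

    # Sketch only: build a blank Ukrainian pipeline and add the lemmatizer in
    # "pymorphy2" mode, which is what the factory's default_config selects.
    nlp = spacy.blank("uk")
    nlp.add_pipe("lemmatizer", config={"mode": "pymorphy2"})
    nlp.initialize()
    doc = nlp("Приклад речення")
    print([token.lemma_ for token in doc])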

@@ -54,9 +54,7 @@ def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
 
 
 class ChineseTokenizer(DummyTokenizer):
-    def __init__(
-        self, nlp: Language, segmenter: Segmenter = Segmenter.char,
-    ):
+    def __init__(self, nlp: Language, segmenter: Segmenter = Segmenter.char):
         self.vocab = nlp.vocab
         if isinstance(segmenter, Segmenter):
             segmenter = segmenter.value
@@ -87,7 +85,7 @@ class ChineseTokenizer(DummyTokenizer):
             if pkuseg_user_dict is None:
                 pkuseg_user_dict = pkuseg_model
             self.pkuseg_seg = try_pkuseg_import(
-                pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict,
+                pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict
             )
 
     def __call__(self, text: str) -> Doc:
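
For context, a rough sketch of selecting a segmenter for the tokenizer touched above (not part of this commit; it follows the documented from_config pattern, and the jieba segmenter requires the jieba package):

    from spacy.lang.zh import Chinese

    # Default: character segmentation, no extra dependencies.
    nlp = Chinese()

    # Word segmentation with jieba (pkuseg works the same way but also needs a
    # pkuseg model and, optionally, a user dictionary).
    cfg = {"nlp": {"tokenizer": {"segmenter": "jieba"}}}
    nlp_jieba = Chinese.from_config(cfg)
    doc = nlp_jieba("我喜欢自然语言处理")
    print([token.text for token in doc])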

@@ -209,9 +209,13 @@ def test_doc_retokenizer_split_norm(en_vocab):
     # Retokenize to split out the words in the token at doc[2].
     token = doc[2]
     with doc.retokenize() as retokenizer:
-      retokenizer.split(token, ["brown", "fox", "jumps", "over", "the"], heads=[(token, idx) for idx in range(5)])
+        retokenizer.split(
+            token,
+            ["brown", "fox", "jumps", "over", "the"],
+            heads=[(token, idx) for idx in range(5)],
+        )
 
-    assert doc[9].text  == "w/"
+    assert doc[9].text == "w/"
     assert doc[9].norm_ == "with"
-    assert doc[5].text  == "over"
+    assert doc[5].text == "over"
     assert doc[5].norm_ == "over"
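
In the heads argument above, a (token, idx) tuple attaches a new subtoken to the idx-th subtoken produced by the split. A standalone sketch of the same API (the example text is made up, mirroring spaCy's retokenization docs):

    from spacy.vocab import Vocab
    from spacy.tokens import Doc

    # Split "NewYork" into "New" + "York": "New" attaches to the second new
    # subtoken ("York"), and "York" attaches to "in".
    doc = Doc(Vocab(), words=["I", "live", "in", "NewYork"])
    with doc.retokenize() as retokenizer:
        retokenizer.split(doc[3], ["New", "York"], heads=[(doc[3], 1), doc[2]])
    print([token.text for token in doc])  # ['I', 'live', 'in', 'New', 'York']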

@@ -350,7 +350,7 @@ def test_pipe_methods_frozen():
 
 
 @pytest.mark.parametrize(
-    "pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"],
+    "pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"]
 )
 def test_pipe_label_data_exports_labels(pipe):
     nlp = Language()
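
For context on what this parametrized test exercises: trainable pipes expose their labels via label_data so they can be exported and fed back in at initialization. A rough sketch (the labels are made up):

    import spacy

    nlp = spacy.blank("en")
    tagger = nlp.add_pipe("tagger")
    tagger.add_label("NN")   # hypothetical label
    tagger.add_label("VBZ")  # hypothetical label
    print(tagger.labels)      # ('NN', 'VBZ')
    print(tagger.label_data)  # the same labels, in the form initialize expects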

@@ -64,7 +64,7 @@ def get_tok2vec_kwargs():
             width=32,
             rows=[500, 500, 500],
             attrs=["NORM", "PREFIX", "SHAPE"],
-            include_static_vectors=False
+            include_static_vectors=False,
         ),
         "encode": MaxoutWindowEncoder(
             width=32, depth=2, maxout_pieces=2, window_size=1
@@ -81,7 +81,7 @@ def test_multi_hash_embed():
         width=32,
         rows=[500, 500, 500],
         attrs=["NORM", "PREFIX", "SHAPE"],
-        include_static_vectors=False
+        include_static_vectors=False,
     )
     hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
     assert len(hash_embeds) == 3
@@ -96,7 +96,7 @@ def test_multi_hash_embed():
         width=32,
         rows=[1000, 50, 250],
         attrs=["NORM", "PREFIX", "SHAPE"],
-        include_static_vectors=False
+        include_static_vectors=False,
     )
     hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
     assert [he.get_dim("nV") for he in hash_embeds] == [1000, 50, 250]
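
For context, a rough standalone sketch of the embed/encode pair exercised above; the import path is an assumption about where spaCy keeps these layers, and the tiny example docs are made up:

    from spacy.ml.models import MultiHashEmbed, MaxoutWindowEncoder, build_Tok2Vec_model
    from spacy.vocab import Vocab
    from spacy.tokens import Doc

    embed = MultiHashEmbed(
        width=32,
        rows=[500, 500, 500],
        attrs=["NORM", "PREFIX", "SHAPE"],
        include_static_vectors=False,
    )
    encode = MaxoutWindowEncoder(width=32, depth=2, maxout_pieces=2, window_size=1)
    tok2vec = build_Tok2Vec_model(embed, encode)

    docs = [Doc(Vocab(), words=["hello", "world"])]
    tok2vec.initialize(X=docs)
    vectors = tok2vec.predict(docs)
    print(vectors[0].shape)  # one (n_tokens, 32) array per doc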

@@ -64,7 +64,7 @@ def dont_augment(nlp: "Language", example: Example) -> Iterator[Example]:
 
 
 def lower_casing_augmenter(
-    nlp: "Language", example: Example, *, level: float,
+    nlp: "Language", example: Example, *, level: float
 ) -> Iterator[Example]:
     if random.random() >= level:
         yield example
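
For context, the function above is the lower-casing data augmenter; a rough sketch of wiring it into a training corpus via the augmenter registry (the registered name "spacy.lower_case.v1" and the train.spacy path are assumptions, not part of this diff):

    from spacy import registry
    from spacy.training import Corpus

    # Assumed registered name for the factory wrapping lower_casing_augmenter.
    create_augmenter = registry.augmenters.get("spacy.lower_case.v1")
    augmenter = create_augmenter(level=0.3)  # lowercase roughly 30% of examples

    # "train.spacy" is a placeholder path to serialized training docs.
    corpus = Corpus("train.spacy", augmenter=augmenter)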