mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	Tidy up and auto-format [ci skip]
This commit is contained in:
		
							parent
							
								
									bcbb9f5119
								
							
						
					
					
						commit
						3126dd0904
					
				|  | @ -39,8 +39,8 @@ def resolve_pos(token): | |||
|     """ | ||||
| 
 | ||||
|     # this is only used for consecutive ascii spaces | ||||
|     if token.pos == '空白': | ||||
|         return '空白' | ||||
|     if token.pos == "空白": | ||||
|         return "空白" | ||||
| 
 | ||||
|     # TODO: This is a first take. The rules here are crude approximations. | ||||
|     # For many of these, full dependencies are needed to properly resolve | ||||
|  | @ -78,7 +78,7 @@ def detailed_tokens(tokenizer, text): | |||
|         scount = node.next.rlength - node.next.length | ||||
|         spaces.append(bool(scount)) | ||||
|         while scount > 1: | ||||
|             words.append(ShortUnitWord(' ', ' ', '空白')) | ||||
|             words.append(ShortUnitWord(" ", " ", "空白")) | ||||
|             spaces.append(False) | ||||
|             scount -= 1 | ||||
| 
 | ||||
|  |  | |||
|  | @ -48,8 +48,9 @@ def test_ja_tokenizer_pos(ja_tokenizer, text, expected_pos): | |||
|     pos = [token.pos_ for token in ja_tokenizer(text)] | ||||
|     assert pos == expected_pos | ||||
| 
 | ||||
| 
 | ||||
| def test_extra_spaces(ja_tokenizer): | ||||
|     # note: three spaces after "I" | ||||
|     tokens = ja_tokenizer("I   like cheese.") | ||||
|     assert tokens[1].orth_ == ' ' | ||||
|     assert tokens[2].orth_ == ' ' | ||||
|     assert tokens[1].orth_ == " " | ||||
|     assert tokens[2].orth_ == " " | ||||
|  |  | |||
|  | @ -17,4 +17,6 @@ TEST_CASES = [ | |||
| 
 | ||||
| @pytest.mark.parametrize("tokens,lemmas", TEST_CASES) | ||||
| def test_lt_lemmatizer(lt_lemmatizer, tokens, lemmas): | ||||
|     assert lemmas == [lt_lemmatizer.lookup_table.get_string(token, token) for token in tokens] | ||||
|     assert lemmas == [ | ||||
|         lt_lemmatizer.lookup_table.get_string(token, token) for token in tokens | ||||
|     ] | ||||
|  |  | |||
|  | @ -78,7 +78,6 @@ def test_lookups_to_from_disk(): | |||
|     assert table2.get_string("b") == 2 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # This fails on Python 3.5 | ||||
| @pytest.mark.xfail | ||||
| def test_lookups_to_from_bytes_via_vocab(): | ||||
|  | @ -97,6 +96,7 @@ def test_lookups_to_from_bytes_via_vocab(): | |||
|     assert table.get_string("hello") == "world" | ||||
|     assert new_vocab.to_bytes() == vocab_bytes | ||||
| 
 | ||||
| 
 | ||||
| # This fails on Python 3.5 | ||||
| @pytest.mark.xfail | ||||
| def test_lookups_to_from_disk_via_vocab(): | ||||
|  |  | |||
|  | @ -80,8 +80,8 @@ training corpus and can be defined in the respective language data's | |||
| 
 | ||||
| <Accordion title="Universal Part-of-speech Tags" id="pos-universal"> | ||||
| 
 | ||||
| spaCy also maps all language-specific part-of-speech tags to a small, fixed set | ||||
| of word type tags following the | ||||
| spaCy maps all language-specific part-of-speech tags to a small, fixed set of | ||||
| word type tags following the | ||||
| [Universal Dependencies scheme](http://universaldependencies.org/u/pos/). The | ||||
| universal tags don't code for any morphological features and only cover the word | ||||
| type. They're available as the [`Token.pos`](/api/token#attributes) and | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user