mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Use a hyphen instead of a comma to join the parts of the inflection field in the Japanese (JA) tokenizer
This commit is contained in:
		
							parent
							
								
									53b5f245ed
								
							
						
					
					
						commit
						f975690cc9
					
				|  | @ -94,7 +94,7 @@ class JapaneseTokenizer(DummyTokenizer): | ||||||
|             DetailedToken( |             DetailedToken( | ||||||
|                 token.surface(),  # orth |                 token.surface(),  # orth | ||||||
|                 "-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]),  # tag |                 "-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]),  # tag | ||||||
|                 ",".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]),  # inf |                 "-".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]),  # inf | ||||||
|                 token.dictionary_form(),  # lemma |                 token.dictionary_form(),  # lemma | ||||||
|                 token.normalized_form(), |                 token.normalized_form(), | ||||||
|                 token.reading_form(), |                 token.reading_form(), | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user