mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Use hyphen to join parts of inflection in JA tokenizer
This commit is contained in:
parent
53b5f245ed
commit
f975690cc9
|
@@ -94,7 +94,7 @@ class JapaneseTokenizer(DummyTokenizer):
|
|||
DetailedToken(
|
||||
token.surface(), # orth
|
||||
"-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]), # tag
|
||||
- ",".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]), # inf
|
||||
+ "-".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]), # inf
|
||||
token.dictionary_form(), # lemma
|
||||
token.normalized_form(),
|
||||
token.reading_form(),
|
||||
|
|
Loading…
Reference in New Issue
Block a user