mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-08 22:03:24 +03:00
Change JA inflection separator to semicolon
A hyphen is unsuitable because it interacts with the JA data fields, and a pipe is also unsuitable because it has a different meaning in UD data. It is better to use a separator that has no significance in either case, so this commit uses a semicolon.
This commit is contained in:
parent
227f98081b
commit
c4e3b7a5db
|
@ -94,7 +94,7 @@ class JapaneseTokenizer(DummyTokenizer):
|
||||||
DetailedToken(
|
DetailedToken(
|
||||||
token.surface(), # orth
|
token.surface(), # orth
|
||||||
"-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]), # tag
|
"-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]), # tag
|
||||||
"|".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]), # inf
|
";".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]), # inf
|
||||||
token.dictionary_form(), # lemma
|
token.dictionary_form(), # lemma
|
||||||
token.normalized_form(),
|
token.normalized_form(),
|
||||||
token.reading_form(),
|
token.reading_form(),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user