mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Rename ja morph features to Inflection and Reading (#9520)
* Rename ja morph features to Inflection and Reading
This commit is contained in:
parent
2ea9b58006
commit
0c97ed2746
|
@ -80,12 +80,12 @@ class JapaneseTokenizer(DummyTokenizer):
|
|||
morph = {}
|
||||
if dtoken.inf:
|
||||
# it's normal for this to be empty for non-inflecting types
|
||||
morph["inflection"] = dtoken.inf
|
||||
morph["Inflection"] = dtoken.inf
|
||||
token.norm_ = dtoken.norm
|
||||
if dtoken.reading:
|
||||
# punctuation is its own reading, but we don't want values like
|
||||
# "=" here
|
||||
morph["reading"] = re.sub("[=|]", "_", dtoken.reading)
|
||||
morph["Reading"] = re.sub("[=|]", "_", dtoken.reading)
|
||||
token.morph = MorphAnalysis(self.vocab, morph)
|
||||
if self.need_subtokens:
|
||||
doc.user_data["sub_tokens"] = sub_tokens_list
|
||||
|
|
|
@ -144,9 +144,9 @@ def test_ja_tokenizer_inflections_reading_forms(
|
|||
ja_tokenizer, text, inflections, reading_forms
|
||||
):
|
||||
tokens = ja_tokenizer(text)
|
||||
test_inflections = [tt.morph.get("inflection") for tt in tokens]
|
||||
test_inflections = [tt.morph.get("Inflection") for tt in tokens]
|
||||
assert test_inflections == list(inflections)
|
||||
test_readings = [tt.morph.get("reading") for tt in tokens]
|
||||
test_readings = [tt.morph.get("Reading") for tt in tokens]
|
||||
assert test_readings == list(reading_forms)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user