mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
2f981d5af1
Remove corpus-specific tag maps from the language data for languages without custom tokenizers. For languages with custom word segmenters that also provide tags (Japanese and Korean), the tag maps for the custom tokenizers are kept as the default. The default tag map for languages without custom tokenizers is now the one from `lang/tag_map.py`, which maps UPOS -> UPOS.
75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
import pytest
|
|
from spacy.symbols import POS, PRON, VERB
|
|
|
|
|
|
@pytest.fixture
def i_has(en_tokenizer):
    """Return the tokenized text "I has" with the tags "PRP" and "VBZ" set.

    A two-entry tag map is loaded into the tokenizer's vocab morphology
    after tokenizing and before the tags are assigned to the tokens.
    """
    doc = en_tokenizer("I has")
    pronoun_features = {POS: PRON, "PronType": "prs"}
    verb_features = {
        POS: VERB,
        "VerbForm": "fin",
        "Tense": "pres",
        "Number": "sing",
        "Person": "three",
    }
    tag_map = {"PRP": pronoun_features, "VBZ": verb_features}
    morphology = en_tokenizer.vocab.morphology
    morphology.load_tag_map(tag_map)
    # Assign the tags positionally: first token "PRP", second token "VBZ".
    for index, tag in enumerate(("PRP", "VBZ")):
        doc[index].tag_ = tag
    return doc
|
|
|
|
|
|
def test_token_morph_eq(i_has):
    """Check identity and equality of the `morph` values of the tokens."""
    pronoun, verb = i_has[0], i_has[1]
    first_access = pronoun.morph
    second_access = pronoun.morph
    # Two accesses on the same token must be distinct objects that compare equal.
    assert first_access is not second_access
    assert first_access == second_access
    # The analyses of the two different tokens must compare unequal.
    assert pronoun.morph != verb.morph
|
|
|
|
|
|
def test_token_morph_key(i_has):
    """Check that `morph.key` is non-zero, repeatable and differs per token."""
    pronoun_key = i_has[0].morph.key
    verb_key = i_has[1].morph.key
    assert pronoun_key != 0
    assert verb_key != 0
    # Reading the key again from the same token gives the same value.
    assert pronoun_key == i_has[0].morph.key
    assert pronoun_key != verb_key
|
|
|
|
|
|
def test_morph_props(i_has):
    """Check `morph.get("PronType")` on a token with and without that field."""
    pronoun_morph = i_has[0].morph
    verb_morph = i_has[1].morph
    assert pronoun_morph.get("PronType") == ["PronType=prs"]
    # The second token's tag map entry has no "PronType", so the list is empty.
    assert verb_morph.get("PronType") == []
|
|
|
|
|
|
def test_morph_iter(i_has):
    """Check the items produced by iterating over each token's `morph`."""
    expected_pronoun = {"PronType=prs"}
    expected_verb = {"Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"}
    # Compare as sets so the iteration order is not part of the assertion.
    assert set(i_has[0].morph) == expected_pronoun
    assert set(i_has[1].morph) == expected_verb
|
|
|
|
|
|
def test_morph_get(i_has):
    """Check that `morph.get` returns the "PronType" entry of the first token."""
    pron_type = i_has[0].morph.get("PronType")
    assert pron_type == ["PronType=prs"]
|
|
|
|
|
|
def test_morph_set(i_has):
    """Check assigning `morph_` on the first token from strings and dicts."""
    token = i_has[0]
    assert token.morph.get("PronType") == ["PronType=prs"]

    # Assigning a string replaces the existing value.
    token.morph_ = "PronType=unk"
    assert token.morph.get("PronType") == ["PronType=unk"]

    # Assigning a string: the fields read back in alphabetical order.
    token.morph_ = "PronType=123|NounType=unk"
    assert token.morph_ == "NounType=unk|PronType=123"

    # Assigning a dict.
    token.morph_ = {"AType": "123", "BType": "unk", "POS": "ADJ"}
    assert token.morph_ == "AType=123|BType=unk|POS=ADJ"

    # String with multiple values: fields and values read back alphabetized.
    token.morph_ = "BType=c|AType=b,a"
    assert token.morph_ == "AType=a,b|BType=c"

    # Dict with multiple values: fields and values read back alphabetized.
    token.morph_ = {"AType": "b,a", "BType": "c"}
    assert token.morph_ == "AType=a,b|BType=c"
|
|
|
|
|
|
def test_morph_str(i_has):
    """Check the `str()` form of each token's `morph`."""
    pronoun_text = str(i_has[0].morph)
    verb_text = str(i_has[1].morph)
    assert pronoun_text == "PronType=prs"
    assert verb_text == "Number=sing|Person=three|Tense=pres|VerbForm=fin"
|