Mirror of https://github.com/explosion/spaCy.git (synced 2025-03-04 19:35:51 +03:00)
Reformat

commit 685a386106 (parent 6c59f6c623)
@@ -1,4 +1,5 @@
 """Test that longer and mixed texts are tokenized correctly."""
+
 import pytest
 
 
@@ -3,7 +3,13 @@ import pytest
 
 @pytest.mark.parametrize(
     "word,lemma",
-    [("新しく", "新しい"), ("赤く", "赤い"), ("すごく", "すごい"), ("いただきました", "いただく"), ("なった", "なる")],
+    [
+        ("新しく", "新しい"),
+        ("赤く", "赤い"),
+        ("すごく", "すごい"),
+        ("いただきました", "いただく"),
+        ("なった", "なる"),
+    ],
 )
 def test_ja_lemmatizer_assigns(ja_tokenizer, word, lemma):
     test_lemma = ja_tokenizer(word)[0].lemma_
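For context: pytest.mark.parametrize generates one test case per (word, lemma) tuple, so the multi-line layout changes nothing about what runs. Below is a self-contained sketch of the same mechanism, with a hypothetical dictionary stub standing in for the ja_tokenizer fixture (the real fixture wraps spaCy's SudachiPy-backed Japanese tokenizer, which assigns lemmas during tokenization):

import pytest

# Hypothetical stub lemma table; the real test reads token.lemma_ from the
# ja_tokenizer fixture instead.
STUB_LEMMAS = {"新しく": "新しい", "赤く": "赤い"}


@pytest.mark.parametrize(
    "word,lemma",
    [
        ("新しく", "新しい"),
        ("赤く", "赤い"),
    ],
)
def test_stub_lemmatizer_assigns(word, lemma):
    # One independent test case runs per tuple in the list above.
    assert STUB_LEMMAS[word] == lemma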
@@ -143,7 +143,12 @@ def test_ja_tokenizer_sub_tokens(
     [
         (
             "取ってつけた",
-            (["五段-ラ行;連用形-促音便"], [], ["下一段-カ行;連用形-一般"], ["助動詞-タ;終止形-一般"]),
+            (
+                ["五段-ラ行;連用形-促音便"],
+                [],
+                ["下一段-カ行;連用形-一般"],
+                ["助動詞-タ;終止形-一般"],
+            ),
             (["トッ"], ["テ"], ["ツケ"], ["タ"]),
         ),
         ("2=3", ([], [], []), (["ニ"], ["_"], ["サン"])),
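The shape of all of these rewrites is consistent with Black's "magic trailing comma" (an assumption; the commit message says only "Reformat" and names no tool): when the last element of a bracketed collection carries a trailing comma, the formatter keeps the collection exploded, one element per line, rather than collapsing it back onto a single line. A minimal sketch:

# The trailing comma after the last element keeps Black from collapsing
# this tuple back onto a single line on the next formatting pass.
inflections = (
    ["五段-ラ行;連用形-促音便"],
    [],
    ["下一段-カ行;連用形-一般"],
    ["助動詞-タ;終止形-一般"],
)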
@@ -2,7 +2,14 @@ import pytest
 
 
 @pytest.mark.parametrize(
-    "word,lemma", [("새로운", "새롭"), ("빨간", "빨갛"), ("클수록", "크"), ("뭡니까", "뭣"), ("됐다", "되")]
+    "word,lemma",
+    [
+        ("새로운", "새롭"),
+        ("빨간", "빨갛"),
+        ("클수록", "크"),
+        ("뭡니까", "뭣"),
+        ("됐다", "되"),
+    ],
 )
 def test_ko_lemmatizer_assigns(ko_tokenizer, word, lemma):
     test_lemma = ko_tokenizer(word)[0].lemma_
@@ -1,4 +1,5 @@
 """Words like numbers are recognized correctly."""
+
 import pytest
 
 
@@ -265,7 +265,7 @@ def test_pretraining_tagger():
 
 
 # Try to debug segfault on windows
-#def test_pretraining_training():
+# def test_pretraining_training():
 #    """Test that training can use a pretrained Tok2Vec model"""
 #    config = Config().from_str(pretrain_string_internal)
 #    nlp = util.load_model_from_config(config, auto_fill=True, validate=False)
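The only change in this last hunk is "#def" becoming "# def", which follows PEP 8's rule (flake8's E265 check) that a block comment's text start after "# ". If the commented-out test were ever re-enabled, it would read roughly as follows: a sketch reconstructed from the comments, assuming the test module's existing imports and its pretrain_string_internal config string (neither is shown in this diff):

from thinc.api import Config  # assumed import, matching spaCy's test style
from spacy import util


def test_pretraining_training():
    """Test that training can use a pretrained Tok2Vec model"""
    # pretrain_string_internal is assumed to be a config string defined
    # elsewhere in the test module; it is not reproduced here.
    config = Config().from_str(pretrain_string_internal)
    nlp = util.load_model_from_config(config, auto_fill=True, validate=False)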