mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 21:00:19 +03:00
remove warnings from get_lang_class
This commit is contained in:
parent
60314f20f6
commit
de838a0866
|
@@ -13,6 +13,8 @@ from ..tokens import Doc, DocBin
|
||||||
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
||||||
from ..training.converters import conllu_to_docs
|
from ..training.converters import conllu_to_docs
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
from ..errors import Warnings
|
||||||
|
|
||||||
# Converters are matched by file extension except for ner/iob, which are
|
# Converters are matched by file extension except for ner/iob, which are
|
||||||
# matched by file extension and content. To add a converter, add a new
|
# matched by file extension and content. To add a converter, add a new
|
||||||
|
@@ -110,6 +112,14 @@ def convert(
|
||||||
input_path = Path(input_path)
|
input_path = Path(input_path)
|
||||||
if not msg:
|
if not msg:
|
||||||
msg = Printer(no_print=silent)
|
msg = Printer(no_print=silent)
|
||||||
|
|
||||||
|
# Add warnings for renamed language code in v4
|
||||||
|
renamed_lang_codes = {"xx": "mul", "is": "isl"}
|
||||||
|
if lang in renamed_lang_codes:
|
||||||
|
warnings.warn(
|
||||||
|
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
|
||||||
|
)
|
||||||
|
|
||||||
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
||||||
doc_files = []
|
doc_files = []
|
||||||
for input_loc in walk_directory(input_path, converter):
|
for input_loc in walk_directory(input_path, converter):
|
||||||
|
|
|
@@ -215,7 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
|
||||||
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
||||||
"aware that this might affect other components in your pipeline.")
|
"aware that this might affect other components in your pipeline.")
|
||||||
# New warnings added in v4.x
|
# New warnings added in v4.x
|
||||||
W124 = ("Language code '{lang}' was not found and replaced with '{renamed_lang}'. Language codes like 'xx' and 'is' were renamed to 'mul' and 'isl' in v4.")
|
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
|
||||||
|
|
||||||
|
|
||||||
class Errors(metaclass=ErrorsWithCodes):
|
class Errors(metaclass=ErrorsWithCodes):
|
||||||
|
|
|
@@ -590,13 +590,8 @@ def test_blank_languages(lang, target):
|
||||||
Test that we can get spacy.blank in various languages, including codes
|
Test that we can get spacy.blank in various languages, including codes
|
||||||
that are defined to be equivalent or that match by CLDR language matching.
|
that are defined to be equivalent or that match by CLDR language matching.
|
||||||
"""
|
"""
|
||||||
if lang == "en":
|
nlp = spacy.blank(lang)
|
||||||
nlp = spacy.blank(lang)
|
assert nlp.lang == target
|
||||||
assert nlp.lang == target
|
|
||||||
else:
|
|
||||||
with pytest.warns(UserWarning):
|
|
||||||
nlp = spacy.blank(lang)
|
|
||||||
assert nlp.lang == target
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab])
|
@pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab])
|
||||||
|
|
|
@@ -327,8 +327,6 @@ def get_lang_class(lang: str) -> Type["Language"]:
|
||||||
match = None
|
match = None
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
if match != lang:
|
|
||||||
warnings.warn(Warnings.W124.format(lang=lang, renamed_lang=match))
|
|
||||||
lang = match
|
lang = match
|
||||||
module = importlib.import_module(f".lang.{lang}", "spacy")
|
module = importlib.import_module(f".lang.{lang}", "spacy")
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user