Remove renamed-language-code warnings (W124) from get_lang_class

This commit is contained in:
thomashacker 2022-11-16 14:04:55 +01:00
parent 60314f20f6
commit de838a0866
4 changed files with 13 additions and 10 deletions

View File

@ -13,6 +13,8 @@ from ..tokens import Doc, DocBin
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
from ..training.converters import conllu_to_docs
import warnings
from ..errors import Warnings
# Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new
@ -110,6 +112,14 @@ def convert(
input_path = Path(input_path)
if not msg:
msg = Printer(no_print=silent)
# Warn when `lang` is a language code that was renamed in v4
renamed_lang_codes = {"xx": "mul", "is": "isl"}
if lang in renamed_lang_codes:
warnings.warn(
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
)
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
doc_files = []
for input_loc in walk_directory(input_path, converter):

View File

@ -215,7 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.")
# New warnings added in v4.x
W124 = ("Language code '{lang}' was not found and replaced with '{renamed_lang}'. Language codes like 'xx' and 'is' were renamed to 'mul' and 'isl' in v4.")
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
class Errors(metaclass=ErrorsWithCodes):

View File

@ -590,13 +590,8 @@ def test_blank_languages(lang, target):
Test that we can get spacy.blank in various languages, including codes
that are defined to be equivalent or that match by CLDR language matching.
"""
if lang == "en":
nlp = spacy.blank(lang)
assert nlp.lang == target
else:
with pytest.warns(UserWarning):
nlp = spacy.blank(lang)
assert nlp.lang == target
nlp = spacy.blank(lang)
assert nlp.lang == target
@pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab])

View File

@ -327,8 +327,6 @@ def get_lang_class(lang: str) -> Type["Language"]:
match = None
if match:
if match != lang:
warnings.warn(Warnings.W124.format(lang=lang, renamed_lang=match))
lang = match
module = importlib.import_module(f".lang.{lang}", "spacy")
else: