remove warnings from get_lang_class

This commit is contained in:
thomashacker 2022-11-16 14:04:55 +01:00
parent 60314f20f6
commit de838a0866
4 changed files with 13 additions and 10 deletions

View File

@ -13,6 +13,8 @@ from ..tokens import Doc, DocBin
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
from ..training.converters import conllu_to_docs from ..training.converters import conllu_to_docs
import warnings
from ..errors import Warnings
# Converters are matched by file extension except for ner/iob, which are # Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new # matched by file extension and content. To add a converter, add a new
@ -110,6 +112,14 @@ def convert(
input_path = Path(input_path) input_path = Path(input_path)
if not msg: if not msg:
msg = Printer(no_print=silent) msg = Printer(no_print=silent)
# Warn if `lang` is one of the language codes that were renamed in v4 (e.g. "xx" -> "mul")
renamed_lang_codes = {"xx": "mul", "is": "isl"}
if lang in renamed_lang_codes:
warnings.warn(
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
)
ner_map = srsly.read_json(ner_map) if ner_map is not None else None ner_map = srsly.read_json(ner_map) if ner_map is not None else None
doc_files = [] doc_files = []
for input_loc in walk_directory(input_path, converter): for input_loc in walk_directory(input_path, converter):

View File

@ -215,7 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.") "aware that this might affect other components in your pipeline.")
# New warnings added in v4.x # New warnings added in v4.x
W124 = ("Language code '{lang}' was not found and replaced with '{renamed_lang}'. Language codes like 'xx' and 'is' were renamed to 'mul' and 'isl' in v4.") W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
class Errors(metaclass=ErrorsWithCodes): class Errors(metaclass=ErrorsWithCodes):

View File

@ -590,13 +590,8 @@ def test_blank_languages(lang, target):
Test that we can get spacy.blank in various languages, including codes Test that we can get spacy.blank in various languages, including codes
that are defined to be equivalent or that match by CLDR language matching. that are defined to be equivalent or that match by CLDR language matching.
""" """
if lang == "en": nlp = spacy.blank(lang)
nlp = spacy.blank(lang) assert nlp.lang == target
assert nlp.lang == target
else:
with pytest.warns(UserWarning):
nlp = spacy.blank(lang)
assert nlp.lang == target
@pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab]) @pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab])

View File

@ -327,8 +327,6 @@ def get_lang_class(lang: str) -> Type["Language"]:
match = None match = None
if match: if match:
if match != lang:
warnings.warn(Warnings.W124.format(lang=lang, renamed_lang=match))
lang = match lang = match
module = importlib.import_module(f".lang.{lang}", "spacy") module = importlib.import_module(f".lang.{lang}", "spacy")
else: else: