mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
add lang check function to cli._util
This commit is contained in:
parent
49fda655c3
commit
c5abf918f4
|
@ -20,6 +20,7 @@ from ..compat import Literal
|
|||
from ..schemas import ProjectConfigSchema, validate
|
||||
from ..util import import_file, run_command, make_tempdir, registry, logger
|
||||
from ..util import is_compatible_version, SimpleFrozenDict, ENV_VARS
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
from .. import about
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -132,6 +133,16 @@ def _parse_override(value: Any) -> Any:
|
|||
return str(value)
|
||||
|
||||
|
||||
def _handle_renamed_language_codes(lang: str) -> None:
    """Report a failure if `lang` is a language code that was renamed in v4.

    lang (str): The language code to check.

    Codes that are not keys of RENAMED_LANGUAGE_CODES pass through silently.
    For a renamed code, a message naming the replacement code is sent to
    `msg.fail` with `exits=1`.
    """
    # Nothing to report for codes that were not renamed.
    if lang not in RENAMED_LANGUAGE_CODES:
        return
    replacement = RENAMED_LANGUAGE_CODES[lang]
    msg.fail(
        title="Renamed language code",
        text=f"Language code '{lang}' was replaced with '{replacement}' in spaCy v4. Update the language code from '{lang}' to '{replacement}'.",
        exits=1,
    )
|
||||
|
||||
|
||||
def load_project_config(
|
||||
path: Path, interpolate: bool = True, overrides: Dict[str, Any] = SimpleFrozenDict()
|
||||
) -> Dict[str, Any]:
|
||||
|
|
|
@ -7,13 +7,12 @@ import re
|
|||
import sys
|
||||
import itertools
|
||||
|
||||
from ._util import app, Arg, Opt
|
||||
from ._util import app, Arg, Opt, _handle_renamed_language_codes
|
||||
from ..training import docs_to_json
|
||||
from ..tokens import Doc, DocBin
|
||||
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
||||
from ..training.converters import conllu_to_docs
|
||||
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
|
||||
# Converters are matched by file extension except for ner/iob, which are
|
||||
# matched by file extension and content. To add a converter, add a new
|
||||
|
@ -113,12 +112,7 @@ def convert(
|
|||
msg = Printer(no_print=silent)
|
||||
|
||||
# Throw error for renamed language codes in v4
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
_handle_renamed_language_codes(lang)
|
||||
|
||||
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
||||
doc_files = []
|
||||
|
|
|
@ -12,9 +12,7 @@ from ..language import DEFAULT_CONFIG_PRETRAIN_PATH
|
|||
from ..schemas import RecommendationSchema
|
||||
from ..util import SimpleFrozenList
|
||||
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
|
||||
from ._util import string_to_list, import_code
|
||||
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
from ._util import string_to_list, import_code, _handle_renamed_language_codes
|
||||
|
||||
|
||||
ROOT = Path(__file__).parent / "templates"
|
||||
|
@ -160,13 +158,10 @@ def init_config(
|
|||
msg = Printer(no_print=silent)
|
||||
with TEMPLATE_PATH.open("r") as f:
|
||||
template = Template(f.read())
|
||||
|
||||
# Throw error for renamed language codes in v4
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
_handle_renamed_language_codes(lang)
|
||||
|
||||
# Filter out duplicates since tok2vec and transformer are added by template
|
||||
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
||||
defaults = RECOMMENDATIONS["__default__"]
|
||||
|
|
|
@ -9,9 +9,7 @@ from .. import util
|
|||
from ..training.initialize import init_nlp, convert_vectors
|
||||
from ..language import Language
|
||||
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code, setup_gpu
|
||||
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
from ._util import import_code, setup_gpu, _handle_renamed_language_codes
|
||||
|
||||
|
||||
@init_cli.command("vectors")
|
||||
|
@ -33,13 +31,10 @@ def init_vectors_cli(
|
|||
a model with vectors.
|
||||
"""
|
||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||
|
||||
# Throw error for renamed language codes in v4
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
_handle_renamed_language_codes(lang)
|
||||
|
||||
msg.info(f"Creating blank nlp object for language '{lang}'")
|
||||
nlp = util.get_lang_class(lang)()
|
||||
if jsonl_loc is not None:
|
||||
|
|
|
@ -965,10 +965,7 @@ OLD_MODEL_SHORTCUTS = {
|
|||
}
|
||||
|
||||
# Renamed language codes in v4
|
||||
RENAMED_LANGUAGE_CODES = {
|
||||
"xx":"mul", "is":"isl"
|
||||
}
|
||||
|
||||
# Maps each old language code (key) to the code that replaces it (value).
RENAMED_LANGUAGE_CODES = {
    "xx": "mul",
    "is": "isl",
}
|
||||
|
||||
# fmt: on
|
||||
|
||||
|
|
|
@ -8,9 +8,9 @@ from spacy.util import get_lang_class
|
|||
LANGUAGES = ["af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el",
|
||||
"en", "es", "et", "eu", "fa", "fi", "fr", "ga", "gu", "he", "hi",
|
||||
"hr", "hu", "hy", "id", "isl", "it", "kn", "ky", "lb", "lt", "lv",
|
||||
"mk", "ml", "mr", "nb", "ne", "nl", "pl", "pt", "ro", "ru", "sa",
|
||||
"mk", "ml", "mr", "mul", "nb", "ne", "nl", "pl", "pt", "ro", "ru", "sa",
|
||||
"si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "ti", "tl", "tn",
|
||||
"tr", "tt", "uk", "ur", "mul", "yo"]
|
||||
"tr", "tt", "uk", "ur", "yo"]
|
||||
# fmt: on
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user