mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-31 03:03:17 +03:00
Raise error in spacy package when model name is not a valid python identifier (#10192)
* MultiHashEmbed vector docs correction * raise error for invalid identifier as model name * more succinct error message * update success message * permitted package name + double underscore * clarify package name error * clarify underscore run message * tweak language + simplify underscore run * cleanup underscore run warning * spacing correction * Update spacy/tests/test_cli.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
3877f78ff9
commit
ee662ec381
|
@ -7,6 +7,7 @@ from collections import defaultdict
|
||||||
from catalogue import RegistryError
|
from catalogue import RegistryError
|
||||||
import srsly
|
import srsly
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
|
from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
|
||||||
from ..schemas import validate, ModelMetaSchema
|
from ..schemas import validate, ModelMetaSchema
|
||||||
|
@ -109,6 +110,24 @@ def package(
|
||||||
", ".join(meta["requirements"]),
|
", ".join(meta["requirements"]),
|
||||||
)
|
)
|
||||||
if name is not None:
|
if name is not None:
|
||||||
|
if not name.isidentifier():
|
||||||
|
msg.fail(
|
||||||
|
f"Model name ('{name}') is not a valid module name. "
|
||||||
|
"This is required so it can be imported as a module.",
|
||||||
|
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
|
||||||
|
"and 0-9. "
|
||||||
|
"For specific details see: https://docs.python.org/3/reference/lexical_analysis.html#identifiers",
|
||||||
|
exits=1,
|
||||||
|
)
|
||||||
|
if not _is_permitted_package_name(name):
|
||||||
|
msg.fail(
|
||||||
|
f"Model name ('{name}') is not a permitted package name. "
|
||||||
|
"This is required to correctly load the model with spacy.load.",
|
||||||
|
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
|
||||||
|
"and 0-9. "
|
||||||
|
"For specific details see: https://www.python.org/dev/peps/pep-0426/#name",
|
||||||
|
exits=1,
|
||||||
|
)
|
||||||
meta["name"] = name
|
meta["name"] = name
|
||||||
if version is not None:
|
if version is not None:
|
||||||
meta["version"] = version
|
meta["version"] = version
|
||||||
|
@ -162,7 +181,7 @@ def package(
|
||||||
imports="\n".join(f"from . import {m}" for m in imports)
|
imports="\n".join(f"from . import {m}" for m in imports)
|
||||||
)
|
)
|
||||||
create_file(package_path / "__init__.py", init_py)
|
create_file(package_path / "__init__.py", init_py)
|
||||||
msg.good(f"Successfully created package '{model_name_v}'", main_path)
|
msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
|
||||||
if create_sdist:
|
if create_sdist:
|
||||||
with util.working_dir(main_path):
|
with util.working_dir(main_path):
|
||||||
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
|
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
|
||||||
|
@ -171,8 +190,14 @@ def package(
|
||||||
if create_wheel:
|
if create_wheel:
|
||||||
with util.working_dir(main_path):
|
with util.working_dir(main_path):
|
||||||
util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
|
util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
|
||||||
wheel = main_path / "dist" / f"{model_name_v}{WHEEL_SUFFIX}"
|
wheel_name_squashed = re.sub("_+", "_", model_name_v)
|
||||||
|
wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}"
|
||||||
msg.good(f"Successfully created binary wheel", wheel)
|
msg.good(f"Successfully created binary wheel", wheel)
|
||||||
|
if "__" in model_name:
|
||||||
|
msg.warn(
|
||||||
|
f"Model name ('{model_name}') contains a run of underscores. "
|
||||||
|
"Runs of underscores are not significant in installed package names.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def has_wheel() -> bool:
|
def has_wheel() -> bool:
|
||||||
|
@ -422,6 +447,14 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
|
||||||
return md.text
|
return md.text
|
||||||
|
|
||||||
|
|
||||||
|
def _is_permitted_package_name(package_name: str) -> bool:
|
||||||
|
# regex from: https://www.python.org/dev/peps/pep-0426/#name
|
||||||
|
permitted_match = re.search(
|
||||||
|
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", package_name, re.IGNORECASE
|
||||||
|
)
|
||||||
|
return permitted_match is not None
|
||||||
|
|
||||||
|
|
||||||
TEMPLATE_SETUP = """
|
TEMPLATE_SETUP = """
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import io
|
import io
|
||||||
|
|
|
@ -17,6 +17,7 @@ from spacy.cli.debug_data import _get_labels_from_spancat
|
||||||
from spacy.cli.download import get_compatibility, get_version
|
from spacy.cli.download import get_compatibility, get_version
|
||||||
from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
|
from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
|
||||||
from spacy.cli.package import get_third_party_dependencies
|
from spacy.cli.package import get_third_party_dependencies
|
||||||
|
from spacy.cli.package import _is_permitted_package_name
|
||||||
from spacy.cli.validate import get_model_pkgs
|
from spacy.cli.validate import get_model_pkgs
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lang.nl import Dutch
|
from spacy.lang.nl import Dutch
|
||||||
|
@ -695,6 +696,17 @@ def test_get_labels_from_model(factory_name, pipe_name):
|
||||||
assert _get_labels_from_model(nlp, factory_name) == set(labels)
|
assert _get_labels_from_model(nlp, factory_name) == set(labels)
|
||||||
|
|
||||||
|
|
||||||
|
def test_permitted_package_names():
    """Names that violate the PEP 426 name pattern must be rejected."""
    # https://www.python.org/dev/peps/pep-0426/#name
    rejected_names = (
        "Meine_Bäume",  # non-ASCII letter
        "_package",  # leading underscore
        "package_",  # trailing underscore
        ".package",  # leading dot
        "package.",  # trailing dot
        "-package",  # leading hyphen
        "package-",  # trailing hyphen
    )
    for candidate in rejected_names:
        assert _is_permitted_package_name(candidate) is False
|
||||||
|
|
||||||
|
|
||||||
def test_debug_data_compile_gold():
|
def test_debug_data_compile_gold():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
pred = Doc(nlp.vocab, words=["Token", ".", "New", "York", "City"])
|
pred = Doc(nlp.vocab, words=["Token", ".", "New", "York", "City"])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user