diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3499042cb..9c3b92f06 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -87,13 +87,13 @@ jobs: # python.version: "3.10" Python311Linux: imageName: 'ubuntu-latest' - python.version: '3.11.0' + python.version: '3.11' Python311Windows: imageName: 'windows-latest' - python.version: '3.11.0' + python.version: '3.11' Python311Mac: imageName: 'macos-latest' - python.version: '3.11.0' + python.version: '3.11' maxParallel: 4 pool: vmImage: $(imageName) diff --git a/requirements.txt b/requirements.txt index bf96cbc54..8e90082b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0 wasabi>=0.9.1,<1.1.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 -typer>=0.3.0,<0.5.0 +typer>=0.3.0,<0.8.0 cloudpathlib>=0.7.0,<0.11.0 # Third party dependencies numpy>=1.15.0 diff --git a/setup.cfg b/setup.cfg index 282aeec59..79e10b08d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,7 +51,7 @@ install_requires = srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 # Third-party dependencies - typer>=0.3.0,<0.5.0 + typer>=0.3.0,<0.8.0 cloudpathlib>=0.7.0,<0.11.0 tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py index a76461b83..86786f533 100644 --- a/spacy/cli/project/remote_storage.py +++ b/spacy/cli/project/remote_storage.py @@ -12,6 +12,7 @@ from .._util import ensure_pathy, make_tempdir from ...util import get_minor_version, ENV_VARS, check_bool_env_var from ...git_info import GIT_VERSION from ... import about +from ...errors import Errors if TYPE_CHECKING: from cloudpathlib import CloudPath # noqa: F401 @@ -84,7 +85,23 @@ class RemoteStorage: with tarfile.open(tar_loc, mode=mode_string) as tar_file: # This requires that the path is added correctly, relative # to root. 
This is how we set things up in push() - tar_file.extractall(self.root) + + # Disallow paths outside the current directory for the tar + # file (CVE-2007-4559, directory traversal vulnerability) + def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + prefix = os.path.commonpath([abs_directory, abs_target]) + return prefix == abs_directory + + def safe_extract(tar, path): + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise ValueError(Errors.E852) + tar.extractall(path) + + safe_extract(tar_file, self.root) return url def find( diff --git a/spacy/errors.py b/spacy/errors.py index e0628819d..2f8a3996f 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes): "during training, make sure to include it in 'annotating components'") # New errors added in v3.x + E852 = ("The tar file pulled from the remote attempted an unsafe path " + "traversal.") E853 = ("Unsupported component factory name '{name}'. The character '.' is " "not permitted in factory names.") E854 = ("Unable to set doc.ents. 
Check that the 'ents_filter' does not " diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 238a768ed..4023c4456 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -155,7 +155,11 @@ class TextCategorizer(TrainablePipe): self.model = model self.name = name self._rehearsal_model = None - cfg: Dict[str, Any] = {"labels": [], "threshold": threshold, "positive_label": None} + cfg: Dict[str, Any] = { + "labels": [], + "threshold": threshold, + "positive_label": None, + } self.cfg = dict(cfg) self.scorer = scorer diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index 617e3d19d..0a6f306a6 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -117,15 +117,13 @@ class Span: end_char: int label: int kb_id: int + id: int ent_id: int ent_id_: str @property - def id(self) -> int: ... - @property - def id_(self) -> str: ... - @property def orth_(self) -> str: ... @property def lemma_(self) -> str: ... label_: str kb_id_: str + id_: str