From bbf64cfc4391cccba447346badaacca4d42e583d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 4 Nov 2022 11:17:43 +0100 Subject: [PATCH 1/5] Auto-format code with black (#11749) Co-authored-by: explosion-bot --- spacy/pipeline/textcat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 238a768ed..4023c4456 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -155,7 +155,11 @@ class TextCategorizer(TrainablePipe): self.model = model self.name = name self._rehearsal_model = None - cfg: Dict[str, Any] = {"labels": [], "threshold": threshold, "positive_label": None} + cfg: Dict[str, Any] = { + "labels": [], + "threshold": threshold, + "positive_label": None, + } self.cfg = dict(cfg) self.scorer = scorer From ea326cf47d5324cff14bef983b0da122b9f0d1ed Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 7 Nov 2022 08:11:13 +0100 Subject: [PATCH 2/5] Fix types for Span.id and Span.id_ (#11744) --- spacy/tokens/span.pyi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index 617e3d19d..0a6f306a6 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -117,15 +117,13 @@ class Span: end_char: int label: int kb_id: int + id: int ent_id: int ent_id_: str @property - def id(self) -> int: ... - @property - def id_(self) -> str: ... - @property def orth_(self) -> str: ... @property def lemma_(self) -> str: ... 
label_: str kb_id_: str + id_: str From b76222e56adb49e33d7d0471674dfe2f207b2020 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Mon, 7 Nov 2022 16:11:55 +0900 Subject: [PATCH 3/5] Raise Typer limit (#11720) * Raise typer limit to <0.7.0 * Raise limit to <0.8.0 --- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9d6bbb2c4..d91a3b3d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0 wasabi>=0.9.1,<1.1.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 -typer>=0.3.0,<0.5.0 +typer>=0.3.0,<0.8.0 pathy>=0.3.5 # Third party dependencies numpy>=1.15.0 diff --git a/setup.cfg b/setup.cfg index c2653feba..82d4d2758 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,7 +51,7 @@ install_requires = srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 # Third-party dependencies - typer>=0.3.0,<0.5.0 + typer>=0.3.0,<0.8.0 pathy>=0.3.5 tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 From e91b47a22655c0384202f797e9d50d3660596d32 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 7 Nov 2022 10:43:34 +0100 Subject: [PATCH 4/5] Check for unsafe paths in tarfile.extractall (CVE-2007-4559) (#11746) * Adding tarfile member sanitization to extractall() * Format * Simplify and add error message * Fix import * Add comment about CVE Co-authored-by: TrellixVulnTeam --- spacy/cli/project/remote_storage.py | 19 ++++++++++++++++++- spacy/errors.py | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py index 336a4bcb3..12e252b3c 100644 --- a/spacy/cli/project/remote_storage.py +++ b/spacy/cli/project/remote_storage.py @@ -10,6 +10,7 @@ from .._util import get_hash, get_checksum, download_file, ensure_pathy from ...util import make_tempdir, get_minor_version, ENV_VARS, check_bool_env_var from ...git_info import GIT_VERSION from ... 
import about +from ...errors import Errors if TYPE_CHECKING: from pathy import Pathy # noqa: F401 @@ -84,7 +85,23 @@ class RemoteStorage: with tarfile.open(tar_loc, mode=mode_string) as tar_file: # This requires that the path is added correctly, relative # to root. This is how we set things up in push() - tar_file.extractall(self.root) + + # Disallow paths outside the current directory for the tar + # file (CVE-2007-4559, directory traversal vulnerability) + def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + prefix = os.path.commonpath([abs_directory, abs_target]) + return prefix == abs_directory + + def safe_extract(tar, path): + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise ValueError(Errors.E852) + tar.extractall(path) + + safe_extract(tar_file, self.root) return url def find( diff --git a/spacy/errors.py b/spacy/errors.py index e0628819d..2f8a3996f 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes): "during training, make sure to include it in 'annotating components'") # New errors added in v3.x + E852 = ("The tar file pulled from the remote attempted an unsafe path " + "traversal.") E853 = ("Unsupported component factory name '{name}'. The character '.' is " "not permitted in factory names.") E854 = ("Unable to set doc.ents. 
Check that the 'ents_filter' does not " From 6105f20d8a10a18a0e5985d310664812198840a8 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 7 Nov 2022 13:25:40 +0100 Subject: [PATCH 5/5] Switch CI to python 3.11 (#11765) --- azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3499042cb..9c3b92f06 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -87,13 +87,13 @@ jobs: # python.version: "3.10" Python311Linux: imageName: 'ubuntu-latest' - python.version: '3.11.0' + python.version: '3.11' Python311Windows: imageName: 'windows-latest' - python.version: '3.11.0' + python.version: '3.11' Python311Mac: imageName: 'macos-latest' - python.version: '3.11.0' + python.version: '3.11' maxParallel: 4 pool: vmImage: $(imageName)