From e91b47a22655c0384202f797e9d50d3660596d32 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Mon, 7 Nov 2022 10:43:34 +0100
Subject: [PATCH] Check for unsafe paths in tarfile.extractall (CVE-2007-4559) (#11746)

* Adding tarfile member sanitization to extractall()

* Format

* Simplify and add error message

* Fix import

* Add comment about CVE

Co-authored-by: TrellixVulnTeam
---
 spacy/cli/project/remote_storage.py | 30 +++++++++++++++++++++++++++++-
 spacy/errors.py                     |  2 ++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py
index 336a4bcb3..12e252b3c 100644
--- a/spacy/cli/project/remote_storage.py
+++ b/spacy/cli/project/remote_storage.py
@@ -10,6 +10,7 @@ from .._util import get_hash, get_checksum, download_file, ensure_pathy
 from ...util import make_tempdir, get_minor_version, ENV_VARS, check_bool_env_var
 from ...git_info import GIT_VERSION
 from ... import about
+from ...errors import Errors
 
 if TYPE_CHECKING:
     from pathy import Pathy  # noqa: F401
@@ -84,7 +85,34 @@ class RemoteStorage:
                 with tarfile.open(tar_loc, mode=mode_string) as tar_file:
                     # This requires that the path is added correctly, relative
                     # to root. This is how we set things up in push()
-                    tar_file.extractall(self.root)
+
+                    # Disallow paths outside the current directory for the tar
+                    # file (CVE-2007-4559, directory traversal vulnerability)
+                    def is_within_directory(directory, target):
+                        """Check that target is directory or lies below it."""
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        try:
+                            # Compare whole path components: commonprefix() is
+                            # character-based and would treat "/root-evil" as
+                            # being inside "/root".
+                            prefix = os.path.commonpath([abs_directory, abs_target])
+                        except ValueError:
+                            # No common path (e.g. different drives on Windows)
+                            return False
+                        return prefix == abs_directory
+
+                    def safe_extract(tar, path):
+                        """Extract tar into path, rejecting unsafe member paths."""
+                        # NOTE: only member names are validated; symlink and
+                        # hardlink targets are not checked here.
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise ValueError(Errors.E852)
+                        tar.extractall(path)
+
+                    safe_extract(tar_file, self.root)
         return url
 
     def find(
diff --git a/spacy/errors.py b/spacy/errors.py
index e0628819d..2f8a3996f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes):
             "during training, make sure to include it in 'annotating components'")
 
     # New errors added in v3.x
+    E852 = ("The tar file pulled from the remote attempted an unsafe path "
+            "traversal.")
     E853 = ("Unsupported component factory name '{name}'. The character '.' is "
             "not permitted in factory names.")
     E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "