mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 05:07:03 +03:00
Check for unsafe paths in tarfile.extractall (CVE-2007-4559) (#11746)
* Adding tarfile member sanitization to extractall() * Format * Simplify and add error message * Fix import * Add comment about CVE Co-authored-by: TrellixVulnTeam <charles.mcfarland@trellix.com>
This commit is contained in:
parent
b76222e56a
commit
e91b47a226
|
@ -10,6 +10,7 @@ from .._util import get_hash, get_checksum, download_file, ensure_pathy
|
||||||
from ...util import make_tempdir, get_minor_version, ENV_VARS, check_bool_env_var
|
from ...util import make_tempdir, get_minor_version, ENV_VARS, check_bool_env_var
|
||||||
from ...git_info import GIT_VERSION
|
from ...git_info import GIT_VERSION
|
||||||
from ... import about
|
from ... import about
|
||||||
|
from ...errors import Errors
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from pathy import Pathy # noqa: F401
|
from pathy import Pathy # noqa: F401
|
||||||
|
@ -84,7 +85,23 @@ class RemoteStorage:
|
||||||
with tarfile.open(tar_loc, mode=mode_string) as tar_file:
|
with tarfile.open(tar_loc, mode=mode_string) as tar_file:
|
||||||
# This requires that the path is added correctly, relative
|
# This requires that the path is added correctly, relative
|
||||||
# to root. This is how we set things up in push()
|
# to root. This is how we set things up in push()
|
||||||
tar_file.extractall(self.root)
|
|
||||||
|
# Disallow paths outside the current directory for the tar
|
||||||
|
# file (CVE-2007-4559, directory traversal vulnerability)
|
||||||
|
def is_within_directory(directory, target):
|
||||||
|
abs_directory = os.path.abspath(directory)
|
||||||
|
abs_target = os.path.abspath(target)
|
||||||
|
prefix = os.path.commonprefix([abs_directory, abs_target])
|
||||||
|
return prefix == abs_directory
|
||||||
|
|
||||||
|
def safe_extract(tar, path):
|
||||||
|
for member in tar.getmembers():
|
||||||
|
member_path = os.path.join(path, member.name)
|
||||||
|
if not is_within_directory(path, member_path):
|
||||||
|
raise ValueError(Errors.E852)
|
||||||
|
tar.extractall(path)
|
||||||
|
|
||||||
|
safe_extract(tar_file, self.root)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def find(
|
def find(
|
||||||
|
|
|
@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
"during training, make sure to include it in 'annotating components'")
|
"during training, make sure to include it in 'annotating components'")
|
||||||
|
|
||||||
# New errors added in v3.x
|
# New errors added in v3.x
|
||||||
|
E852 = ("The tar file pulled from the remote attempted an unsafe path "
|
||||||
|
"traversal.")
|
||||||
E853 = ("Unsupported component factory name '{name}'. The character '.' is "
|
E853 = ("Unsupported component factory name '{name}'. The character '.' is "
|
||||||
"not permitted in factory names.")
|
"not permitted in factory names.")
|
||||||
E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "
|
E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "
|
||||||
|
|
Loading…
Reference in New Issue
Block a user