Merge branch 'master' into feature/use-cloudpathlib

This commit is contained in:
Adriane Boyd 2022-11-07 13:41:39 +01:00 committed by GitHub
commit 6c94e02192
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 32 additions and 11 deletions

View File

@ -87,13 +87,13 @@ jobs:
# python.version: "3.10"
Python311Linux:
imageName: 'ubuntu-latest'
python.version: '3.11.0'
python.version: '3.11'
Python311Windows:
imageName: 'windows-latest'
python.version: '3.11.0'
python.version: '3.11'
Python311Mac:
imageName: 'macos-latest'
python.version: '3.11.0'
python.version: '3.11'
maxParallel: 4
pool:
vmImage: $(imageName)

View File

@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.1.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.5.0
typer>=0.3.0,<0.8.0
cloudpathlib>=0.7.0,<0.11.0
# Third party dependencies
numpy>=1.15.0

View File

@ -51,7 +51,7 @@ install_requires =
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
# Third-party dependencies
typer>=0.3.0,<0.5.0
typer>=0.3.0,<0.8.0
cloudpathlib>=0.7.0,<0.11.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0

View File

@ -12,6 +12,7 @@ from .._util import ensure_pathy, make_tempdir
from ...util import get_minor_version, ENV_VARS, check_bool_env_var
from ...git_info import GIT_VERSION
from ... import about
from ...errors import Errors
if TYPE_CHECKING:
from cloudpathlib import CloudPath # noqa: F401
@ -84,7 +85,23 @@ class RemoteStorage:
with tarfile.open(tar_loc, mode=mode_string) as tar_file:
# This requires that the path is added correctly, relative
# to root. This is how we set things up in push()
tar_file.extractall(self.root)
# Disallow paths outside the current directory for the tar
# file (CVE-2007-4559, directory traversal vulnerability)
def is_within_directory(directory, target):
    """Return True if ``target`` is ``directory`` itself or lies beneath it.

    Both paths are made absolute (which also collapses ``..`` segments)
    before they are compared.

    directory: The directory that must contain the target.
    target: The path to check.
    RETURNS (bool): Whether the target is located inside the directory.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    # Compare whole path components. os.path.commonprefix works character
    # by character, so it would treat "/data/rootdir-evil/x" as being inside
    # "/data/rootdir" and let a sibling directory slip through the check.
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # commonpath raises if the paths have nothing in common to compare,
        # e.g. they are on different drives on Windows: not inside.
        return False
    return common == abs_directory
def safe_extract(tar, path):
    """Extract all members of ``tar`` into ``path`` after checking their names.

    Each member name is joined onto ``path`` and the result must be located
    inside ``path``; nothing is extracted if any member fails that check.

    tar: The opened tar archive to extract.
    path: The directory to extract into.
    RAISES (ValueError): With message Errors.E852 if a member's path would
        fall outside ``path``.
    """
    # NOTE(review): only member names are validated here; link targets of
    # symlink/hardlink members are not inspected.
    escapes_root = any(
        not is_within_directory(path, os.path.join(path, entry.name))
        for entry in tar.getmembers()
    )
    if escapes_root:
        raise ValueError(Errors.E852)
    tar.extractall(path)
safe_extract(tar_file, self.root)
return url
def find(

View File

@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes):
"during training, make sure to include it in 'annotating components'")
# New errors added in v3.x
E852 = ("The tar file pulled from the remote attempted an unsafe path "
"traversal.")
E853 = ("Unsupported component factory name '{name}'. The character '.' is "
"not permitted in factory names.")
E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "

View File

@ -155,7 +155,11 @@ class TextCategorizer(TrainablePipe):
self.model = model
self.name = name
self._rehearsal_model = None
cfg: Dict[str, Any] = {"labels": [], "threshold": threshold, "positive_label": None}
cfg: Dict[str, Any] = {
"labels": [],
"threshold": threshold,
"positive_label": None,
}
self.cfg = dict(cfg)
self.scorer = scorer

View File

@ -117,15 +117,13 @@ class Span:
end_char: int
label: int
kb_id: int
id: int
ent_id: int
ent_id_: str
@property
def id(self) -> int: ...
@property
def id_(self) -> str: ...
@property
def orth_(self) -> str: ...
@property
def lemma_(self) -> str: ...
label_: str
kb_id_: str
id_: str