mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-07 21:54:54 +03:00
Merge branch 'master' into feature/use-cloudpathlib
This commit is contained in:
commit
6c94e02192
|
@ -87,13 +87,13 @@ jobs:
|
||||||
# python.version: "3.10"
|
# python.version: "3.10"
|
||||||
Python311Linux:
|
Python311Linux:
|
||||||
imageName: 'ubuntu-latest'
|
imageName: 'ubuntu-latest'
|
||||||
python.version: '3.11.0'
|
python.version: '3.11'
|
||||||
Python311Windows:
|
Python311Windows:
|
||||||
imageName: 'windows-latest'
|
imageName: 'windows-latest'
|
||||||
python.version: '3.11.0'
|
python.version: '3.11'
|
||||||
Python311Mac:
|
Python311Mac:
|
||||||
imageName: 'macos-latest'
|
imageName: 'macos-latest'
|
||||||
python.version: '3.11.0'
|
python.version: '3.11'
|
||||||
maxParallel: 4
|
maxParallel: 4
|
||||||
pool:
|
pool:
|
||||||
vmImage: $(imageName)
|
vmImage: $(imageName)
|
||||||
|
|
|
@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0
|
||||||
wasabi>=0.9.1,<1.1.0
|
wasabi>=0.9.1,<1.1.0
|
||||||
srsly>=2.4.3,<3.0.0
|
srsly>=2.4.3,<3.0.0
|
||||||
catalogue>=2.0.6,<2.1.0
|
catalogue>=2.0.6,<2.1.0
|
||||||
typer>=0.3.0,<0.5.0
|
typer>=0.3.0,<0.8.0
|
||||||
cloudpathlib>=0.7.0,<0.11.0
|
cloudpathlib>=0.7.0,<0.11.0
|
||||||
# Third party dependencies
|
# Third party dependencies
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
|
|
|
@ -51,7 +51,7 @@ install_requires =
|
||||||
srsly>=2.4.3,<3.0.0
|
srsly>=2.4.3,<3.0.0
|
||||||
catalogue>=2.0.6,<2.1.0
|
catalogue>=2.0.6,<2.1.0
|
||||||
# Third-party dependencies
|
# Third-party dependencies
|
||||||
typer>=0.3.0,<0.5.0
|
typer>=0.3.0,<0.8.0
|
||||||
cloudpathlib>=0.7.0,<0.11.0
|
cloudpathlib>=0.7.0,<0.11.0
|
||||||
tqdm>=4.38.0,<5.0.0
|
tqdm>=4.38.0,<5.0.0
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
|
|
|
@ -12,6 +12,7 @@ from .._util import ensure_pathy, make_tempdir
|
||||||
from ...util import get_minor_version, ENV_VARS, check_bool_env_var
|
from ...util import get_minor_version, ENV_VARS, check_bool_env_var
|
||||||
from ...git_info import GIT_VERSION
|
from ...git_info import GIT_VERSION
|
||||||
from ... import about
|
from ... import about
|
||||||
|
from ...errors import Errors
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from cloudpathlib import CloudPath # noqa: F401
|
from cloudpathlib import CloudPath # noqa: F401
|
||||||
|
@ -84,7 +85,23 @@ class RemoteStorage:
|
||||||
with tarfile.open(tar_loc, mode=mode_string) as tar_file:
|
with tarfile.open(tar_loc, mode=mode_string) as tar_file:
|
||||||
# This requires that the path is added correctly, relative
|
# This requires that the path is added correctly, relative
|
||||||
# to root. This is how we set things up in push()
|
# to root. This is how we set things up in push()
|
||||||
tar_file.extractall(self.root)
|
|
||||||
|
# Disallow paths outside the current directory for the tar
|
||||||
|
# file (CVE-2007-4559, directory traversal vulnerability)
|
||||||
|
def is_within_directory(directory, target):
    """Return True if ``target`` is ``directory`` itself or lies below it.

    Both paths are made absolute (which also collapses ``..`` segments)
    before they are compared, so a member name such as ``../evil`` joined
    onto the extraction root is detected as escaping it.

    directory: The directory that must contain the target.
    target: The path to check.
    RETURNS (bool): Whether ``target`` is located inside ``directory``.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    # Compare whole path components. A character-wise prefix check
    # (os.path.commonprefix) would wrongly accept a sibling directory such
    # as "/root/foobar" as being inside "/root/foo".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths have nothing in common that can be expressed
        # as a path (e.g. different drives on Windows): treat as outside.
        return False
    return common == abs_directory
|
||||||
|
|
||||||
|
def safe_extract(tar, path):
    """Extract every member of ``tar`` into ``path``, refusing archives
    that contain a member whose name would resolve outside ``path``.

    tar: The opened tar archive to extract.
    path: The directory to extract into.
    RAISES (ValueError): With message E852 if any member escapes ``path``.
    """
    # Validate all member names before anything is written to disk.
    escapes_root = any(
        not is_within_directory(path, os.path.join(path, entry.name))
        for entry in tar.getmembers()
    )
    if escapes_root:
        raise ValueError(Errors.E852)
    tar.extractall(path)
|
||||||
|
|
||||||
|
safe_extract(tar_file, self.root)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def find(
|
def find(
|
||||||
|
|
|
@ -544,6 +544,8 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
"during training, make sure to include it in 'annotating components'")
|
"during training, make sure to include it in 'annotating components'")
|
||||||
|
|
||||||
# New errors added in v3.x
|
# New errors added in v3.x
|
||||||
|
E852 = ("The tar file pulled from the remote attempted an unsafe path "
|
||||||
|
"traversal.")
|
||||||
E853 = ("Unsupported component factory name '{name}'. The character '.' is "
|
E853 = ("Unsupported component factory name '{name}'. The character '.' is "
|
||||||
"not permitted in factory names.")
|
"not permitted in factory names.")
|
||||||
E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "
|
E854 = ("Unable to set doc.ents. Check that the 'ents_filter' does not "
|
||||||
|
|
|
@ -155,7 +155,11 @@ class TextCategorizer(TrainablePipe):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.name = name
|
self.name = name
|
||||||
self._rehearsal_model = None
|
self._rehearsal_model = None
|
||||||
cfg: Dict[str, Any] = {"labels": [], "threshold": threshold, "positive_label": None}
|
cfg: Dict[str, Any] = {
|
||||||
|
"labels": [],
|
||||||
|
"threshold": threshold,
|
||||||
|
"positive_label": None,
|
||||||
|
}
|
||||||
self.cfg = dict(cfg)
|
self.cfg = dict(cfg)
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
|
|
||||||
|
|
|
@ -117,15 +117,13 @@ class Span:
|
||||||
end_char: int
|
end_char: int
|
||||||
label: int
|
label: int
|
||||||
kb_id: int
|
kb_id: int
|
||||||
|
id: int
|
||||||
ent_id: int
|
ent_id: int
|
||||||
ent_id_: str
|
ent_id_: str
|
||||||
@property
|
@property
|
||||||
def id(self) -> int: ...
|
|
||||||
@property
|
|
||||||
def id_(self) -> str: ...
|
|
||||||
@property
|
|
||||||
def orth_(self) -> str: ...
|
def orth_(self) -> str: ...
|
||||||
@property
|
@property
|
||||||
def lemma_(self) -> str: ...
|
def lemma_(self) -> str: ...
|
||||||
label_: str
|
label_: str
|
||||||
kb_id_: str
|
kb_id_: str
|
||||||
|
id_: str
|
||||||
|
|
Loading…
Reference in New Issue
Block a user