mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-10 15:14:56 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy into predict-cli
This commit is contained in:
commit
4c028b338e
|
@ -6,7 +6,6 @@ requires = [
|
||||||
"preshed>=3.0.2,<3.1.0",
|
"preshed>=3.0.2,<3.1.0",
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
"thinc>=8.1.0,<8.2.0",
|
"thinc>=8.1.0,<8.2.0",
|
||||||
"pathy",
|
|
||||||
"numpy>=1.15.0",
|
"numpy>=1.15.0",
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Our libraries
|
# Our libraries
|
||||||
spacy-legacy>=3.0.9,<3.1.0
|
spacy-legacy>=3.0.10,<3.1.0
|
||||||
spacy-loggers>=1.0.0,<2.0.0
|
spacy-loggers>=1.0.0,<2.0.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
|
|
|
@ -41,7 +41,7 @@ setup_requires =
|
||||||
thinc>=8.1.0,<8.2.0
|
thinc>=8.1.0,<8.2.0
|
||||||
install_requires =
|
install_requires =
|
||||||
# Our libraries
|
# Our libraries
|
||||||
spacy-legacy>=3.0.9,<3.1.0
|
spacy-legacy>=3.0.10,<3.1.0
|
||||||
spacy-loggers>=1.0.0,<2.0.0
|
spacy-loggers>=1.0.0,<2.0.0
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
|
@ -50,9 +50,9 @@ install_requires =
|
||||||
wasabi>=0.9.1,<1.1.0
|
wasabi>=0.9.1,<1.1.0
|
||||||
srsly>=2.4.3,<3.0.0
|
srsly>=2.4.3,<3.0.0
|
||||||
catalogue>=2.0.6,<2.1.0
|
catalogue>=2.0.6,<2.1.0
|
||||||
|
# Third-party dependencies
|
||||||
typer>=0.3.0,<0.5.0
|
typer>=0.3.0,<0.5.0
|
||||||
pathy>=0.3.5
|
pathy>=0.3.5
|
||||||
# Third-party dependencies
|
|
||||||
tqdm>=4.38.0,<5.0.0
|
tqdm>=4.38.0,<5.0.0
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
requests>=2.13.0,<3.0.0
|
requests>=2.13.0,<3.0.0
|
||||||
|
|
|
@ -31,21 +31,21 @@ def load(
|
||||||
name: Union[str, Path],
|
name: Union[str, Path],
|
||||||
*,
|
*,
|
||||||
vocab: Union[Vocab, bool] = True,
|
vocab: Union[Vocab, bool] = True,
|
||||||
disable: Iterable[str] = util.SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
|
||||||
enable: Iterable[str] = util.SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = util.SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = util.SimpleFrozenList(),
|
||||||
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
|
||||||
) -> Language:
|
) -> Language:
|
||||||
"""Load a spaCy model from an installed package or a local path.
|
"""Load a spaCy model from an installed package or a local path.
|
||||||
|
|
||||||
name (str): Package name or model path.
|
name (str): Package name or model path.
|
||||||
vocab (Vocab): A Vocab object. If True, a vocab is created.
|
vocab (Vocab): A Vocab object. If True, a vocab is created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable. Disabled
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
|
||||||
pipes will be loaded but they won't be run unless you explicitly
|
pipes will be loaded but they won't be run unless you explicitly
|
||||||
enable them by calling nlp.enable_pipe.
|
enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (but can be enabled later using nlp.enable_pipe).
|
pipes will be disabled (but can be enabled later using nlp.enable_pipe).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
|
18
spacy/lang/la/__init__.py
Normal file
18
spacy/lang/la/__init__.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from ...language import Language, BaseDefaults
|
||||||
|
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||||
|
from .stop_words import STOP_WORDS
|
||||||
|
from .lex_attrs import LEX_ATTRS
|
||||||
|
|
||||||
|
|
||||||
|
class LatinDefaults(BaseDefaults):
|
||||||
|
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||||
|
stop_words = STOP_WORDS
|
||||||
|
lex_attr_getters = LEX_ATTRS
|
||||||
|
|
||||||
|
|
||||||
|
class Latin(Language):
|
||||||
|
lang = "la"
|
||||||
|
Defaults = LatinDefaults
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["Latin"]
|
32
spacy/lang/la/lex_attrs.py
Normal file
32
spacy/lang/la/lex_attrs.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
from ...attrs import LIKE_NUM
|
||||||
|
import re
|
||||||
|
|
||||||
|
# cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4
|
||||||
|
roman_numerals_compile = re.compile(r'(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$')
|
||||||
|
|
||||||
|
_num_words = set(
|
||||||
|
"""
|
||||||
|
unus una unum duo duae tres tria quattuor quinque sex septem octo novem decem
|
||||||
|
""".split()
|
||||||
|
)
|
||||||
|
|
||||||
|
_ordinal_words = set(
|
||||||
|
"""
|
||||||
|
primus prima primum secundus secunda secundum tertius tertia tertium
|
||||||
|
""".split()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def like_num(text):
|
||||||
|
if text.isdigit():
|
||||||
|
return True
|
||||||
|
if roman_numerals_compile.match(text):
|
||||||
|
return True
|
||||||
|
if text.lower() in _num_words:
|
||||||
|
return True
|
||||||
|
if text.lower() in _ordinal_words:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
LEX_ATTRS = {LIKE_NUM: like_num}
|
37
spacy/lang/la/stop_words.py
Normal file
37
spacy/lang/la/stop_words.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin
|
||||||
|
|
||||||
|
STOP_WORDS = set(
|
||||||
|
"""
|
||||||
|
ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem
|
||||||
|
|
||||||
|
cum cur
|
||||||
|
|
||||||
|
de deinde dum
|
||||||
|
|
||||||
|
ego enim ergo es est et etiam etsi ex
|
||||||
|
|
||||||
|
fio
|
||||||
|
|
||||||
|
haud hic
|
||||||
|
|
||||||
|
iam idem igitur ille in infra inter interim ipse is ita
|
||||||
|
|
||||||
|
magis modo mox
|
||||||
|
|
||||||
|
nam ne nec necque neque nisi non nos
|
||||||
|
|
||||||
|
o ob
|
||||||
|
|
||||||
|
per possum post pro
|
||||||
|
|
||||||
|
quae quam quare qui quia quicumque quidem quilibet quis quisnam quisquam quisque quisquis quo quoniam
|
||||||
|
|
||||||
|
sed si sic sive sub sui sum super suus
|
||||||
|
|
||||||
|
tam tamen trans tu tum
|
||||||
|
|
||||||
|
ubi uel uero
|
||||||
|
|
||||||
|
vel vero
|
||||||
|
""".split()
|
||||||
|
)
|
30
spacy/lang/la/tokenizer_exceptions.py
Normal file
30
spacy/lang/la/tokenizer_exceptions.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
|
from ...symbols import ORTH
|
||||||
|
from ...util import update_exc
|
||||||
|
|
||||||
|
|
||||||
|
## TODO: Look into systematically handling u/v
|
||||||
|
_exc = {
|
||||||
|
"mecum": [{ORTH: "me"}, {ORTH: "cum"}],
|
||||||
|
"tecum": [{ORTH: "te"}, {ORTH: "cum"}],
|
||||||
|
"nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}],
|
||||||
|
"vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}],
|
||||||
|
"uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
for orth in [
|
||||||
|
|
||||||
|
'A.', 'Agr.', 'Ap.', 'C.', 'Cn.', 'D.', 'F.', 'K.', 'L.', "M'.", 'M.', 'Mam.', 'N.', 'Oct.',
|
||||||
|
'Opet.', 'P.', 'Paul.', 'Post.', 'Pro.', 'Q.', 'S.', 'Ser.', 'Sert.', 'Sex.', 'St.', 'Sta.',
|
||||||
|
'T.', 'Ti.', 'V.', 'Vol.', 'Vop.', 'U.', 'Uol.', 'Uop.',
|
||||||
|
|
||||||
|
'Ian.', 'Febr.', 'Mart.', 'Apr.', 'Mai.', 'Iun.', 'Iul.', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Nou.',
|
||||||
|
'Dec.',
|
||||||
|
|
||||||
|
'Non.', 'Id.', 'A.D.',
|
||||||
|
|
||||||
|
'Coll.', 'Cos.', 'Ord.', 'Pl.', 'S.C.', 'Suff.', 'Trib.',
|
||||||
|
]:
|
||||||
|
_exc[orth] = [{ORTH: orth}]
|
||||||
|
|
||||||
|
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
|
|
@ -1063,7 +1063,7 @@ class Language:
|
||||||
"""
|
"""
|
||||||
if enable is None and disable is None:
|
if enable is None and disable is None:
|
||||||
raise ValueError(Errors.E991)
|
raise ValueError(Errors.E991)
|
||||||
if disable is not None and isinstance(disable, str):
|
if isinstance(disable, str):
|
||||||
disable = [disable]
|
disable = [disable]
|
||||||
if enable is not None:
|
if enable is not None:
|
||||||
if isinstance(enable, str):
|
if isinstance(enable, str):
|
||||||
|
@ -1698,9 +1698,9 @@ class Language:
|
||||||
config: Union[Dict[str, Any], Config] = {},
|
config: Union[Dict[str, Any], Config] = {},
|
||||||
*,
|
*,
|
||||||
vocab: Union[Vocab, bool] = True,
|
vocab: Union[Vocab, bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
meta: Dict[str, Any] = SimpleFrozenDict(),
|
meta: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
auto_fill: bool = True,
|
auto_fill: bool = True,
|
||||||
validate: bool = True,
|
validate: bool = True,
|
||||||
|
@ -1711,12 +1711,12 @@ class Language:
|
||||||
|
|
||||||
config (Dict[str, Any] / Config): The loaded config.
|
config (Dict[str, Any] / Config): The loaded config.
|
||||||
vocab (Vocab): A Vocab object. If True, a vocab is created.
|
vocab (Vocab): A Vocab object. If True, a vocab is created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable.
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
|
||||||
Disabled pipes will be loaded but they won't be run unless you
|
Disabled pipes will be loaded but they won't be run unless you
|
||||||
explicitly enable them by calling nlp.enable_pipe.
|
explicitly enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude.
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
|
||||||
Excluded components won't be loaded.
|
Excluded components won't be loaded.
|
||||||
meta (Dict[str, Any]): Meta overrides for nlp.meta.
|
meta (Dict[str, Any]): Meta overrides for nlp.meta.
|
||||||
auto_fill (bool): Automatically fill in missing values in config based
|
auto_fill (bool): Automatically fill in missing values in config based
|
||||||
|
@ -1727,6 +1727,12 @@ class Language:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#from_config
|
DOCS: https://spacy.io/api/language#from_config
|
||||||
"""
|
"""
|
||||||
|
if isinstance(disable, str):
|
||||||
|
disable = [disable]
|
||||||
|
if isinstance(enable, str):
|
||||||
|
enable = [enable]
|
||||||
|
if isinstance(exclude, str):
|
||||||
|
exclude = [exclude]
|
||||||
if auto_fill:
|
if auto_fill:
|
||||||
config = Config(
|
config = Config(
|
||||||
cls.default_config, section_order=CONFIG_SECTION_ORDER
|
cls.default_config, section_order=CONFIG_SECTION_ORDER
|
||||||
|
@ -2031,25 +2037,29 @@ class Language:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_component_status(
|
def _resolve_component_status(
|
||||||
disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str]
|
disable: Union[str, Iterable[str]],
|
||||||
|
enable: Union[str, Iterable[str]],
|
||||||
|
pipe_names: Iterable[str],
|
||||||
) -> Tuple[str, ...]:
|
) -> Tuple[str, ...]:
|
||||||
"""Derives whether (1) `disable` and `enable` values are consistent and (2)
|
"""Derives whether (1) `disable` and `enable` values are consistent and (2)
|
||||||
resolves those to a single set of disabled components. Raises an error in
|
resolves those to a single set of disabled components. Raises an error in
|
||||||
case of inconsistency.
|
case of inconsistency.
|
||||||
|
|
||||||
disable (Iterable[str]): Names of components or serialization fields to disable.
|
disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable.
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
|
||||||
pipe_names (Iterable[str]): Names of all pipeline components.
|
pipe_names (Iterable[str]): Names of all pipeline components.
|
||||||
|
|
||||||
RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
|
RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
|
||||||
specified includes and excludes.
|
specified includes and excludes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if disable is not None and isinstance(disable, str):
|
if isinstance(disable, str):
|
||||||
disable = [disable]
|
disable = [disable]
|
||||||
to_disable = disable
|
to_disable = disable
|
||||||
|
|
||||||
if enable:
|
if enable:
|
||||||
|
if isinstance(enable, str):
|
||||||
|
enable = [enable]
|
||||||
to_disable = [
|
to_disable = [
|
||||||
pipe_name for pipe_name in pipe_names if pipe_name not in enable
|
pipe_name for pipe_name in pipe_names if pipe_name not in enable
|
||||||
]
|
]
|
||||||
|
|
|
@ -256,6 +256,11 @@ def ko_tokenizer_tokenizer():
|
||||||
return nlp.tokenizer
|
return nlp.tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def la_tokenizer():
|
||||||
|
return get_lang_class("la")().tokenizer
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def lb_tokenizer():
|
def lb_tokenizer():
|
||||||
return get_lang_class("lb")().tokenizer
|
return get_lang_class("lb")().tokenizer
|
||||||
|
|
0
spacy/tests/lang/la/__init__.py
Normal file
0
spacy/tests/lang/la/__init__.py
Normal file
7
spacy/tests/lang/la/test_exception.py
Normal file
7
spacy/tests/lang/la/test_exception.py
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
|
||||||
|
text = "scio te omnia facturum, ut nobiscum quam primum sis"
|
||||||
|
tokens = la_tokenizer(text)
|
||||||
|
assert len(tokens) == 11
|
||||||
|
assert tokens[6].text == "nobis"
|
33
spacy/tests/lang/la/test_text.py
Normal file
33
spacy/tests/lang/la/test_text.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
import pytest
|
||||||
|
from spacy.lang.la.lex_attrs import like_num
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"text,match",
|
||||||
|
[
|
||||||
|
("IIII", True),
|
||||||
|
("VI", True),
|
||||||
|
("vi", True),
|
||||||
|
("IV", True),
|
||||||
|
("iv", True),
|
||||||
|
("IX", True),
|
||||||
|
("ix", True),
|
||||||
|
("MMXXII", True),
|
||||||
|
("0", True),
|
||||||
|
("1", True),
|
||||||
|
("quattuor", True),
|
||||||
|
("decem", True),
|
||||||
|
("tertius", True),
|
||||||
|
("canis", False),
|
||||||
|
("MMXX11", False),
|
||||||
|
(",", False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_lex_attrs_like_number(la_tokenizer, text, match):
|
||||||
|
tokens = la_tokenizer(text)
|
||||||
|
assert len(tokens) == 1
|
||||||
|
assert tokens[0].like_num == match
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("word", ["quinque"])
|
||||||
|
def test_la_lex_attrs_capitals(word):
|
||||||
|
assert like_num(word)
|
||||||
|
assert like_num(word.upper())
|
|
@ -618,6 +618,7 @@ def test_load_disable_enable() -> None:
|
||||||
base_nlp.to_disk(tmp_dir)
|
base_nlp.to_disk(tmp_dir)
|
||||||
to_disable = ["parser", "tagger"]
|
to_disable = ["parser", "tagger"]
|
||||||
to_enable = ["tagger", "parser"]
|
to_enable = ["tagger", "parser"]
|
||||||
|
single_str = "tagger"
|
||||||
|
|
||||||
# Setting only `disable`.
|
# Setting only `disable`.
|
||||||
nlp = spacy.load(tmp_dir, disable=to_disable)
|
nlp = spacy.load(tmp_dir, disable=to_disable)
|
||||||
|
@ -632,6 +633,16 @@ def test_load_disable_enable() -> None:
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Loading with a string representing one component
|
||||||
|
nlp = spacy.load(tmp_dir, exclude=single_str)
|
||||||
|
assert single_str not in nlp.component_names
|
||||||
|
|
||||||
|
nlp = spacy.load(tmp_dir, disable=single_str)
|
||||||
|
assert single_str in nlp.component_names
|
||||||
|
assert single_str not in nlp.pipe_names
|
||||||
|
assert nlp._disabled == {single_str}
|
||||||
|
assert nlp.disabled == [single_str]
|
||||||
|
|
||||||
# Testing consistent enable/disable combination.
|
# Testing consistent enable/disable combination.
|
||||||
nlp = spacy.load(
|
nlp = spacy.load(
|
||||||
tmp_dir,
|
tmp_dir,
|
||||||
|
|
|
@ -10,7 +10,8 @@ from spacy.ml._precomputable_affine import _backprop_precomputable_affine_paddin
|
||||||
from spacy.util import dot_to_object, SimpleFrozenList, import_file
|
from spacy.util import dot_to_object, SimpleFrozenList, import_file
|
||||||
from spacy.util import to_ternary_int
|
from spacy.util import to_ternary_int
|
||||||
from thinc.api import Config, Optimizer, ConfigValidationError
|
from thinc.api import Config, Optimizer, ConfigValidationError
|
||||||
from thinc.api import set_current_ops
|
from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
|
||||||
|
from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
|
||||||
from spacy.training.batchers import minibatch_by_words
|
from spacy.training.batchers import minibatch_by_words
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lang.nl import Dutch
|
from spacy.lang.nl import Dutch
|
||||||
|
@ -18,7 +19,6 @@ from spacy.language import DEFAULT_CONFIG_PATH
|
||||||
from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
|
from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
from thinc.api import get_current_ops, NumpyOps, CupyOps
|
|
||||||
|
|
||||||
from .util import get_random_doc, make_tempdir
|
from .util import get_random_doc, make_tempdir
|
||||||
|
|
||||||
|
@ -111,26 +111,25 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
|
||||||
|
|
||||||
def test_prefer_gpu():
|
def test_prefer_gpu():
|
||||||
current_ops = get_current_ops()
|
current_ops = get_current_ops()
|
||||||
try:
|
if has_cupy_gpu:
|
||||||
import cupy # noqa: F401
|
assert prefer_gpu()
|
||||||
|
|
||||||
prefer_gpu()
|
|
||||||
assert isinstance(get_current_ops(), CupyOps)
|
assert isinstance(get_current_ops(), CupyOps)
|
||||||
except ImportError:
|
elif has_torch_mps_gpu:
|
||||||
|
assert prefer_gpu()
|
||||||
|
assert isinstance(get_current_ops(), MPSOps)
|
||||||
|
else:
|
||||||
assert not prefer_gpu()
|
assert not prefer_gpu()
|
||||||
set_current_ops(current_ops)
|
set_current_ops(current_ops)
|
||||||
|
|
||||||
|
|
||||||
def test_require_gpu():
|
def test_require_gpu():
|
||||||
current_ops = get_current_ops()
|
current_ops = get_current_ops()
|
||||||
try:
|
if has_cupy_gpu:
|
||||||
import cupy # noqa: F401
|
|
||||||
|
|
||||||
require_gpu()
|
require_gpu()
|
||||||
assert isinstance(get_current_ops(), CupyOps)
|
assert isinstance(get_current_ops(), CupyOps)
|
||||||
except ImportError:
|
elif has_torch_mps_gpu:
|
||||||
with pytest.raises(ValueError):
|
|
||||||
require_gpu()
|
require_gpu()
|
||||||
|
assert isinstance(get_current_ops(), MPSOps)
|
||||||
set_current_ops(current_ops)
|
set_current_ops(current_ops)
|
||||||
|
|
||||||
|
|
||||||
|
|
30
spacy/tests/training/test_logger.py
Normal file
30
spacy/tests/training/test_logger.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
import pytest
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
from spacy.training import loggers
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def nlp():
|
||||||
|
nlp = spacy.blank("en")
|
||||||
|
nlp.add_pipe("ner")
|
||||||
|
return nlp
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def info():
|
||||||
|
return {
|
||||||
|
"losses": {"ner": 100},
|
||||||
|
"other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80},
|
||||||
|
"epoch": 100,
|
||||||
|
"step": 125,
|
||||||
|
"score": 85,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_console_logger(nlp, info):
|
||||||
|
console_logger = loggers.console_logger(
|
||||||
|
progress_bar=True, console_output=True, output_file=None
|
||||||
|
)
|
||||||
|
log_step, finalize = console_logger(nlp)
|
||||||
|
log_step(info)
|
|
@ -1,10 +1,13 @@
|
||||||
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
|
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
|
||||||
from wasabi import Printer
|
from wasabi import Printer
|
||||||
|
from pathlib import Path
|
||||||
import tqdm
|
import tqdm
|
||||||
import sys
|
import sys
|
||||||
|
import srsly
|
||||||
|
|
||||||
from ..util import registry
|
from ..util import registry
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
from .. import util
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..language import Language # noqa: F401
|
from ..language import Language # noqa: F401
|
||||||
|
@ -23,13 +26,44 @@ def setup_table(
|
||||||
return final_cols, final_widths, ["r" for _ in final_widths]
|
return final_cols, final_widths, ["r" for _ in final_widths]
|
||||||
|
|
||||||
|
|
||||||
@registry.loggers("spacy.ConsoleLogger.v1")
|
@registry.loggers("spacy.ConsoleLogger.v2")
|
||||||
def console_logger(progress_bar: bool = False):
|
def console_logger(
|
||||||
|
progress_bar: bool = False,
|
||||||
|
console_output: bool = True,
|
||||||
|
output_file: Optional[Union[str, Path]] = None,
|
||||||
|
):
|
||||||
|
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
|
||||||
|
progress_bar (bool): Whether the logger should print the progress bar.
|
||||||
|
console_output (bool): Whether the logger should print the logs on the console.
|
||||||
|
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
|
||||||
|
"""
|
||||||
|
_log_exist = False
|
||||||
|
if output_file:
|
||||||
|
output_file = util.ensure_path(output_file) # type: ignore
|
||||||
|
if output_file.exists(): # type: ignore
|
||||||
|
_log_exist = True
|
||||||
|
if not output_file.parents[0].exists(): # type: ignore
|
||||||
|
output_file.parents[0].mkdir(parents=True) # type: ignore
|
||||||
|
|
||||||
def setup_printer(
|
def setup_printer(
|
||||||
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
||||||
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
|
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
|
||||||
write = lambda text: print(text, file=stdout, flush=True)
|
write = lambda text: print(text, file=stdout, flush=True)
|
||||||
msg = Printer(no_print=True)
|
msg = Printer(no_print=True)
|
||||||
|
|
||||||
|
nonlocal output_file
|
||||||
|
output_stream = None
|
||||||
|
if _log_exist:
|
||||||
|
write(
|
||||||
|
msg.warn(
|
||||||
|
f"Saving logs is disabled because {output_file} already exists."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
output_file = None
|
||||||
|
elif output_file:
|
||||||
|
write(msg.info(f"Saving results to {output_file}"))
|
||||||
|
output_stream = open(output_file, "w", encoding="utf-8")
|
||||||
|
|
||||||
# ensure that only trainable components are logged
|
# ensure that only trainable components are logged
|
||||||
logged_pipes = [
|
logged_pipes = [
|
||||||
name
|
name
|
||||||
|
@ -40,6 +74,8 @@ def console_logger(progress_bar: bool = False):
|
||||||
score_weights = nlp.config["training"]["score_weights"]
|
score_weights = nlp.config["training"]["score_weights"]
|
||||||
score_cols = [col for col, value in score_weights.items() if value is not None]
|
score_cols = [col for col, value in score_weights.items() if value is not None]
|
||||||
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
|
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
|
||||||
|
|
||||||
|
if console_output:
|
||||||
spacing = 2
|
spacing = 2
|
||||||
table_header, table_widths, table_aligns = setup_table(
|
table_header, table_widths, table_aligns = setup_table(
|
||||||
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
||||||
|
@ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False):
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress.update(1)
|
progress.update(1)
|
||||||
return
|
return
|
||||||
losses = [
|
|
||||||
"{0:.2f}".format(float(info["losses"][pipe_name]))
|
losses = []
|
||||||
for pipe_name in logged_pipes
|
log_losses = {}
|
||||||
]
|
for pipe_name in logged_pipes:
|
||||||
|
losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
|
||||||
|
log_losses[pipe_name] = float(info["losses"][pipe_name])
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
|
log_scores = {}
|
||||||
for col in score_cols:
|
for col in score_cols:
|
||||||
score = info["other_scores"].get(col, 0.0)
|
score = info["other_scores"].get(col, 0.0)
|
||||||
try:
|
try:
|
||||||
|
@ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False):
|
||||||
if col != "speed":
|
if col != "speed":
|
||||||
score *= 100
|
score *= 100
|
||||||
scores.append("{0:.2f}".format(score))
|
scores.append("{0:.2f}".format(score))
|
||||||
|
log_scores[str(col)] = score
|
||||||
|
|
||||||
data = (
|
data = (
|
||||||
[info["epoch"], info["step"]]
|
[info["epoch"], info["step"]]
|
||||||
|
@ -80,10 +120,25 @@ def console_logger(progress_bar: bool = False):
|
||||||
+ scores
|
+ scores
|
||||||
+ ["{0:.2f}".format(float(info["score"]))]
|
+ ["{0:.2f}".format(float(info["score"]))]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if output_stream:
|
||||||
|
# Write to log file per log_step
|
||||||
|
log_data = {
|
||||||
|
"epoch": info["epoch"],
|
||||||
|
"step": info["step"],
|
||||||
|
"losses": log_losses,
|
||||||
|
"scores": log_scores,
|
||||||
|
"score": float(info["score"]),
|
||||||
|
}
|
||||||
|
output_stream.write(srsly.json_dumps(log_data) + "\n")
|
||||||
|
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress.close()
|
progress.close()
|
||||||
|
if console_output:
|
||||||
write(
|
write(
|
||||||
msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
|
msg.row(
|
||||||
|
data, widths=table_widths, aligns=table_aligns, spacing=spacing
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if progress_bar:
|
if progress_bar:
|
||||||
# Set disable=None, so that it disables on non-TTY
|
# Set disable=None, so that it disables on non-TTY
|
||||||
|
@ -93,7 +148,8 @@ def console_logger(progress_bar: bool = False):
|
||||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||||
|
|
||||||
def finalize() -> None:
|
def finalize() -> None:
|
||||||
pass
|
if output_stream:
|
||||||
|
output_stream.close()
|
||||||
|
|
||||||
return log_step, finalize
|
return log_step, finalize
|
||||||
|
|
||||||
|
|
|
@ -398,9 +398,9 @@ def load_model(
|
||||||
name: Union[str, Path],
|
name: Union[str, Path],
|
||||||
*,
|
*,
|
||||||
vocab: Union["Vocab", bool] = True,
|
vocab: Union["Vocab", bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from a package or data path.
|
"""Load a model from a package or data path.
|
||||||
|
@ -408,9 +408,9 @@ def load_model(
|
||||||
name (str): Package name or model path.
|
name (str): Package name or model path.
|
||||||
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
||||||
a new Vocab object will be created.
|
a new Vocab object will be created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable.
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled.
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude.
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
|
@ -440,9 +440,9 @@ def load_model_from_package(
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
vocab: Union["Vocab", bool] = True,
|
vocab: Union["Vocab", bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from an installed package.
|
"""Load a model from an installed package.
|
||||||
|
@ -450,12 +450,12 @@ def load_model_from_package(
|
||||||
name (str): The package name.
|
name (str): The package name.
|
||||||
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
||||||
a new Vocab object will be created.
|
a new Vocab object will be created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable. Disabled
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
|
||||||
pipes will be loaded but they won't be run unless you explicitly
|
pipes will be loaded but they won't be run unless you explicitly
|
||||||
enable them by calling nlp.enable_pipe.
|
enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
@ -470,9 +470,9 @@ def load_model_from_path(
|
||||||
*,
|
*,
|
||||||
meta: Optional[Dict[str, Any]] = None,
|
meta: Optional[Dict[str, Any]] = None,
|
||||||
vocab: Union["Vocab", bool] = True,
|
vocab: Union["Vocab", bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from a data directory path. Creates Language class with
|
"""Load a model from a data directory path. Creates Language class with
|
||||||
|
@ -482,12 +482,12 @@ def load_model_from_path(
|
||||||
meta (Dict[str, Any]): Optional model meta.
|
meta (Dict[str, Any]): Optional model meta.
|
||||||
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
||||||
a new Vocab object will be created.
|
a new Vocab object will be created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable. Disabled
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
|
||||||
pipes will be loaded but they won't be run unless you explicitly
|
pipes will be loaded but they won't be run unless you explicitly
|
||||||
enable them by calling nlp.enable_pipe.
|
enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
@ -516,9 +516,9 @@ def load_model_from_config(
|
||||||
*,
|
*,
|
||||||
meta: Dict[str, Any] = SimpleFrozenDict(),
|
meta: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
vocab: Union["Vocab", bool] = True,
|
vocab: Union["Vocab", bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
auto_fill: bool = False,
|
auto_fill: bool = False,
|
||||||
validate: bool = True,
|
validate: bool = True,
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
|
@ -529,12 +529,12 @@ def load_model_from_config(
|
||||||
meta (Dict[str, Any]): Optional model meta.
|
meta (Dict[str, Any]): Optional model meta.
|
||||||
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
||||||
a new Vocab object will be created.
|
a new Vocab object will be created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable. Disabled
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
|
||||||
pipes will be loaded but they won't be run unless you explicitly
|
pipes will be loaded but they won't be run unless you explicitly
|
||||||
enable them by calling nlp.enable_pipe.
|
enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
auto_fill (bool): Whether to auto-fill config with missing defaults.
|
auto_fill (bool): Whether to auto-fill config with missing defaults.
|
||||||
validate (bool): Whether to show config validation errors.
|
validate (bool): Whether to show config validation errors.
|
||||||
|
@ -616,9 +616,9 @@ def load_model_from_init_py(
|
||||||
init_file: Union[Path, str],
|
init_file: Union[Path, str],
|
||||||
*,
|
*,
|
||||||
vocab: Union["Vocab", bool] = True,
|
vocab: Union["Vocab", bool] = True,
|
||||||
disable: Iterable[str] = SimpleFrozenList(),
|
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
enable: Iterable[str] = SimpleFrozenList(),
|
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
exclude: Iterable[str] = SimpleFrozenList(),
|
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Helper function to use in the `load()` method of a model package's
|
"""Helper function to use in the `load()` method of a model package's
|
||||||
|
@ -626,12 +626,12 @@ def load_model_from_init_py(
|
||||||
|
|
||||||
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
|
||||||
a new Vocab object will be created.
|
a new Vocab object will be created.
|
||||||
disable (Iterable[str]): Names of pipeline components to disable. Disabled
|
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
|
||||||
pipes will be loaded but they won't be run unless you explicitly
|
pipes will be loaded but they won't be run unless you explicitly
|
||||||
enable them by calling nlp.enable_pipe.
|
enable them by calling nlp.enable_pipe.
|
||||||
enable (Iterable[str]): Names of pipeline components to enable. All other
|
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
|
||||||
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
|
||||||
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
|
|
@ -64,12 +64,13 @@ spaCy loads a model under the hood based on its
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
|
| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
|
||||||
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
|
| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
|
| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
|
||||||
| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
|
| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
|
||||||
| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
|
| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
|
||||||
|
@ -695,8 +696,8 @@ As of spaCy v3.0, the `disable_pipes` method has been renamed to `select_pipes`:
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ------------------------------------------------------------------------------------------------------ |
|
| -------------- | ------------------------------------------------------------------------------------------------------ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `disable` | Name(s) of pipeline components to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
|
| `disable` | Name(s) of pipeline component(s) to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
|
||||||
| `enable` | Name(s) of pipeline components that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
|
| `enable` | Name(s) of pipeline component(s) that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
|
||||||
| **RETURNS** | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ |
|
| **RETURNS** | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ |
|
||||||
|
|
||||||
## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"}
|
## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"}
|
||||||
|
|
|
@ -248,6 +248,59 @@ added to an existing vectors table. See more details in
|
||||||
|
|
||||||
## Loggers {#loggers}
|
## Loggers {#loggers}
|
||||||
|
|
||||||
|
These functions are available from `@spacy.registry.loggers`.
|
||||||
|
|
||||||
|
### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
|
||||||
|
|
||||||
|
> #### Example config
|
||||||
|
>
|
||||||
|
> ```ini
|
||||||
|
> [training.logger]
|
||||||
|
> @loggers = "spacy.ConsoleLogger.v1"
|
||||||
|
> progress_bar = true
|
||||||
|
> ```
|
||||||
|
|
||||||
|
Writes the results of a training step to the console in a tabular format.
|
||||||
|
|
||||||
|
<Accordion title="Example console output" spaced>
|
||||||
|
|
||||||
|
```cli
|
||||||
|
$ python -m spacy train config.cfg
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
ℹ Using CPU
|
||||||
|
ℹ Loading config and nlp from: config.cfg
|
||||||
|
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||||
|
ℹ Start training
|
||||||
|
ℹ Training. Initial learn rate: 0.0
|
||||||
|
|
||||||
|
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||||
|
--- ------ ------------ ----------- ------- ------
|
||||||
|
0 0 0.00 86.20 0.22 0.00
|
||||||
|
0 200 3.08 18968.78 34.00 0.34
|
||||||
|
0 400 31.81 22539.06 33.64 0.34
|
||||||
|
0 600 92.13 22794.91 43.80 0.44
|
||||||
|
0 800 183.62 21541.39 56.05 0.56
|
||||||
|
0 1000 352.49 25461.82 65.15 0.65
|
||||||
|
0 1200 422.87 23708.82 71.84 0.72
|
||||||
|
0 1400 601.92 24994.79 76.57 0.77
|
||||||
|
0 1600 662.57 22268.02 80.20 0.80
|
||||||
|
0 1800 1101.50 28413.77 82.56 0.83
|
||||||
|
0 2000 1253.43 28736.36 85.00 0.85
|
||||||
|
0 2200 1411.02 28237.53 87.42 0.87
|
||||||
|
0 2400 1605.35 28439.95 88.70 0.89
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||||
|
start decreasing across epochs.
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| -------------- | --------------------------------------------------------- |
|
||||||
|
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
|
||||||
|
|
||||||
Logging utilities for spaCy are implemented in the
|
Logging utilities for spaCy are implemented in the
|
||||||
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
||||||
functions are typically available from `@spacy.registry.loggers`.
|
functions are typically available from `@spacy.registry.loggers`.
|
||||||
|
|
|
@ -46,13 +46,13 @@ specified separately using the new `exclude` keyword argument.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
|
| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
||||||
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
| `enable` | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
|
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
|
| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
|
||||||
|
|
||||||
|
@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.
|
||||||
|
|
||||||
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
|
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
|
||||||
|
|
||||||
Generate dependency parse in `{'words': [], 'arcs': []}` format.
|
Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
|
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
|
||||||
|
|
||||||
Generate named entities in `[{start: i, end: i, label: 'label'}]` format.
|
Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
|
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
|
||||||
|
|
||||||
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format.
|
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -451,7 +451,7 @@ factories.
|
||||||
| Registry name | Description |
|
| Registry name | Description |
|
||||||
| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `architectures` | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`. |
|
| `architectures` | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`. |
|
||||||
| `augmenters` | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators. |
|
| `augmenters` | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators. |
|
||||||
| `batchers` | Registry for training and evaluation [data batchers](#batchers). |
|
| `batchers` | Registry for training and evaluation [data batchers](#batchers). |
|
||||||
| `callbacks` | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training. |
|
| `callbacks` | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training. |
|
||||||
| `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points). |
|
| `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points). |
|
||||||
|
@ -505,7 +505,7 @@ finished. To log each training step, a
|
||||||
and the accuracy scores on the development set.
|
and the accuracy scores on the development set.
|
||||||
|
|
||||||
The built-in, default logger is the ConsoleLogger, which prints results to the
|
The built-in, default logger is the ConsoleLogger, which prints results to the
|
||||||
console in tabular format. The
|
console in tabular format and saves them to a `jsonl` file. The
|
||||||
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
||||||
a dependency of spaCy, enables other loggers, such as one that sends results to
|
a dependency of spaCy, enables other loggers, such as one that sends results to
|
||||||
a [Weights & Biases](https://www.wandb.com/) dashboard.
|
a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||||
|
@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||||
Instead of using one of the built-in loggers, you can
|
Instead of using one of the built-in loggers, you can
|
||||||
[implement your own](/usage/training#custom-logging).
|
[implement your own](/usage/training#custom-logging).
|
||||||
|
|
||||||
#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
|
#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
|
||||||
|
|
||||||
> #### Example config
|
> #### Example config
|
||||||
>
|
>
|
||||||
> ```ini
|
> ```ini
|
||||||
> [training.logger]
|
> [training.logger]
|
||||||
> @loggers = "spacy.ConsoleLogger.v1"
|
> @loggers = "spacy.ConsoleLogger.v2"
|
||||||
|
> progress_bar = true
|
||||||
|
> console_output = true
|
||||||
|
> output_file = "training_log.jsonl"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
Writes the results of a training step to the console in a tabular format.
|
Writes the results of a training step to the console in a tabular format and
|
||||||
|
saves them to a `jsonl` file.
|
||||||
|
|
||||||
<Accordion title="Example console output" spaced>
|
<Accordion title="Example console output" spaced>
|
||||||
|
|
||||||
|
@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
|
||||||
ℹ Pipeline: ['tok2vec', 'tagger']
|
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||||
ℹ Start training
|
ℹ Start training
|
||||||
ℹ Training. Initial learn rate: 0.0
|
ℹ Training. Initial learn rate: 0.0
|
||||||
|
ℹ Saving results to training_log.jsonl
|
||||||
|
|
||||||
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||||
--- ------ ------------ ----------- ------- ------
|
--- ------ ------------ ----------- ------- ------
|
||||||
1 0 0.00 86.20 0.22 0.00
|
0 0 0.00 86.20 0.22 0.00
|
||||||
1 200 3.08 18968.78 34.00 0.34
|
0 200 3.08 18968.78 34.00 0.34
|
||||||
1 400 31.81 22539.06 33.64 0.34
|
0 400 31.81 22539.06 33.64 0.34
|
||||||
1 600 92.13 22794.91 43.80 0.44
|
0 600 92.13 22794.91 43.80 0.44
|
||||||
1 800 183.62 21541.39 56.05 0.56
|
0 800 183.62 21541.39 56.05 0.56
|
||||||
1 1000 352.49 25461.82 65.15 0.65
|
0 1000 352.49 25461.82 65.15 0.65
|
||||||
1 1200 422.87 23708.82 71.84 0.72
|
0 1200 422.87 23708.82 71.84 0.72
|
||||||
1 1400 601.92 24994.79 76.57 0.77
|
0 1400 601.92 24994.79 76.57 0.77
|
||||||
1 1600 662.57 22268.02 80.20 0.80
|
0 1600 662.57 22268.02 80.20 0.80
|
||||||
1 1800 1101.50 28413.77 82.56 0.83
|
0 1800 1101.50 28413.77 82.56 0.83
|
||||||
1 2000 1253.43 28736.36 85.00 0.85
|
0 2000 1253.43 28736.36 85.00 0.85
|
||||||
1 2200 1411.02 28237.53 87.42 0.87
|
0 2200 1411.02 28237.53 87.42 0.87
|
||||||
1 2400 1605.35 28439.95 88.70 0.89
|
0 2400 1605.35 28439.95 88.70 0.89
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that the cumulative loss keeps increasing within one epoch, but should
|
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||||
|
@ -559,6 +564,12 @@ start decreasing across epochs.
|
||||||
|
|
||||||
</Accordion>
|
</Accordion>
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ---------------- | --------------------------------------------------------------------- |
|
||||||
|
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
|
||||||
|
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
|
||||||
|
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
|
||||||
|
|
||||||
## Readers {#readers}
|
## Readers {#readers}
|
||||||
|
|
||||||
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
||||||
|
@ -1039,12 +1050,13 @@ and create a `Language` object. The model data will then be loaded in via
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `name` | Package name or path. ~~str~~ |
|
| `name` | Package name or path. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
||||||
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
|
| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
|
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
|
||||||
|
|
||||||
|
@@ -1063,12 +1075,13 @@ A helper function to use in the `load()` method of a pipeline package's
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `init_file` | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~ |
|
| `init_file` | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` <Tag variant="new">3</Tag> | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
| `vocab` <Tag variant="new">3</Tag> | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
||||||
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
|
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
|
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user