From 3ce5be4b76da3435e58dd6fd5d2f30bf6d575789 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 28 Aug 2020 15:20:14 +0200 Subject: [PATCH] Allow loaded but disabled components --- spacy/__init__.py | 9 +- spacy/default_config.cfg | 1 + spacy/language.py | 168 ++++++++++++------ spacy/schemas.py | 1 + spacy/tests/pipeline/test_pipe_methods.py | 63 +++++++ spacy/tests/regression/test_issue4501-5000.py | 1 + .../serialize/test_serialize_pipeline.py | 33 ++++ spacy/util.py | 53 ++++-- 8 files changed, 259 insertions(+), 70 deletions(-) diff --git a/spacy/__init__.py b/spacy/__init__.py index ed53787f2..8e9c8db69 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -28,17 +28,22 @@ if sys.maxunicode == 65535: def load( name: Union[str, Path], disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(), ) -> Language: """Load a spaCy model from an installed package or a local path. name (str): Package name or model path. - disable (Iterable[str]): Names of pipeline components to disable. + disable (Iterable[str]): Names of pipeline components to disable. Disabled + pipes will be loaded but they won't be run unless you explicitly + enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. Excluded + components won't be loaded. config (Dict[str, Any] / Config): Config overrides as nested dict or dict keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. """ - return util.load_model(name, disable=disable, config=config) + return util.load_model(name, disable=disable, exclude=exclude, config=config) def blank(name: str, **overrides) -> Language: diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg index 7db23ec1c..d76ef630d 100644 --- a/spacy/default_config.cfg +++ b/spacy/default_config.cfg @@ -11,6 +11,7 @@ use_pytorch_for_gpu_memory = false [nlp] lang = null pipeline = [] +disabled = [] load_vocab_data = true before_creation = null after_creation = null diff --git a/spacy/language.py b/spacy/language.py index bac90af85..45f6eb493 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -6,7 +6,7 @@ import itertools import weakref import functools from contextlib import contextmanager -from copy import copy, deepcopy +from copy import deepcopy from pathlib import Path import warnings from thinc.api import get_current_ops, Config, require_gpu, Optimizer @@ -159,7 +159,8 @@ class Language: self.vocab: Vocab = vocab if self.lang is None: self.lang = self.vocab.lang - self.pipeline = [] + self._pipeline = [] + self._disabled = set() self.max_length = max_length self.resolved = {} # Create the default tokenizer from the default config @@ -210,6 +211,7 @@ class Language: # TODO: Adding this back to prevent breaking people's code etc., but # we should consider removing it self._meta["pipeline"] = self.pipe_names + self._meta["disabled"] = list(self._disabled) return self._meta @meta.setter @@ -232,13 +234,14 @@ class Language: # we can populate the config again later pipeline = {} score_weights = [] - for pipe_name in self.pipe_names: + for pipe_name in self._pipe_names: pipe_meta = self.get_pipe_meta(pipe_name) pipe_config = self.get_pipe_config(pipe_name) pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config} if pipe_meta.default_score_weights: score_weights.append(pipe_meta.default_score_weights) - self._config["nlp"]["pipeline"] = self.pipe_names + self._config["nlp"]["pipeline"] = self._pipe_names + self._config["nlp"]["disabled"] = list(self._disabled) self._config["components"] = pipeline self._config["training"]["score_weights"] = combine_score_weights(score_weights) if not srsly.is_json_serializable(self._config): @@ -257,9 +260,30 @@ class Language: """ return list(self.factories.keys()) + @property + def _pipe_names(self) -> List[str]: + """Get the names of the available pipeline components. Includes all + active and inactive pipeline components. + + RETURNS (List[str]): List of component name strings, in order. + """ + # TODO: Should we make this available via a user-facing property? (The + # underscore distinction works well internally) + return [pipe_name for pipe_name, _ in self._pipeline] + + @property + def pipeline(self) -> List[Tuple[str, Callable[[Doc], Doc]]]: + """The processing pipeline consisting of (name, component) tuples. The + components are called on the Doc in order as it passes through the + pipeline. + + RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline. + """ + return [(name, p) for name, p in self._pipeline if name not in self._disabled] + @property def pipe_names(self) -> List[str]: - """Get names of available pipeline components. + """Get names of available active pipeline components. RETURNS (List[str]): List of component name strings, in order. """ @@ -272,7 +296,7 @@ class Language: RETURNS (Dict[str, str]): Factory names, keyed by component names. """ factories = {} - for pipe_name, pipe in self.pipeline: + for pipe_name, pipe in self._pipeline: factories[pipe_name] = self.get_pipe_meta(pipe_name).factory return factories @@ -284,7 +308,7 @@ class Language: RETURNS (Dict[str, List[str]]): Labels keyed by component name. """ labels = {} - for name, pipe in self.pipeline: + for name, pipe in self._pipeline: if hasattr(pipe, "labels"): labels[name] = list(pipe.labels) return labels @@ -512,10 +536,10 @@ class Language: DOCS: https://spacy.io/api/language#get_pipe """ - for pipe_name, component in self.pipeline: + for pipe_name, component in self._pipeline: if pipe_name == name: return component - raise KeyError(Errors.E001.format(name=name, opts=self.pipe_names)) + raise KeyError(Errors.E001.format(name=name, opts=self._pipe_names)) def create_pipe( self, @@ -660,8 +684,8 @@ class Language: err = Errors.E966.format(component=bad_val, name=name) raise ValueError(err) name = name if name is not None else factory_name - if name in self.pipe_names: - raise ValueError(Errors.E007.format(name=name, opts=self.pipe_names)) + if name in self._pipe_names: + raise ValueError(Errors.E007.format(name=name, opts=self._pipe_names)) if source is not None: # We're loading the component from a model. After loading the # component, we know its real factory name @@ -686,7 +710,7 @@ class Language: ) pipe_index = self._get_pipe_index(before, after, first, last) self._pipe_meta[name] = self.get_factory_meta(factory_name) - self.pipeline.insert(pipe_index, (name, pipe_component)) + self._pipeline.insert(pipe_index, (name, pipe_component)) return pipe_component def _get_pipe_index( @@ -707,32 +731,34 @@ class Language: """ all_args = {"before": before, "after": after, "first": first, "last": last} if sum(arg is not None for arg in [before, after, first, last]) >= 2: - raise ValueError(Errors.E006.format(args=all_args, opts=self.pipe_names)) + raise ValueError(Errors.E006.format(args=all_args, opts=self._pipe_names)) if last or not any(value is not None for value in [first, before, after]): - return len(self.pipeline) + return len(self._pipeline) elif first: return 0 elif isinstance(before, str): - if before not in self.pipe_names: - raise ValueError(Errors.E001.format(name=before, opts=self.pipe_names)) - return self.pipe_names.index(before) + if before not in self._pipe_names: + raise ValueError(Errors.E001.format(name=before, opts=self._pipe_names)) + return self._pipe_names.index(before) elif isinstance(after, str): - if after not in self.pipe_names: - raise ValueError(Errors.E001.format(name=after, opts=self.pipe_names)) - return self.pipe_names.index(after) + 1 + if after not in self._pipe_names: + raise ValueError(Errors.E001.format(name=after, opts=self._pipe_names)) + return self._pipe_names.index(after) + 1 # We're only accepting indices referring to components that exist # (can't just do isinstance here because bools are instance of int, too) elif type(before) == int: - if before >= len(self.pipeline) or before < 0: - err = Errors.E959.format(dir="before", idx=before, opts=self.pipe_names) + if before >= len(self._pipeline) or before < 0: + err = Errors.E959.format( + dir="before", idx=before, opts=self._pipe_names + ) raise ValueError(err) return before elif type(after) == int: - if after >= len(self.pipeline) or after < 0: - err = Errors.E959.format(dir="after", idx=after, opts=self.pipe_names) + if after >= len(self._pipeline) or after < 0: + err = Errors.E959.format(dir="after", idx=after, opts=self._pipe_names) raise ValueError(err) return after + 1 - raise ValueError(Errors.E006.format(args=all_args, opts=self.pipe_names)) + raise ValueError(Errors.E006.format(args=all_args, opts=self._pipe_names)) def has_pipe(self, name: str) -> bool: """Check if a component name is present in the pipeline. Equivalent to @@ -773,7 +799,7 @@ class Language: # to Language.pipeline to make sure the configs are handled correctly pipe_index = self.pipe_names.index(name) self.remove_pipe(name) - if not len(self.pipeline) or pipe_index == len(self.pipeline): + if not len(self._pipeline) or pipe_index == len(self._pipeline): # we have no components to insert before/after, or we're replacing the last component self.add_pipe(factory_name, name=name, config=config, validate=validate) else: @@ -793,12 +819,12 @@ class Language: DOCS: https://spacy.io/api/language#rename_pipe """ - if old_name not in self.pipe_names: - raise ValueError(Errors.E001.format(name=old_name, opts=self.pipe_names)) - if new_name in self.pipe_names: - raise ValueError(Errors.E007.format(name=new_name, opts=self.pipe_names)) - i = self.pipe_names.index(old_name) - self.pipeline[i] = (new_name, self.pipeline[i][1]) + if old_name not in self._pipe_names: + raise ValueError(Errors.E001.format(name=old_name, opts=self._pipe_names)) + if new_name in self._pipe_names: + raise ValueError(Errors.E007.format(name=new_name, opts=self._pipe_names)) + i = self._pipe_names.index(old_name) + self._pipeline[i] = (new_name, self._pipeline[i][1]) self._pipe_meta[new_name] = self._pipe_meta.pop(old_name) self._pipe_configs[new_name] = self._pipe_configs.pop(old_name) @@ -810,15 +836,41 @@ class Language: DOCS: https://spacy.io/api/language#remove_pipe """ - if name not in self.pipe_names: - raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) - removed = self.pipeline.pop(self.pipe_names.index(name)) + if name not in self._pipe_names: + raise ValueError(Errors.E001.format(name=name, opts=self._pipe_names)) + removed = self._pipeline.pop(self._pipe_names.index(name)) # We're only removing the component itself from the metas/configs here # because factory may be used for something else self._pipe_meta.pop(name) self._pipe_configs.pop(name) + # Make sure the name is also removed from the set of disabled components + if name in self._disabled: + self._disabled.remove(name) return removed + def disable_pipe(self, name: str) -> None: + """Disable a pipeline component. The component will still exist on + the nlp object, but it won't be run as part of the pipeline. + + name (str): The name of the component to disable. + """ + if name not in self._pipe_names: + raise ValueError(Errors.E001.format(name=name, opts=self._pipe_names)) + # TODO: should we raise if pipe is already disabled? + self._disabled.add(name) + + def enable_pipe(self, name: str) -> None: + """Enable a previously disabled pipeline component so it's run as part + of the pipeline. + + name (str): The name of the component to enable. + """ + if name not in self._pipe_names: + raise ValueError(Errors.E001.format(name=name, opts=self._pipe_names)) + # TODO: should we raise if pipe is already enabled? + if name in self._disabled: + self._disabled.remove(name) + def __call__( self, text: str, @@ -1366,6 +1418,7 @@ class Language: *, vocab: Union[Vocab, bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), auto_fill: bool = True, validate: bool = True, ) -> "Language": @@ -1375,7 +1428,11 @@ class Language: config (Dict[str, Any] / Config): The loaded config. vocab (Vocab): A Vocab object. If True, a vocab is created. - disable (Iterable[str]): List of pipeline component names to disable. + disable (Iterable[str]): Names of pipeline components to disable. + Disabled pipes will be loaded but they won't be run unless you + explicitly enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. + Excluded components won't be loaded. auto_fill (bool): Automatically fill in missing values in config based on defaults and function argument annotations. validate (bool): Validate the component config and arguments against @@ -1448,7 +1505,7 @@ class Language: raise ValueError(Errors.E956.format(name=pipe_name, opts=opts)) pipe_cfg = util.copy_config(pipeline[pipe_name]) raw_config = Config(filled["components"][pipe_name]) - if pipe_name not in disable: + if pipe_name not in exclude: if "factory" not in pipe_cfg and "source" not in pipe_cfg: err = Errors.E984.format(name=pipe_name, config=pipe_cfg) raise ValueError(err) @@ -1473,6 +1530,8 @@ class Language: ) source_name = pipe_cfg.get("component", pipe_name) nlp.add_pipe(source_name, source=source_nlps[model], name=pipe_name) + disabled_pipes = [*config["nlp"]["disabled"], *disable] + nlp._disabled = set(p for p in disabled_pipes if p not in exclude) nlp.config = filled if auto_fill else config nlp.resolved = resolved if after_pipeline_creation is not None: @@ -1502,9 +1561,10 @@ class Language: ) serializers["meta.json"] = lambda p: srsly.write_json(p, self.meta) serializers["config.cfg"] = lambda p: self.config.to_disk(p) - for name, proc in self.pipeline: - if not hasattr(proc, "name"): - continue + for name, proc in self._pipeline: + # TODO: why did we add this? + # if not hasattr(proc, "name"): + # continue if name in exclude: continue if not hasattr(proc, "to_disk"): @@ -1548,7 +1608,7 @@ class Language: deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk( p, exclude=["vocab"] ) - for name, proc in self.pipeline: + for name, proc in self._pipeline: if name in exclude: continue if not hasattr(proc, "from_disk"): @@ -1577,7 +1637,7 @@ class Language: serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) serializers["meta.json"] = lambda: srsly.json_dumps(self.meta) serializers["config.cfg"] = lambda: self.config.to_bytes() - for name, proc in self.pipeline: + for name, proc in self._pipeline: if name in exclude: continue if not hasattr(proc, "to_bytes"): @@ -1611,7 +1671,7 @@ class Language: deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes( b, exclude=["vocab"] ) - for name, proc in self.pipeline: + for name, proc in self._pipeline: if name in exclude: continue if not hasattr(proc, "from_bytes"): @@ -1647,14 +1707,10 @@ class DisabledPipes(list): def __init__(self, nlp: Language, names: List[str]) -> None: self.nlp = nlp self.names = names - # Important! Not deep copy -- we just want the container (but we also - # want to support people providing arbitrarily typed nlp.pipeline - # objects.) - self.original_pipeline = copy(nlp.pipeline) - self.metas = {name: nlp.get_pipe_meta(name) for name in names} - self.configs = {name: nlp.get_pipe_config(name) for name in names} + for name in self.names: + self.nlp.disable_pipe(name) list.__init__(self) - self.extend(nlp.remove_pipe(name) for name in names) + self.extend(self.names) def __enter__(self): return self @@ -1664,14 +1720,10 @@ class DisabledPipes(list): def restore(self) -> None: """Restore the pipeline to its state when DisabledPipes was created.""" - current, self.nlp.pipeline = self.nlp.pipeline, self.original_pipeline - unexpected = [name for name, pipe in current if not self.nlp.has_pipe(name)] - if unexpected: - # Don't change the pipeline if we're raising an error. - self.nlp.pipeline = current - raise ValueError(Errors.E008.format(names=unexpected)) - self.nlp._pipe_meta.update(self.metas) - self.nlp._pipe_configs.update(self.configs) + for name in self.names: + self.nlp.enable_pipe(name) + # TODO: maybe add some more checks / catch errors that may occur if + # user removes a disabled pipe in the with block self[:] = [] diff --git a/spacy/schemas.py b/spacy/schemas.py index 399a25932..be8db6a99 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -223,6 +223,7 @@ class ConfigSchemaNlp(BaseModel): # fmt: off lang: StrictStr = Field(..., title="The base language to use") pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order") + disabled: List[StrictStr] = Field(..., title="Pipeline components to disable by default") tokenizer: Callable = Field(..., title="The tokenizer to use") load_vocab_data: StrictBool = Field(..., title="Whether to load additional vocab data from spacy-lookups-data") before_creation: Optional[Callable[[Type["Language"]], Type["Language"]]] = Field(..., title="Optional callback to modify Language class before initialization") diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py index 2099964a6..cb91f546f 100644 --- a/spacy/tests/pipeline/test_pipe_methods.py +++ b/spacy/tests/pipeline/test_pipe_methods.py @@ -249,3 +249,66 @@ def test_add_pipe_before_after(): nlp.add_pipe("entity_ruler", before=True) with pytest.raises(ValueError): nlp.add_pipe("entity_ruler", first=False) + + +def test_disable_enable_pipes(): + name = "test_disable_enable_pipes" + results = {} + + def make_component(name): + results[name] = "" + + def component(doc): + nonlocal results + results[name] = doc.text + return doc + + return component + + c1 = Language.component(f"{name}1", func=make_component(f"{name}1")) + c2 = Language.component(f"{name}2", func=make_component(f"{name}2")) + + nlp = Language() + nlp.add_pipe(f"{name}1") + nlp.add_pipe(f"{name}2") + assert results[f"{name}1"] == "" + assert results[f"{name}2"] == "" + assert nlp.pipeline == [(f"{name}1", c1), (f"{name}2", c2)] + assert nlp.pipe_names == [f"{name}1", f"{name}2"] + nlp.disable_pipe(f"{name}1") + assert nlp._disabled == set([f"{name}1"]) + assert nlp._pipe_names == [f"{name}1", f"{name}2"] + assert nlp.pipe_names == [f"{name}2"] + assert nlp.config["nlp"]["disabled"] == [f"{name}1"] + nlp("hello") + assert results[f"{name}1"] == "" # didn't run + assert results[f"{name}2"] == "hello" # ran + nlp.enable_pipe(f"{name}1") + assert nlp._disabled == set() + assert nlp.pipe_names == [f"{name}1", f"{name}2"] + assert nlp.config["nlp"]["disabled"] == [] + nlp("world") + assert results[f"{name}1"] == "world" + assert results[f"{name}2"] == "world" + nlp.disable_pipe(f"{name}2") + nlp.remove_pipe(f"{name}2") + assert nlp._pipeline == [(f"{name}1", c1)] + assert nlp.pipeline == [(f"{name}1", c1)] + assert nlp._pipe_names == [f"{name}1"] + assert nlp.pipe_names == [f"{name}1"] + assert nlp._disabled == set() + assert nlp.config["nlp"]["disabled"] == [] + nlp.rename_pipe(f"{name}1", name) + assert nlp._pipeline == [(name, c1)] + assert nlp._pipe_names == [name] + nlp("!") + assert results[f"{name}1"] == "!" + assert results[f"{name}2"] == "world" + with pytest.raises(ValueError): + nlp.disable_pipe(f"{name}2") + nlp.disable_pipe(name) + assert nlp._pipe_names == [name] + assert nlp.pipe_names == [] + assert nlp.config["nlp"]["disabled"] == [name] + nlp("?") + assert results[f"{name}1"] == "!" diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py index d16ecc1e6..39533f70a 100644 --- a/spacy/tests/regression/test_issue4501-5000.py +++ b/spacy/tests/regression/test_issue4501-5000.py @@ -161,6 +161,7 @@ def test_issue4674(): assert kb2.get_size_entities() == 1 +@pytest.mark.skip(reason="API change: disable just disables, new exclude arg") def test_issue4707(): """Tests that disabled component names are also excluded from nlp.from_disk by default when loading a model. diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py index 7ba4815ee..3ad60c76f 100644 --- a/spacy/tests/serialize/test_serialize_pipeline.py +++ b/spacy/tests/serialize/test_serialize_pipeline.py @@ -6,6 +6,8 @@ from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL from spacy.pipeline.textcat import DEFAULT_TEXTCAT_MODEL from spacy.pipeline.senter import DEFAULT_SENTER_MODEL +from spacy.lang.en import English +import spacy from ..util import make_tempdir @@ -173,3 +175,34 @@ def test_serialize_sentencerecognizer(en_vocab): sr_b = sr.to_bytes() sr_d = SentenceRecognizer(en_vocab, model).from_bytes(sr_b) assert sr.to_bytes() == sr_d.to_bytes() + + +def test_serialize_pipeline_disable_enable(): + nlp = English() + nlp.add_pipe("ner") + nlp.add_pipe("tagger") + nlp.disable_pipe("tagger") + assert nlp.config["nlp"]["disabled"] == ["tagger"] + config = nlp.config.copy() + nlp2 = English.from_config(config) + assert nlp2.pipe_names == ["ner"] + assert nlp2._pipe_names == ["ner", "tagger"] + assert nlp2._disabled == set(["tagger"]) + assert nlp2.config["nlp"]["disabled"] == ["tagger"] + with make_tempdir() as d: + nlp2.to_disk(d) + nlp3 = spacy.load(d) + assert nlp3.pipe_names == ["ner"] + assert nlp3._pipe_names == ["ner", "tagger"] + with make_tempdir() as d: + nlp3.to_disk(d) + nlp4 = spacy.load(d, disable=["ner"]) + assert nlp4.pipe_names == [] + assert nlp4._pipe_names == ["ner", "tagger"] + assert nlp4._disabled == set(["ner", "tagger"]) + with make_tempdir() as d: + nlp.to_disk(d) + nlp5 = spacy.load(d, exclude=["tagger"]) + assert nlp5.pipe_names == ["ner"] + assert nlp5._pipe_names == ["ner"] + assert nlp5._disabled == set() diff --git a/spacy/util.py b/spacy/util.py index 8ba164dc1..d12e54dc7 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -216,6 +216,7 @@ def load_model( *, vocab: Union["Vocab", bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Load a model from a package or data path. @@ -228,7 +229,7 @@ def load_model( keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. """ - kwargs = {"vocab": vocab, "disable": disable, "config": config} + kwargs = {"vocab": vocab, "disable": disable, "exclude": exclude, "config": config} if isinstance(name, str): # name or string path if name.startswith("blank:"): # shortcut for blank model return get_lang_class(name.replace("blank:", ""))() @@ -248,6 +249,7 @@ def load_model_from_package( *, vocab: Union["Vocab", bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Load a model from an installed package. @@ -255,13 +257,17 @@ def load_model_from_package( name (str): The package name. vocab (Vocab / True): Optional vocab to pass in on initialization. If True, a new Vocab object will be created. - disable (Iterable[str]): Names of pipeline components to disable. + disable (Iterable[str]): Names of pipeline components to disable. Disabled + pipes will be loaded but they won't be run unless you explicitly + enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. Excluded + components won't be loaded. config (Dict[str, Any] / Config): Config overrides as nested dict or dict keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. """ cls = importlib.import_module(name) - return cls.load(vocab=vocab, disable=disable, config=config) + return cls.load(vocab=vocab, disable=disable, exclude=exclude, config=config) def load_model_from_path( @@ -270,6 +276,7 @@ def load_model_from_path( meta: Optional[Dict[str, Any]] = None, vocab: Union["Vocab", bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Load a model from a data directory path. Creates Language class with @@ -279,7 +286,11 @@ def load_model_from_path( meta (Dict[str, Any]): Optional model meta. vocab (Vocab / True): Optional vocab to pass in on initialization. If True, a new Vocab object will be created. - disable (Iterable[str]): Names of pipeline components to disable. + disable (Iterable[str]): Names of pipeline components to disable. Disabled + pipes will be loaded but they won't be run unless you explicitly + enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. Excluded + components won't be loaded. config (Dict[str, Any] / Config): Config overrides as nested dict or dict keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. @@ -290,8 +301,10 @@ def load_model_from_path( meta = get_model_meta(model_path) config_path = model_path / "config.cfg" config = load_config(config_path, overrides=dict_to_dot(config)) - nlp, _ = load_model_from_config(config, vocab=vocab, disable=disable) - return nlp.from_disk(model_path, exclude=disable) + nlp, _ = load_model_from_config( + config, vocab=vocab, disable=disable, exclude=exclude + ) + return nlp.from_disk(model_path, exclude=exclude) def load_model_from_config( @@ -299,6 +312,7 @@ def load_model_from_config( *, vocab: Union["Vocab", bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), auto_fill: bool = False, validate: bool = True, ) -> Tuple["Language", Config]: @@ -309,7 +323,11 @@ def load_model_from_config( meta (Dict[str, Any]): Optional model meta. vocab (Vocab / True): Optional vocab to pass in on initialization. If True, a new Vocab object will be created. - disable (Iterable[str]): Names of pipeline components to disable. + disable (Iterable[str]): Names of pipeline components to disable. Disabled + pipes will be loaded but they won't be run unless you explicitly + enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. Excluded + components won't be loaded. auto_fill (bool): Whether to auto-fill config with missing defaults. validate (bool): Whether to show config validation errors. RETURNS (Language): The loaded nlp object. @@ -323,7 +341,12 @@ def load_model_from_config( # registry, including custom subclasses provided via entry points lang_cls = get_lang_class(nlp_config["lang"]) nlp = lang_cls.from_config( - config, vocab=vocab, disable=disable, auto_fill=auto_fill, validate=validate, + config, + vocab=vocab, + disable=disable, + exclude=exclude, + auto_fill=auto_fill, + validate=validate, ) return nlp, nlp.resolved @@ -333,6 +356,7 @@ def load_model_from_init_py( *, vocab: Union["Vocab", bool] = True, disable: Iterable[str] = tuple(), + exclude: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Helper function to use in the `load()` method of a model package's @@ -340,7 +364,11 @@ def load_model_from_init_py( vocab (Vocab / True): Optional vocab to pass in on initialization. If True, a new Vocab object will be created. - disable (Iterable[str]): Names of pipeline components to disable. + disable (Iterable[str]): Names of pipeline components to disable. Disabled + pipes will be loaded but they won't be run unless you explicitly + enable them by calling nlp.enable_pipe. + exclude (Iterable[str]): Names of pipeline components to exclude. Excluded + components won't be loaded. config (Dict[str, Any] / Config): Config overrides as nested dict or dict keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. @@ -352,7 +380,12 @@ def load_model_from_init_py( if not model_path.exists(): raise IOError(Errors.E052.format(path=data_path)) return load_model_from_path( - data_path, vocab=vocab, meta=meta, disable=disable, config=config + data_path, + vocab=vocab, + meta=meta, + disable=disable, + exclude=exclude, + config=config, )