spaCy/spacy/__init__.py
Raphael Mitsch aea16719be
Simplify and clarify enable/disable behavior of spacy.load() (#11459)
* Change enable/disable behavior so that arguments take precedence over config options. Extend error message on conflict. Add warning message in case of overwriting config option with arguments.

* Fix tests in test_serialize_pipeline.py to reflect changes to handling of enable/disable.

* Fix type issue.

* Move comment.

* Move comment.

* Issue UserWarning instead of printing wasabi message. Adjust test.

* Added pytest.warns(UserWarning) for expected warning to fix tests.

* Update warning message.

* Move type handling out of fetch_pipes_status().

* Add global variable for default value. Use id() to determine whether used values are default value.

* Fix default value for disable.

* Rename DEFAULT_PIPE_STATUS to _DEFAULT_EMPTY_PIPES.
2022-09-27 14:22:36 +02:00

83 lines
2.9 KiB
Python

from typing import Union, Iterable, Dict, Any
from pathlib import Path
import sys
# set library-specific custom warning handling before doing anything else
from .errors import setup_default_warnings
setup_default_warnings() # noqa: E402
# These are imported as part of the API
from thinc.api import prefer_gpu, require_gpu, require_cpu # noqa: F401
from thinc.api import Config
from . import pipeline # noqa: F401
from .cli.info import info # noqa: F401
from .glossary import explain # noqa: F401
from .about import __version__ # noqa: F401
from .util import registry, logger # noqa: F401
from .errors import Errors
from .language import Language
from .vocab import Vocab
from . import util
if sys.maxunicode == 65535:
raise SystemError(Errors.E130)
def load(
name: Union[str, Path],
*,
vocab: Union[Vocab, bool] = True,
disable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
enable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
"""Load a spaCy model from an installed package or a local path.
name (str): Package name or model path.
vocab (Vocab): A Vocab object. If True, a vocab is created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (but can be enabled later using nlp.enable_pipe).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
return util.load_model(
name,
vocab=vocab,
disable=disable,
enable=enable,
exclude=exclude,
config=config,
)
def blank(
name: str,
*,
vocab: Union[Vocab, bool] = True,
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
meta: Dict[str, Any] = util.SimpleFrozenDict(),
) -> Language:
"""Create a blank nlp object for a given language code.
name (str): The language code, e.g. "en".
vocab (Vocab): A Vocab object. If True, a vocab is created.
config (Dict[str, Any] / Config): Optional config overrides.
meta (Dict[str, Any]): Overrides for nlp.meta.
RETURNS (Language): The nlp object.
"""
LangClass = util.get_lang_class(name)
# We should accept both dot notation and nested dict here for consistency
config = util.dot_to_dict(config)
return LangClass.from_config(config, vocab=vocab, meta=meta)