from typing import Union, Iterable, Dict, Any from pathlib import Path import sys # set library-specific custom warning handling before doing anything else from .errors import setup_default_warnings setup_default_warnings() # noqa: E402 # These are imported as part of the API from thinc.api import prefer_gpu, require_gpu, require_cpu # noqa: F401 from thinc.api import Config from . import pipeline # noqa: F401 from .cli.info import info # noqa: F401 from .glossary import explain # noqa: F401 from .about import __version__ # noqa: F401 from .util import registry, logger # noqa: F401 from .errors import Errors from .language import Language from .vocab import Vocab from . import util if sys.maxunicode == 65535: raise SystemError(Errors.E130) def load( name: Union[str, Path], *, vocab: Union[Vocab, bool] = True, disable: Iterable[str] = util.SimpleFrozenList(), enable: Iterable[str] = util.SimpleFrozenList(), exclude: Iterable[str] = util.SimpleFrozenList(), config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(), ) -> Language: """Load a spaCy model from an installed package or a local path. name (str): Package name or model path. vocab (Vocab): A Vocab object. If True, a vocab is created. disable (Iterable[str]): Names of pipeline components to disable. Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling nlp.enable_pipe. enable (Iterable[str]): Names of pipeline components to enable. All other pipes will be disabled (but can be enabled later using nlp.enable_pipe). exclude (Iterable[str]): Names of pipeline components to exclude. Excluded components won't be loaded. config (Dict[str, Any] / Config): Config overrides as nested dict or dict keyed by section values in dot notation. RETURNS (Language): The loaded nlp object. """ return util.load_model( name, vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config, ) def blank( name: str, *, vocab: Union[Vocab, bool] = True, config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(), meta: Dict[str, Any] = util.SimpleFrozenDict(), ) -> Language: """Create a blank nlp object for a given language code. name (str): The language code, e.g. "en". vocab (Vocab): A Vocab object. If True, a vocab is created. config (Dict[str, Any] / Config): Optional config overrides. meta (Dict[str, Any]): Overrides for nlp.meta. RETURNS (Language): The nlp object. """ LangClass = util.get_lang_class(name) # We should accept both dot notation and nested dict here for consistency config = util.dot_to_dict(config) return LangClass.from_config(config, vocab=vocab, meta=meta)