mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-16 04:26:27 +03:00
4c058eb40a
* Enable flag on spacy.load: foundation for include, enable arguments. * Enable flag on spacy.load: fixed tests. * Enable flag on spacy.load: switched from pretrained model to empty model with added pipes for tests. * Enable flag on spacy.load: switched to more consistent error on misspecification of component activity. Test refactoring. Added to default config. * Enable flag on spacy.load: added support for fields not in pipeline. * Enable flag on spacy.load: removed serialization fields from supported fields. * Enable flag on spacy.load: removed 'enable' from config again. * Enable flag on spacy.load: relaxed checks in _resolve_component_activation_status() to allow non-standard pipes. * Enable flag on spacy.load: fixed relaxed checks for _resolve_component_activation_status() to allow non-standard pipes. Extended tests. * Enable flag on spacy.load: comments w.r.t. resolution workarounds. * Enable flag on spacy.load: remove include fields. Update website docs. * Enable flag on spacy.load: updates w.r.t. changes in master. * Implement Doc.from_json(): update docstrings. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Implement Doc.from_json(): remove newline. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Implement Doc.from_json(): change error message for E1038. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Enable flag on spacy.load: wrapped docstring for _resolve_component_status() at 80 chars. * Enable flag on spacy.load: changed exmples for enable flag. * Remove newline. Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Fix docstring for Language._resolve_component_status(). * Rename E1038 to E1042. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
from typing import Union, Iterable, Dict, Any
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
# Set library-specific custom warning handling before doing anything else.
# The import and the call are deliberately placed ahead of the remaining
# package imports below; keep this ordering when editing the module.
from .errors import setup_default_warnings

setup_default_warnings()  # noqa: E402
|
|
|
|
# These are imported as part of the API
|
|
from thinc.api import prefer_gpu, require_gpu, require_cpu # noqa: F401
|
|
from thinc.api import Config
|
|
|
|
from . import pipeline # noqa: F401
|
|
from .cli.info import info # noqa: F401
|
|
from .glossary import explain # noqa: F401
|
|
from .about import __version__ # noqa: F401
|
|
from .util import registry, logger # noqa: F401
|
|
|
|
from .errors import Errors
|
|
from .language import Language
|
|
from .vocab import Vocab
|
|
from . import util
|
|
|
|
|
|
# Import-time guard: a sys.maxunicode of 65535 (0xFFFF) identifies an
# interpreter whose largest representable code point is U+FFFF. Abort the
# import with the message stored in Errors.E130 in that case.
if sys.maxunicode == 65535:
    raise SystemError(Errors.E130)
|
|
|
|
|
|
def load(
    name: Union[str, Path],
    *,
    vocab: Union[Vocab, bool] = True,
    disable: Iterable[str] = util.SimpleFrozenList(),
    enable: Iterable[str] = util.SimpleFrozenList(),
    exclude: Iterable[str] = util.SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
    """Load a spaCy model from an installed package or a local path.

    All arguments after `name` are keyword-only.

    name (str / Path): Package name or model path.
    vocab (Vocab / bool): A Vocab object. If True, a vocab is created.
    disable (Iterable[str]): Names of pipeline components to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Iterable[str]): Names of pipeline components to enable. All other
        pipes will be disabled (but can be enabled later using nlp.enable_pipe).
    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    """
    # Thin public entry point: every argument is forwarded unchanged to
    # util.load_model, which does the actual loading.
    return util.load_model(
        name,
        vocab=vocab,
        disable=disable,
        enable=enable,
        exclude=exclude,
        config=config,
    )
|
|
|
|
|
|
def blank(
    name: str,
    *,
    vocab: Union[Vocab, bool] = True,
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
    meta: Dict[str, Any] = util.SimpleFrozenDict(),
) -> Language:
    """Create a blank nlp object for a given language code.

    All arguments after `name` are keyword-only.

    name (str): The language code, e.g. "en".
    vocab (Vocab / bool): A Vocab object. If True, a vocab is created.
    config (Dict[str, Any] / Config): Optional config overrides, given either
        as a nested dict or as a dict keyed in dot notation.
    meta (Dict[str, Any]): Overrides for nlp.meta.
    RETURNS (Language): The nlp object.
    """
    # Look up the Language subclass registered for this language code.
    LangClass = util.get_lang_class(name)
    # We should accept both dot notation and nested dict here for consistency
    config = util.dot_to_dict(config)
    return LangClass.from_config(config, vocab=vocab, meta=meta)
|