Update docs and util consistency

This commit is contained in:
Ines Montani 2020-08-18 01:22:59 +02:00
parent 728fec0194
commit 1c3bcfb488
4 changed files with 103 additions and 27 deletions

View File

@ -249,7 +249,16 @@ def load_model_from_package(
disable: Iterable[str] = tuple(), disable: Iterable[str] = tuple(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Load a model from an installed package.""" """Load a model from an installed package.
name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
cls = importlib.import_module(name) cls = importlib.import_module(name)
return cls.load(vocab=vocab, disable=disable, config=config) return cls.load(vocab=vocab, disable=disable, config=config)
@ -263,7 +272,17 @@ def load_model_from_path(
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Load a model from a data directory path. Creates Language class with """Load a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path.""" pipeline from config.cfg and then calls from_disk() with path.
name (str): Package name or model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
if not model_path.exists(): if not model_path.exists():
raise IOError(Errors.E052.format(path=model_path)) raise IOError(Errors.E052.format(path=model_path))
if not meta: if not meta:
@ -284,6 +303,15 @@ def load_model_from_config(
) -> Tuple["Language", Config]: ) -> Tuple["Language", Config]:
"""Create an nlp object from a config. Expects the full config file including """Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object. a section "nlp" containing the settings for the nlp object.
name (str): Package name or model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
RETURNS (Tuple[Language, Config]): The loaded nlp object and the config.
""" """
if "nlp" not in config: if "nlp" not in config:
raise ValueError(Errors.E985.format(config=config)) raise ValueError(Errors.E985.format(config=config))
@ -308,6 +336,13 @@ def load_model_from_init_py(
) -> "Language": ) -> "Language":
"""Helper function to use in the `load()` method of a model package's """Helper function to use in the `load()` method of a model package's
__init__.py. __init__.py.
init_file (Union[str, Path]): Path to the model package's __init__.py, i.e. `__file__`.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
""" """
model_path = Path(init_file).parent model_path = Path(init_file).parent
meta = get_model_meta(model_path) meta = get_model_meta(model_path)
@ -325,7 +360,14 @@ def load_config(
overrides: Dict[str, Any] = SimpleFrozenDict(), overrides: Dict[str, Any] = SimpleFrozenDict(),
interpolate: bool = False, interpolate: bool = False,
) -> Config: ) -> Config:
"""Load a config file. Takes care of path validation and section order.""" """Load a config file. Takes care of path validation and section order.
path (Union[str, Path]): Path to the config file.
overrides (Dict[str, Any]): Config overrides as nested dict or
dict keyed by section values in dot notation.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
"""
config_path = ensure_path(path) config_path = ensure_path(path)
if not config_path.exists() or not config_path.is_file(): if not config_path.exists() or not config_path.is_file():
raise IOError(Errors.E053.format(path=config_path, name="config.cfg")) raise IOError(Errors.E053.format(path=config_path, name="config.cfg"))
@ -337,7 +379,12 @@ def load_config(
def load_config_from_str( def load_config_from_str(
text: str, overrides: Dict[str, Any] = SimpleFrozenDict(), interpolate: bool = False text: str, overrides: Dict[str, Any] = SimpleFrozenDict(), interpolate: bool = False
): ):
"""Load a full config from a string.""" """Load a full config from a string. Wrapper around Thinc's Config.from_str.
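text (str): The string config to load.
overrides (Dict[str, Any]): Config overrides as nested dict or dict keyed
by section values in dot notation.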
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
"""
return Config(section_order=CONFIG_SECTION_ORDER).from_str( return Config(section_order=CONFIG_SECTION_ORDER).from_str(
text, overrides=overrides, interpolate=interpolate, text, overrides=overrides, interpolate=interpolate,
) )
@ -435,19 +482,18 @@ def get_base_version(version: str) -> str:
return Version(version).base_version return Version(version).base_version
def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]: def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
"""Get model meta.json from a directory path and validate its contents. """Load a model meta.json from a path and validate its contents.
path (str / Path): Path to model directory. path (Union[str, Path]): Path to meta.json.
RETURNS (Dict[str, Any]): The model's meta data. RETURNS (Dict[str, Any]): The loaded meta.
""" """
model_path = ensure_path(path) path = ensure_path(path)
if not model_path.exists(): if not path.parent.exists():
raise IOError(Errors.E052.format(path=model_path)) raise IOError(Errors.E052.format(path=path.parent))
meta_path = model_path / "meta.json" if not path.exists() or not path.is_file():
if not meta_path.is_file(): raise IOError(Errors.E053.format(path=path, name="meta.json"))
raise IOError(Errors.E053.format(path=meta_path, name="meta.json")) meta = srsly.read_json(path)
meta = srsly.read_json(meta_path)
for setting in ["lang", "name", "version"]: for setting in ["lang", "name", "version"]:
if setting not in meta or not meta[setting]: if setting not in meta or not meta[setting]:
raise ValueError(Errors.E054.format(setting=setting)) raise ValueError(Errors.E054.format(setting=setting))
@ -471,6 +517,16 @@ def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]:
return meta return meta
def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]:
"""Get model meta.json from a directory path and validate its contents.
path (str / Path): Path to model directory.
RETURNS (Dict[str, Any]): The model's meta data.
"""
model_path = ensure_path(path)
return load_meta(model_path / "meta.json")
def is_package(name: str) -> bool: def is_package(name: str) -> bool:
"""Check if string maps to a package installed via pip. """Check if string maps to a package installed via pip.

View File

@ -40,7 +40,7 @@ Initialize a `Language` object.
| `meta` | Custom meta data for the `Language` class. Is written to by models to add model meta data. ~~dict~~ | | `meta` | Custom meta data for the `Language` class. Is written to by models to add model meta data. ~~dict~~ |
| `create_tokenizer` | Optional function that receives the `nlp` object and returns a tokenizer. ~~Callable[[Language], Callable[[str], Doc]]~~ | | `create_tokenizer` | Optional function that receives the `nlp` object and returns a tokenizer. ~~Callable[[Language], Callable[[str], Doc]]~~ |
## Language.from_config {#from_config tag="classmethod"} ## Language.from_config {#from_config tag="classmethod" new="3"}
Create a `Language` object from a loaded config. Will set up the tokenizer and Create a `Language` object from a loaded config. Will set up the tokenizer and
language data, add pipeline components based on the pipeline and components language data, add pipeline components based on the pipeline and components

View File

@ -70,7 +70,7 @@ Create a blank model of a given language class. This function is the twin of
| `name` | [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) of the language class to load. ~~str~~ | | `name` | [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) of the language class to load. ~~str~~ |
| **RETURNS** | An empty `Language` object of the appropriate subclass. ~~Language~~ | | **RETURNS** | An empty `Language` object of the appropriate subclass. ~~Language~~ |
#### spacy.info {#spacy.info tag="function"} ### spacy.info {#spacy.info tag="function"}
The same as the [`info` command](/api/cli#info). Pretty-print information about The same as the [`info` command](/api/cli#info). Pretty-print information about
your installation, models and local setup from within spaCy. To get the model your installation, models and local setup from within spaCy. To get the model
@ -585,20 +585,40 @@ A helper function to use in the `load()` method of a model package's
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ | | `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | `Language` class with the loaded model. ~~Language~~ | | **RETURNS** | `Language` class with the loaded model. ~~Language~~ |
### util.get_model_meta {#util.get_model_meta tag="function" new="2"} ### util.load_config {#util.load_config tag="function" new="3"}
Get a model's meta.json from a directory path and validate its contents. Load a model's [`config.cfg`](/api/data-formats#config) from a file path. The
config typically includes details about the model pipeline and how its
components are created, as well as all training settings and hyperparameters.
> #### Example > #### Example
> >
> ```python > ```python
> meta = util.get_model_meta("/path/to/model") > config = util.load_config("/path/to/model/config.cfg")
> print(config.to_str())
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | --------------------------------------------- | | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `path` | Path to model directory. ~~Union[str, Path]~~ | | `path` | Path to the model's `config.cfg`. ~~Union[str, Path]~~ |
| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~ | | `overrides` | Optional config overrides to replace in loaded config. Can be provided as nested dict, or as flat dict with keys in dot notation, e.g. `"nlp.pipeline"`. ~~Dict[str, Any]~~ |
| `interpolate` | Whether to interpolate the config and replace variables like `${paths:train}` with their values. Defaults to `False`. ~~bool~~ |
| **RETURNS** | The model's config. ~~Config~~ |
### util.load_meta {#util.load_meta tag="function" new="3"}
Get a model's `meta.json` from a file path and validate its contents.
> #### Example
>
> ```python
> meta = util.load_meta("/path/to/model/meta.json")
> ```
| Name | Description |
| ----------- | ----------------------------------------------------- |
| `path` | Path to the model's `meta.json`. ~~Union[str, Path]~~ |
| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~ |
### util.is_package {#util.is_package tag="function"} ### util.is_package {#util.is_package tag="function"}

View File

@ -62,12 +62,12 @@ function linkType(el, showLink = true) {
export const TypeAnnotation = ({ lang = 'python', link = true, children }) => { export const TypeAnnotation = ({ lang = 'python', link = true, children }) => {
// Hacky, but we're temporarily replacing a dot to prevent it from being split during highlighting // Hacky, but we're temporarily replacing a dot to prevent it from being split during highlighting
const TMP_DOT = '' const TMP_DOT = '۔'
const code = Array.isArray(children) ? children.join('') : children || '' const code = Array.isArray(children) ? children.join('') : children || ''
const [rawText, meta] = code.split(/(?= \(.+\)$)/) const [rawText, meta] = code.split(/(?= \(.+\)$)/)
const rawStr = rawText.replace('.', TMP_DOT) const rawStr = rawText.replace(/\./g, TMP_DOT)
const rawHtml = lang === 'none' || !code ? code : highlightCode(lang, rawStr) const rawHtml = lang === 'none' || !code ? code : highlightCode(lang, rawStr)
const html = rawHtml.replace(TMP_DOT, '.').replace(/\n/g, ' ') const html = rawHtml.replace(new RegExp(TMP_DOT, 'g'), '.').replace(/\n/g, ' ')
const result = htmlToReact(html) const result = htmlToReact(html)
const elements = Array.isArray(result) ? result : [result] const elements = Array.isArray(result) ? result : [result]
const annotClassNames = classNames( const annotClassNames = classNames(