Update docs and util consistency

This commit is contained in:
Ines Montani 2020-08-18 01:22:59 +02:00
parent 728fec0194
commit 1c3bcfb488
4 changed files with 103 additions and 27 deletions

View File

@ -249,7 +249,16 @@ def load_model_from_package(
disable: Iterable[str] = tuple(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from an installed package."""
"""Load a model from an installed package.
name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
cls = importlib.import_module(name)
return cls.load(vocab=vocab, disable=disable, config=config)
@ -263,7 +272,17 @@ def load_model_from_path(
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path."""
pipeline from config.cfg and then calls from_disk() with path.
model_path (Path): Path to the model data directory.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
if not model_path.exists():
raise IOError(Errors.E052.format(path=model_path))
if not meta:
@ -284,6 +303,15 @@ def load_model_from_config(
) -> Tuple["Language", Config]:
"""Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object.
name (str): Package name or model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
RETURNS (Tuple[Language, Config]): The loaded nlp object and the corresponding config.
"""
if "nlp" not in config:
raise ValueError(Errors.E985.format(config=config))
@ -308,6 +336,13 @@ def load_model_from_init_py(
) -> "Language":
"""Helper function to use in the `load()` method of a model package's
__init__.py.
init_file (str / Path): Path to the model package's __init__.py.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
model_path = Path(init_file).parent
meta = get_model_meta(model_path)
@ -325,7 +360,14 @@ def load_config(
overrides: Dict[str, Any] = SimpleFrozenDict(),
interpolate: bool = False,
) -> Config:
"""Load a config file. Takes care of path validation and section order."""
"""Load a config file. Takes care of path validation and section order.
path (Union[str, Path]): Path to the config file.
overrides (Dict[str, Any]): Config overrides as nested dict or
dict keyed by section values in dot notation.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
"""
config_path = ensure_path(path)
if not config_path.exists() or not config_path.is_file():
raise IOError(Errors.E053.format(path=config_path, name="config.cfg"))
@ -337,7 +379,12 @@ def load_config(
def load_config_from_str(
text: str, overrides: Dict[str, Any] = SimpleFrozenDict(), interpolate: bool = False
):
"""Load a full config from a string."""
"""Load a full config from a string. Wrapper around Thinc's Config.from_str.
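text (str): The string config to load.
overrides (Dict[str, Any]): Config overrides as nested dict or
dict keyed by section values in dot notation.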
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
"""
return Config(section_order=CONFIG_SECTION_ORDER).from_str(
text, overrides=overrides, interpolate=interpolate,
)
@ -435,19 +482,18 @@ def get_base_version(version: str) -> str:
return Version(version).base_version
def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]:
"""Get model meta.json from a directory path and validate its contents.
def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
"""Load a model meta.json from a path and validate its contents.
path (str / Path): Path to model directory.
RETURNS (Dict[str, Any]): The model's meta data.
path (Union[str, Path]): Path to meta.json.
RETURNS (Dict[str, Any]): The loaded meta.
"""
model_path = ensure_path(path)
if not model_path.exists():
raise IOError(Errors.E052.format(path=model_path))
meta_path = model_path / "meta.json"
if not meta_path.is_file():
raise IOError(Errors.E053.format(path=meta_path, name="meta.json"))
meta = srsly.read_json(meta_path)
path = ensure_path(path)
if not path.parent.exists():
raise IOError(Errors.E052.format(path=path.parent))
if not path.exists() or not path.is_file():
raise IOError(Errors.E053.format(path=path, name="meta.json"))
meta = srsly.read_json(path)
for setting in ["lang", "name", "version"]:
if setting not in meta or not meta[setting]:
raise ValueError(Errors.E054.format(setting=setting))
@ -471,6 +517,16 @@ def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]:
return meta
def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]:
    """Read and validate the meta.json stored inside a model directory.

    path (str / Path): Path to model directory.
    RETURNS (Dict[str, Any]): The model's meta data.
    """
    # Delegate loading and validation to load_meta, which expects the path
    # of the meta.json file itself rather than its parent directory.
    return load_meta(ensure_path(path) / "meta.json")
def is_package(name: str) -> bool:
"""Check if string maps to a package installed via pip.

View File

@ -40,7 +40,7 @@ Initialize a `Language` object.
| `meta` | Custom meta data for the `Language` class. Is written to by models to add model meta data. ~~dict~~ |
| `create_tokenizer` | Optional function that receives the `nlp` object and returns a tokenizer. ~~Callable[[Language], Callable[[str], Doc]]~~ |
## Language.from_config {#from_config tag="classmethod"}
## Language.from_config {#from_config tag="classmethod" new="3"}
Create a `Language` object from a loaded config. Will set up the tokenizer and
language data, add pipeline components based on the pipeline and components

View File

@ -70,7 +70,7 @@ Create a blank model of a given language class. This function is the twin of
| `name` | [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) of the language class to load. ~~str~~ |
| **RETURNS** | An empty `Language` object of the appropriate subclass. ~~Language~~ |
#### spacy.info {#spacy.info tag="function"}
### spacy.info {#spacy.info tag="function"}
The same as the [`info` command](/api/cli#info). Pretty-print information about
your installation, models and local setup from within spaCy. To get the model
@ -585,20 +585,40 @@ A helper function to use in the `load()` method of a model package's
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | `Language` class with the loaded model. ~~Language~~ |
### util.get_model_meta {#util.get_model_meta tag="function" new="2"}
### util.load_config {#util.load_config tag="function" new="3"}
Get a model's meta.json from a directory path and validate its contents.
Load a model's [`config.cfg`](/api/data-formats#config) from a file path. The
config typically includes details about the model pipeline and how its
components are created, as well as all training settings and hyperparameters.
> #### Example
>
> ```python
> meta = util.get_model_meta("/path/to/model")
> config = util.load_config("/path/to/model/config.cfg")
> print(config.to_str())
> ```
| Name | Description |
| ----------- | --------------------------------------------- |
| `path` | Path to model directory. ~~Union[str, Path]~~ |
| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~ |
| Name | Description |
| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `path` | Path to the model's `config.cfg`. ~~Union[str, Path]~~ |
| `overrides` | Optional config overrides to replace in loaded config. Can be provided as nested dict, or as flat dict with keys in dot notation, e.g. `"nlp.pipeline"`. ~~Dict[str, Any]~~ |
| `interpolate` | Whether to interpolate the config and replace variables like `${paths:train}` with their values. Defaults to `False`. ~~bool~~ |
| **RETURNS** | The model's config. ~~Config~~ |
### util.load_meta {#util.load_meta tag="function" new="3"}
Get a model's `meta.json` from a file path and validate its contents.
> #### Example
>
> ```python
> meta = util.load_meta("/path/to/model/meta.json")
> ```
| Name | Description |
| ----------- | ----------------------------------------------------- |
| `path` | Path to the model's `meta.json`. ~~Union[str, Path]~~ |
| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~ |
### util.is_package {#util.is_package tag="function"}

View File

@ -62,12 +62,12 @@ function linkType(el, showLink = true) {
export const TypeAnnotation = ({ lang = 'python', link = true, children }) => {
// Hacky, but we're temporarily replacing a dot to prevent it from being split during highlighting
const TMP_DOT = ''
const TMP_DOT = '۔'
const code = Array.isArray(children) ? children.join('') : children || ''
const [rawText, meta] = code.split(/(?= \(.+\)$)/)
const rawStr = rawText.replace('.', TMP_DOT)
const rawStr = rawText.replace(/\./g, TMP_DOT)
const rawHtml = lang === 'none' || !code ? code : highlightCode(lang, rawStr)
const html = rawHtml.replace(TMP_DOT, '.').replace(/\n/g, ' ')
const html = rawHtml.replace(new RegExp(TMP_DOT, 'g'), '.').replace(/\n/g, ' ')
const result = htmlToReact(html)
const elements = Array.isArray(result) ? result : [result]
const annotClassNames = classNames(