mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Document Lookups [ci skip]
This commit is contained in:
parent
32404e613c
commit
10257f3131
|
@ -32,7 +32,7 @@ class Lookups(object):
|
|||
Lookups.has_table.
|
||||
|
||||
name (unicode): Name of the table.
|
||||
RETURNS (bool): Whether a table of that name exists.
|
||||
RETURNS (bool): Whether a table of that name is in the lookups.
|
||||
"""
|
||||
return self.has_table(name)
|
||||
|
||||
|
@ -72,7 +72,7 @@ class Lookups(object):
|
|||
def remove_table(self, name):
|
||||
"""Remove a table. Raises an error if the table doesn't exist.
|
||||
|
||||
name (unicode): The name to remove.
|
||||
name (unicode): Name of the table to remove.
|
||||
RETURNS (Table): The removed table.
|
||||
"""
|
||||
if name not in self._tables:
|
||||
|
@ -87,19 +87,18 @@ class Lookups(object):
|
|||
"""
|
||||
return name in self._tables
|
||||
|
||||
def to_bytes(self, exclude=tuple(), **kwargs):
|
||||
def to_bytes(self, **kwargs):
|
||||
"""Serialize the lookups to a bytestring.
|
||||
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized Lookups.
|
||||
"""
|
||||
return srsly.msgpack_dumps(self._tables)
|
||||
|
||||
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
|
||||
def from_bytes(self, bytes_data, **kwargs):
|
||||
"""Load the lookups from a bytestring.
|
||||
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The loaded Lookups.
|
||||
bytes_data (bytes): The data to load.
|
||||
RETURNS (Lookups): The loaded Lookups.
|
||||
"""
|
||||
self._tables = OrderedDict()
|
||||
msg = srsly.msgpack_loads(bytes_data)
|
||||
|
@ -108,7 +107,8 @@ class Lookups(object):
|
|||
return self
|
||||
|
||||
def to_disk(self, path, **kwargs):
|
||||
"""Save the lookups to a directory as lookups.bin.
|
||||
"""Save the lookups to a directory as lookups.bin. Expects a path to a
|
||||
directory, which will be created if it doesn't exist.
|
||||
|
||||
path (unicode / Path): The file path.
|
||||
"""
|
||||
|
@ -121,9 +121,10 @@ class Lookups(object):
|
|||
file_.write(self.to_bytes())
|
||||
|
||||
def from_disk(self, path, **kwargs):
|
||||
"""Load lookups from a directory containing a lookups.bin.
|
||||
"""Load lookups from a directory containing a lookups.bin. Will skip
|
||||
loading if the file doesn't exist.
|
||||
|
||||
path (unicode / Path): The file path.
|
||||
path (unicode / Path): The directory path.
|
||||
RETURNS (Lookups): The loaded lookups.
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
|
@ -136,12 +137,18 @@ class Lookups(object):
|
|||
|
||||
|
||||
class Table(OrderedDict):
|
||||
"""A table in the lookups. Subclass of builtin dict that implements a
|
||||
"""A table in the lookups. Subclass of OrderedDict that implements a
|
||||
slightly more consistent and unified API.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data, name=None):
|
||||
"""Initialize a new table from a dict.
|
||||
|
||||
data (dict): The dictionary.
|
||||
name (unicode): Optional table name for reference.
|
||||
RETURNS (Table): The newly created object.
|
||||
"""
|
||||
self = cls(name=name)
|
||||
self.update(data)
|
||||
return self
|
||||
|
|
271
website/docs/api/lookups.md
Normal file
271
website/docs/api/lookups.md
Normal file
|
@ -0,0 +1,271 @@
|
|||
---
|
||||
title: Lookups
|
||||
teaser: A container for large lookup tables and dictionaries
|
||||
tag: class
|
||||
source: spacy/lookups.py
|
||||
new: 2.2
|
||||
---
|
||||
|
||||
This class allows convenient accesss to large lookup tables and dictionaries,
|
||||
e.g. lemmatization data or tokenizer exception lists. Lookups are available via
|
||||
the [`Vocab`](/api/vocab) as `vocab.lookups`, so they can be accessed before the
|
||||
pipeline components are applied (e.g. in the tokenizer and lemmatizer), as well
|
||||
as within the pipeline components via `doc.vocab.lookups`.
|
||||
|
||||
## Lookups.\_\_init\_\_ {#init tag="method"}
|
||||
|
||||
Create a `Lookups` object.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lookups import Lookups
|
||||
> lookups = Lookups()
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | --------- | ----------------------------- |
|
||||
| **RETURNS** | `Lookups` | The newly constructed object. |
|
||||
|
||||
## Lookups.\_\_len\_\_ {#len tag="method"}
|
||||
|
||||
Get the current number of tables in the lookups.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> assert len(lookups) == 0
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---- | ------------------------------------ |
|
||||
| **RETURNS** | int | The number of tables in the lookups. |
|
||||
|
||||
## Lookups.\_\contains\_\_ {#contains tag="method"}
|
||||
|
||||
Check if the lookups contain a table of a given name. Delegates to
|
||||
[`Lookups.has_table`](/api/lookups#has_table).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table")
|
||||
> assert "some_table" in lookups
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ------- | ----------------------------------------------- |
|
||||
| `name` | unicode | Name of the table. |
|
||||
| **RETURNS** | bool | Whether a table of that name is in the lookups. |
|
||||
|
||||
## Lookups.tables {#tables tag="property"}
|
||||
|
||||
Get the names of all tables in the lookups.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table")
|
||||
> assert lookups.tables == ["some_table"]
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---- | ----------------------------------- |
|
||||
| **RETURNS** | list | Names of the tables in the lookups. |
|
||||
|
||||
## Lookups.add_table {#add_table tag="method"}
|
||||
|
||||
Add a new table with optional data to the lookups. Raises an error if the table
|
||||
exists.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table", {"foo": "bar"})
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----------------------------- | ---------------------------------- |
|
||||
| `name` | unicode | Unique name of the table. |
|
||||
| `data` | dict | Optional data to add to the table. |
|
||||
| **RETURNS** | [`Table`](/api/lookups#table) | The newly added table. |
|
||||
|
||||
## Lookups.get_table {#get_table tag="method"}
|
||||
|
||||
Get a table from the lookups. Raises an error if the table doesn't exist.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table", {"foo": "bar"})
|
||||
> table = lookups.get_table("some_table")
|
||||
> assert table["foo"] == "bar"
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----------------------------- | ------------------ |
|
||||
| `name` | unicode | Name of the table. |
|
||||
| **RETURNS** | [`Table`](/api/lookups#table) | The table. |
|
||||
|
||||
## Lookups.remove_table {#remove_table tag="method"}
|
||||
|
||||
Remove a table from the lookups. Raises an error if the table doesn't exist.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table")
|
||||
> removed_table = lookups.remove_table("some_table")
|
||||
> assert "some_table" not in lookups
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----------------------------- | ---------------------------- |
|
||||
| `name` | unicode | Name of the table to remove. |
|
||||
| **RETURNS** | [`Table`](/api/lookups#table) | The removed table. |
|
||||
|
||||
## Lookups.has_table {#has_table tag="method"}
|
||||
|
||||
Check if the lookups contain a table of a given name. Equivalent to
|
||||
[`Lookups.__contains__`](/api/lookups#contains).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("some_table")
|
||||
> assert lookups.has_table("some_table")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ------- | ----------------------------------------------- |
|
||||
| `name` | unicode | Name of the table. |
|
||||
| **RETURNS** | bool | Whether a table of that name is in the lookups. |
|
||||
|
||||
## Lookups.to_bytes {#to_bytes tag="method"}
|
||||
|
||||
Serialize the lookups to a bytestring.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookup_bytes = lookups.to_bytes()
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----- | ----------------------- |
|
||||
| **RETURNS** | bytes | The serialized lookups. |
|
||||
|
||||
## Lookups.from_bytes {#from_bytes tag="method"}
|
||||
|
||||
Load the lookups from a bytestring.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookup_bytes = lookups.to_bytes()
|
||||
> lookups = Lookups()
|
||||
> lookups.from_bytes(lookup_bytes)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------ | --------- | ---------------------- |
|
||||
| `bytes_data` | bytes | The data to load from. |
|
||||
| **RETURNS** | `Lookups` | The loaded lookups. |
|
||||
|
||||
## Lookups.to_disk {#to_disk tag="method"}
|
||||
|
||||
Save the lookups to a directory as `lookups.bin`. Expects a path to a directory,
|
||||
which will be created if it doesn't exist.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups.to_disk("/path/to/lookups")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
|
||||
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
|
||||
|
||||
## Lookups.from_disk {#from_disk tag="method"}
|
||||
|
||||
Load lookups from a directory containing a `lookups.bin`. Will skip loading if
|
||||
the file doesn't exist.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lookups import Lookups
|
||||
> lookups = Lookups()
|
||||
> lookups.from_disk("/path/to/lookups")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---------------- | -------------------------------------------------------------------------- |
|
||||
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
|
||||
| **RETURNS** | `Lookups` | The loaded lookups. |
|
||||
|
||||
## Table {#table tag="class, ordererddict"}
|
||||
|
||||
A table in the lookups. Subclass of `OrderedDict` that implements a slightly
|
||||
more consistent and unified API. Supports all other methods and attributes of
|
||||
`OrderedDict` / `dict`, and the customized methods listed here.
|
||||
|
||||
### Table.\_\_init\_\_ {#table.init tag="method"}
|
||||
|
||||
Initialize a new table.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lookups import Table
|
||||
> table = Table(name="some_table")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ------- | ---------------------------------- |
|
||||
| `name` | unicode | Optional table name for reference. |
|
||||
| **RETURNS** | `Table` | The newly constructed object. |
|
||||
|
||||
### Table.from_dict {#table.from_dict tag="classmethod"}
|
||||
|
||||
Initialize a new table from a dict.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lookups import Table
|
||||
> data = {"foo": "bar", "baz": 100}
|
||||
> table = Table.from_dict(data, name="some_table")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ------- | ---------------------------------- |
|
||||
| `data` | dict | The dictionary. |
|
||||
| `name` | unicode | Optional table name for reference. |
|
||||
| **RETURNS** | `Table` | The newly constructed object. |
|
||||
|
||||
### Table.set {#table.set tag="key"}
|
||||
|
||||
Set a new key / value pair. Same as `table[key] = value`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lookups import Table
|
||||
> table = Table()
|
||||
> table.set("foo", "bar")
|
||||
> assert table["foo"] == "bar"
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------- | ------- | ----------- |
|
||||
| `key` | unicode | The key. |
|
||||
| `value` | - | The value. |
|
|
@ -293,6 +293,7 @@ Load state from a binary string.
|
|||
| `strings` | `StringStore` | A table managing the string-to-int mapping. |
|
||||
| `vectors` <Tag variant="new">2</Tag> | `Vectors` | A table associating word IDs to word vectors. |
|
||||
| `vectors_length` | int | Number of dimensions for each word vector. |
|
||||
| `lookups` | `Lookups` | The available lookup tables in this vocab. |
|
||||
| `writing_system` <Tag variant="new">2.1</Tag> | dict | A dict with information about the language's writing system. |
|
||||
|
||||
## Serialization fields {#serialization-fields}
|
||||
|
@ -313,3 +314,4 @@ serialization by passing in the string names via the `exclude` argument.
|
|||
| `strings` | The strings in the [`StringStore`](/api/stringstore). |
|
||||
| `lexemes` | The lexeme data. |
|
||||
| `vectors` | The word vectors, if available. |
|
||||
| `lookups` | The lookup tables, if available. |
|
||||
|
|
|
@ -90,6 +90,7 @@
|
|||
{ "text": "Vocab", "url": "/api/vocab" },
|
||||
{ "text": "StringStore", "url": "/api/stringstore" },
|
||||
{ "text": "Vectors", "url": "/api/vectors" },
|
||||
{ "text": "Lookups", "url": "/api/lookups" },
|
||||
{ "text": "KnowledgeBase", "url": "/api/kb" },
|
||||
{ "text": "GoldParse", "url": "/api/goldparse" },
|
||||
{ "text": "GoldCorpus", "url": "/api/goldcorpus" },
|
||||
|
|
Loading…
Reference in New Issue
Block a user