mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 05:07:03 +03:00
Merge pull request #5791 from adrianeboyd/docs/morphology
This commit is contained in:
commit
eb9acae34d
153
website/docs/api/morphanalysis.md
Normal file
153
website/docs/api/morphanalysis.md
Normal file
|
@ -0,0 +1,153 @@
|
||||||
|
---
|
||||||
|
title: MorphAnalysis
|
||||||
|
tag: class
|
||||||
|
source: spacy/tokens/morphanalysis.pyx
|
||||||
|
---
|
||||||
|
|
||||||
|
Stores a single morphological analysis.
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.\_\_init\_\_ {#init tag="method"}
|
||||||
|
|
||||||
|
Initialize a MorphAnalysis object from a UD FEATS string or a dictionary of
|
||||||
|
morphological features.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> from spacy.tokens import MorphAnalysis
|
||||||
|
>
|
||||||
|
> feats = "Feat1=Val1|Feat2=Val2"
|
||||||
|
> m = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------------------ | ----------------------------- |
|
||||||
|
| `vocab` | `Vocab` | The vocab. |
|
||||||
|
| `features` | `Union[Dict, str]` | The morphological features. |
|
||||||
|
| **RETURNS** | `MorphAnalysis` | The newly constructed object. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.\_\_contains\_\_ {#contains tag="method"}
|
||||||
|
|
||||||
|
Whether a feature/value pair is in the analysis.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val2|Feat2=Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert "Feat1=Val1" in morph
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ----- | ------------------------------------- |
|
||||||
|
| **RETURNS** | `bool` | Whether the feature/value pair is in the analysis. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.\_\_iter\_\_ {#iter tag="method"}
|
||||||
|
|
||||||
|
Iterate over the feature/value pairs in the analysis.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val3|Feat2=Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert list(morph) == ["Feat1=Val1", "Feat1=Val3", "Feat2=Val2"]
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ---------- | ----- | ------------------------------------- |
|
||||||
|
| **YIELDS** | `str` | A feature/value pair in the analysis. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.\_\_len\_\_ {#len tag="method"}
|
||||||
|
|
||||||
|
Returns the number of features in the analysis.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val2|Feat2=Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert len(morph) == 3
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ----- | --------------------------------------- |
|
||||||
|
| **RETURNS** | `int` | The number of features in the analysis. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.\_\_str\_\_ {#str tag="method"}
|
||||||
|
|
||||||
|
Returns the morphological analysis in the UD FEATS string format.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val2|Feat2=Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert str(morph) == feats
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ----- | ---------------------------------|
|
||||||
|
| **RETURNS** | `str` | The analysis in UD FEATS format. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.get {#get tag="method"}
|
||||||
|
|
||||||
|
Retrieve values for a feature by field.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert morph.get("Feat1") == ["Val1", "Val2"]
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------ | ----------------------------------- |
|
||||||
|
| `field` | `str` | The field to retrieve. |
|
||||||
|
| **RETURNS** | `list` | A list of the individual values for the field. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.to_dict {#to_dict tag="method"}
|
||||||
|
|
||||||
|
Produce a dict representation of the analysis, in the same format as the tag
|
||||||
|
map.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1,Val2|Feat2=Val2"
|
||||||
|
> morph = MorphAnalysis(nlp.vocab, feats)
|
||||||
|
> assert morph.to_dict() == {"Feat1": "Val1,Val2", "Feat2": "Val2"}
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------ | -----------------------------------------|
|
||||||
|
| **RETURNS** | `dict` | The dict representation of the analysis. |
|
||||||
|
|
||||||
|
|
||||||
|
## MorphAnalysis.from_id {#from_id tag="classmethod"}
|
||||||
|
|
||||||
|
Create a morphological analysis from a given hash ID.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1|Feat2=Val2"
|
||||||
|
> hash = nlp.vocab.strings[feats]
|
||||||
|
> morph = MorphAnalysis.from_id(nlp.vocab, hash)
|
||||||
|
> assert str(morph) == feats
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------- | ------- | -------------------------------- |
|
||||||
|
| `vocab` | `Vocab` | The vocab. |
|
||||||
|
| `key` | `int` | The hash of the features string. |
|
||||||
|
|
||||||
|
|
165
website/docs/api/morphology.md
Normal file
165
website/docs/api/morphology.md
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
---
|
||||||
|
title: Morphology
|
||||||
|
tag: class
|
||||||
|
source: spacy/morphology.pyx
|
||||||
|
---
|
||||||
|
|
||||||
|
Store the possible morphological analyses for a language, and index them
|
||||||
|
by hash. To save space on each token, tokens only know the hash of their
|
||||||
|
morphological analysis, so queries of morphological attributes are delegated to
|
||||||
|
this class.
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.\_\_init\_\_ {#init tag="method"}
|
||||||
|
|
||||||
|
Create a Morphology object using the tag map, lemmatizer and exceptions.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> from spacy.morphology import Morphology
|
||||||
|
>
|
||||||
|
> morphology = Morphology(strings, tag_map, lemmatizer)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `strings` | `StringStore` | The string store. |
|
||||||
|
| `tag_map` | `Dict[str, Dict]` | The tag map. |
|
||||||
|
| `lemmatizer`| `Lemmatizer` | The lemmatizer. |
|
||||||
|
| `exc` | `Dict[str, Dict]` | A dictionary of exceptions in the format `{tag: {orth: {"POS": "X", "Feat1": "Val1", "Feat2": "Val2", ...}}}` |
|
||||||
|
| **RETURNS** | `Morphology` | The newly constructed object. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.add {#add tag="method"}
|
||||||
|
|
||||||
|
Insert a morphological analysis in the morphology table, if not already
|
||||||
|
present. The morphological analysis may be provided in the UD FEATS format as a
|
||||||
|
string or in the tag map dictionary format. Returns the hash of the new
|
||||||
|
analysis.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1|Feat2=Val2"
|
||||||
|
> hash = nlp.vocab.morphology.add(feats)
|
||||||
|
> assert hash == nlp.vocab.strings[feats]
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------------------- | --------------------------- |
|
||||||
|
| `features` | `Union[Dict, str]` | The morphological features. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.get {#get tag="method"}
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> feats = "Feat1=Val1|Feat2=Val2"
|
||||||
|
> hash = nlp.vocab.morphology.add(feats)
|
||||||
|
> assert nlp.vocab.morphology.get(hash) == feats
|
||||||
|
> ```
|
||||||
|
|
||||||
|
Get the FEATS string for the hash of the morphological analysis.
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------ | --------------------------------------- |
|
||||||
|
| `morph` | int | The hash of the morphological analysis. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.load_tag_map {#load_tag_map tag="method"}
|
||||||
|
|
||||||
|
Replace the current tag map with the provided tag map.
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ------------------ | ------------ |
|
||||||
|
| `tag_map` | `Dict[str, Dict]` | The tag map. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.load_morph_exceptions {#load_morph_exceptions tag="method"}
|
||||||
|
|
||||||
|
Replace the current morphological exceptions with the provided exceptions.
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------------- | ------------------ | ----------------------------- |
|
||||||
|
| `morph_rules` | `Dict[str, Dict]` | The morphological exceptions. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.add_special_case {#add_special_case tag="method"}
|
||||||
|
|
||||||
|
Add a special-case rule to the morphological analyzer. Tokens whose tag and
|
||||||
|
orth match the rule will receive the specified properties.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> attrs = {"POS": "DET", "Definite": "Def"}
|
||||||
|
> morphology.add_special_case("DT", "the", attrs)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ---- | ---------------------------------------------- |
|
||||||
|
| `tag_str` | str | The fine-grained tag. |
|
||||||
|
| `orth_str` | str | The token text. |
|
||||||
|
| `attrs` | dict | The features to assign for this token and tag. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.exc {#exc tag="property"}
|
||||||
|
|
||||||
|
The current morphological exceptions.
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ---------- | ----- | --------------------------------------------------- |
|
||||||
|
| **RETURNS** | dict | The current dictionary of morphological exceptions. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.lemmatize {#lemmatize tag="method"}
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.feats_to_dict {#feats_to_dict tag="staticmethod"}
|
||||||
|
|
||||||
|
Convert a string FEATS representation to a dictionary of features and values in
|
||||||
|
the same format as the tag map.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> from spacy.morphology import Morphology
|
||||||
|
> d = Morphology.feats_to_dict("Feat1=Val1|Feat2=Val2")
|
||||||
|
> assert d == {"Feat1": "Val1", "Feat2": "Val2"}
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ----------- | ---- | ------------------------------------------------------------- |
|
||||||
|
| `feats` | str | The morphological features in Universal Dependencies FEATS format. |
|
||||||
|
| **RETURNS** | dict | The morphological features as a dictionary. |
|
||||||
|
|
||||||
|
|
||||||
|
## Morphology.dict_to_feats {#dict_to_feats tag="staticmethod"}
|
||||||
|
|
||||||
|
Convert a dictionary of features and values to a string FEATS representation.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> from spacy.morphology import Morphology
|
||||||
|
> f = Morphology.dict_to_feats({"Feat1": "Val1", "Feat2": "Val2"})
|
||||||
|
> assert f == "Feat1=Val1|Feat2=Val2"
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------------ | ----------------- | --------------------------------------------------------------------- |
|
||||||
|
| `feats_dict` | `Dict[str, str]` | The morphological features as a dictionary. |
|
||||||
|
| **RETURNS** | str | The morphological features in Universal Dependencies FEATS format. |
|
||||||
|
|
||||||
|
|
||||||
|
## Attributes {#attributes}
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------------- | ----- | -------------------------------------------- |
|
||||||
|
| `FEATURE_SEP` | `str` | The FEATS feature separator. Default is `|`. |
|
||||||
|
| `FIELD_SEP` | `str` | The FEATS field separator. Default is `=`. |
|
||||||
|
| `VALUE_SEP` | `str` | The FEATS value separator. Default is `,`. |
|
|
@ -450,6 +450,8 @@ The L2 norm of the token's vector representation.
|
||||||
| `pos_` | str | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/). |
|
| `pos_` | str | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/). |
|
||||||
| `tag` | int | Fine-grained part-of-speech. |
|
| `tag` | int | Fine-grained part-of-speech. |
|
||||||
| `tag_` | str | Fine-grained part-of-speech. |
|
| `tag_` | str | Fine-grained part-of-speech. |
|
||||||
|
| `morph` | `MorphAnalysis` | Morphological analysis. |
|
||||||
|
| `morph_` | str | Morphological analysis in UD FEATS format. |
|
||||||
| `dep` | int | Syntactic dependency relation. |
|
| `dep` | int | Syntactic dependency relation. |
|
||||||
| `dep_` | str | Syntactic dependency relation. |
|
| `dep_` | str | Syntactic dependency relation. |
|
||||||
| `lang` | int | Language of the parent document's vocabulary. |
|
| `lang` | int | Language of the parent document's vocabulary. |
|
||||||
|
|
|
@ -24,6 +24,7 @@ an **annotated document**. It also orchestrates training and serialization.
|
||||||
| [`Span`](/api/span) | A slice from a `Doc` object. |
|
| [`Span`](/api/span) | A slice from a `Doc` object. |
|
||||||
| [`Token`](/api/token) | An individual token — i.e. a word, punctuation symbol, whitespace, etc. |
|
| [`Token`](/api/token) | An individual token — i.e. a word, punctuation symbol, whitespace, etc. |
|
||||||
| [`Lexeme`](/api/lexeme) | An entry in the vocabulary. It's a word type with no context, as opposed to a word token. It therefore has no part-of-speech tag, dependency parse etc. |
|
| [`Lexeme`](/api/lexeme) | An entry in the vocabulary. It's a word type with no context, as opposed to a word token. It therefore has no part-of-speech tag, dependency parse etc. |
|
||||||
|
| [`MorphAnalysis`](/api/morphanalysis) | A morphological analysis. |
|
||||||
|
|
||||||
### Processing pipeline {#architecture-pipeline}
|
### Processing pipeline {#architecture-pipeline}
|
||||||
|
|
||||||
|
@ -32,7 +33,7 @@ an **annotated document**. It also orchestrates training and serialization.
|
||||||
| [`Language`](/api/language) | A text-processing pipeline. Usually you'll load this once per process as `nlp` and pass the instance around your application. |
|
| [`Language`](/api/language) | A text-processing pipeline. Usually you'll load this once per process as `nlp` and pass the instance around your application. |
|
||||||
| [`Tokenizer`](/api/tokenizer) | Segment text, and create `Doc` objects with the discovered segment boundaries. |
|
| [`Tokenizer`](/api/tokenizer) | Segment text, and create `Doc` objects with the discovered segment boundaries. |
|
||||||
| [`Lemmatizer`](/api/lemmatizer) | Determine the base forms of words. |
|
| [`Lemmatizer`](/api/lemmatizer) | Determine the base forms of words. |
|
||||||
| `Morphology` | Assign linguistic features like lemmas, noun case, verb tense etc. based on the word and its part-of-speech tag. |
|
| [`Morphology`](/api/morphology) | Assign linguistic features like lemmas, noun case, verb tense etc. based on the word and its part-of-speech tag. |
|
||||||
| [`Tagger`](/api/tagger) | Annotate part-of-speech tags on `Doc` objects. |
|
| [`Tagger`](/api/tagger) | Annotate part-of-speech tags on `Doc` objects. |
|
||||||
| [`DependencyParser`](/api/dependencyparser) | Annotate syntactic dependencies on `Doc` objects. |
|
| [`DependencyParser`](/api/dependencyparser) | Annotate syntactic dependencies on `Doc` objects. |
|
||||||
| [`EntityRecognizer`](/api/entityrecognizer) | Annotate named entities, e.g. persons or products, on `Doc` objects. |
|
| [`EntityRecognizer`](/api/entityrecognizer) | Annotate named entities, e.g. persons or products, on `Doc` objects. |
|
||||||
|
|
|
@ -102,6 +102,8 @@
|
||||||
{ "text": "StringStore", "url": "/api/stringstore" },
|
{ "text": "StringStore", "url": "/api/stringstore" },
|
||||||
{ "text": "Vectors", "url": "/api/vectors" },
|
{ "text": "Vectors", "url": "/api/vectors" },
|
||||||
{ "text": "Lookups", "url": "/api/lookups" },
|
{ "text": "Lookups", "url": "/api/lookups" },
|
||||||
|
{ "text": "Morphology", "url": "/api/morphology" },
|
||||||
|
{ "text": "MorphAnalysis", "url": "/api/morphanalysis" },
|
||||||
{ "text": "KnowledgeBase", "url": "/api/kb" },
|
{ "text": "KnowledgeBase", "url": "/api/kb" },
|
||||||
{ "text": "Scorer", "url": "/api/scorer" },
|
{ "text": "Scorer", "url": "/api/scorer" },
|
||||||
{ "text": "Corpus", "url": "/api/corpus" }
|
{ "text": "Corpus", "url": "/api/corpus" }
|
||||||
|
|
Loading…
Reference in New Issue
Block a user