mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Merge pull request #6379 from svlandeg/fix/labels-constructor
This commit is contained in:
commit
82e88f0e3b
|
@ -67,9 +67,6 @@ class Morphologizer(Tagger):
|
||||||
vocab: Vocab,
|
vocab: Vocab,
|
||||||
model: Model,
|
model: Model,
|
||||||
name: str = "morphologizer",
|
name: str = "morphologizer",
|
||||||
*,
|
|
||||||
labels_morph: Optional[dict] = None,
|
|
||||||
labels_pos: Optional[dict] = None,
|
|
||||||
):
|
):
|
||||||
"""Initialize a morphologizer.
|
"""Initialize a morphologizer.
|
||||||
|
|
||||||
|
@ -77,8 +74,6 @@ class Morphologizer(Tagger):
|
||||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
labels_morph (dict): Mapping of morph + POS tags to morph labels.
|
|
||||||
labels_pos (dict): Mapping of morph + POS tags to POS tags.
|
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/morphologizer#init
|
DOCS: https://nightly.spacy.io/api/morphologizer#init
|
||||||
"""
|
"""
|
||||||
|
@ -90,7 +85,7 @@ class Morphologizer(Tagger):
|
||||||
# store mappings from morph+POS labels to token-level annotations:
|
# store mappings from morph+POS labels to token-level annotations:
|
||||||
# 1) labels_morph stores a mapping from morph+POS->morph
|
# 1) labels_morph stores a mapping from morph+POS->morph
|
||||||
# 2) labels_pos stores a mapping from morph+POS->POS
|
# 2) labels_pos stores a mapping from morph+POS->POS
|
||||||
cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}}
|
cfg = {"labels_morph": {}, "labels_pos": {}}
|
||||||
self.cfg = dict(sorted(cfg.items()))
|
self.cfg = dict(sorted(cfg.items()))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -47,7 +47,7 @@ class MultitaskObjective(Tagger):
|
||||||
side-objective.
|
side-objective.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, vocab, model, name="nn_labeller", *, labels, target):
|
def __init__(self, vocab, model, name="nn_labeller", *, target):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -67,7 +67,7 @@ class MultitaskObjective(Tagger):
|
||||||
self.make_label = target
|
self.make_label = target
|
||||||
else:
|
else:
|
||||||
raise ValueError(Errors.E016)
|
raise ValueError(Errors.E016)
|
||||||
cfg = {"labels": labels or {}, "target": target}
|
cfg = {"labels": {}, "target": target}
|
||||||
self.cfg = dict(cfg)
|
self.cfg = dict(cfg)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -81,15 +81,18 @@ class MultitaskObjective(Tagger):
|
||||||
def set_annotations(self, docs, dep_ids):
|
def set_annotations(self, docs, dep_ids):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def initialize(self, get_examples, nlp=None):
|
def initialize(self, get_examples, nlp=None, labels=None):
|
||||||
if not hasattr(get_examples, "__call__"):
|
if not hasattr(get_examples, "__call__"):
|
||||||
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
|
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
|
||||||
raise ValueError(err)
|
raise ValueError(err)
|
||||||
for example in get_examples():
|
if labels is not None:
|
||||||
for token in example.y:
|
self.labels = labels
|
||||||
label = self.make_label(token)
|
else:
|
||||||
if label is not None and label not in self.labels:
|
for example in get_examples():
|
||||||
self.labels[label] = len(self.labels)
|
for token in example.y:
|
||||||
|
label = self.make_label(token)
|
||||||
|
if label is not None and label not in self.labels:
|
||||||
|
self.labels[label] = len(self.labels)
|
||||||
self.model.initialize() # TODO: fix initialization by defining X and Y
|
self.model.initialize() # TODO: fix initialization by defining X and Y
|
||||||
|
|
||||||
def predict(self, docs):
|
def predict(self, docs):
|
||||||
|
|
|
@ -61,14 +61,13 @@ class Tagger(TrainablePipe):
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/tagger
|
DOCS: https://nightly.spacy.io/api/tagger
|
||||||
"""
|
"""
|
||||||
def __init__(self, vocab, model, name="tagger", *, labels=None):
|
def __init__(self, vocab, model, name="tagger"):
|
||||||
"""Initialize a part-of-speech tagger.
|
"""Initialize a part-of-speech tagger.
|
||||||
|
|
||||||
vocab (Vocab): The shared vocabulary.
|
vocab (Vocab): The shared vocabulary.
|
||||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
labels (List): The set of labels. Defaults to None.
|
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/tagger#init
|
DOCS: https://nightly.spacy.io/api/tagger#init
|
||||||
"""
|
"""
|
||||||
|
@ -76,7 +75,7 @@ class Tagger(TrainablePipe):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.name = name
|
self.name = name
|
||||||
self._rehearsal_model = None
|
self._rehearsal_model = None
|
||||||
cfg = {"labels": labels or []}
|
cfg = {"labels": []}
|
||||||
self.cfg = dict(sorted(cfg.items()))
|
self.cfg = dict(sorted(cfg.items()))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -66,9 +66,6 @@ shortcut for this and instantiate the component using its string name and
|
||||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
|
||||||
| `labels_morph` | Mapping of morph + POS tags to morph labels. ~~Dict[str, str]~~ |
|
|
||||||
| `labels_pos` | Mapping of morph + POS tags to POS tags. ~~Dict[str, str]~~ |
|
|
||||||
|
|
||||||
## Morphologizer.\_\_call\_\_ {#call tag="method"}
|
## Morphologizer.\_\_call\_\_ {#call tag="method"}
|
||||||
|
|
||||||
|
|
|
@ -21,16 +21,12 @@ architectures and their arguments and hyperparameters.
|
||||||
>
|
>
|
||||||
> ```python
|
> ```python
|
||||||
> from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
|
> from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
|
||||||
> config = {
|
> config = {"model": DEFAULT_TAGGER_MODEL}
|
||||||
> "set_morphology": False,
|
|
||||||
> "model": DEFAULT_TAGGER_MODEL,
|
|
||||||
> }
|
|
||||||
> nlp.add_pipe("tagger", config=config)
|
> nlp.add_pipe("tagger", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `set_morphology` | Whether to set morphological features. Defaults to `False`. ~~bool~~ |
|
|
||||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
@ -63,8 +59,6 @@ shortcut for this and instantiate the component using its string name and
|
||||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
|
||||||
| `set_morphology` | Whether to set morphological features. ~~bool~~ |
|
|
||||||
|
|
||||||
## Tagger.\_\_call\_\_ {#call tag="method"}
|
## Tagger.\_\_call\_\_ {#call tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user