Merge pull request #6379 from svlandeg/fix/labels-constructor

This commit is contained in:
Ines Montani 2020-12-08 06:29:56 +01:00 committed by GitHub
commit 82e88f0e3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 15 additions and 27 deletions

View File

@ -67,9 +67,6 @@ class Morphologizer(Tagger):
vocab: Vocab,
model: Model,
name: str = "morphologizer",
*,
labels_morph: Optional[dict] = None,
labels_pos: Optional[dict] = None,
):
"""Initialize a morphologizer.
@ -77,8 +74,6 @@ class Morphologizer(Tagger):
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
labels_morph (dict): Mapping of morph + POS tags to morph labels.
labels_pos (dict): Mapping of morph + POS tags to POS tags.
DOCS: https://nightly.spacy.io/api/morphologizer#init
"""
@ -90,7 +85,7 @@ class Morphologizer(Tagger):
# store mappings from morph+POS labels to token-level annotations:
# 1) labels_morph stores a mapping from morph+POS->morph
# 2) labels_pos stores a mapping from morph+POS->POS
cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}}
cfg = {"labels_morph": {}, "labels_pos": {}}
self.cfg = dict(sorted(cfg.items()))
@property

View File

@ -47,7 +47,7 @@ class MultitaskObjective(Tagger):
side-objective.
"""
def __init__(self, vocab, model, name="nn_labeller", *, labels, target):
def __init__(self, vocab, model, name="nn_labeller", *, target):
self.vocab = vocab
self.model = model
self.name = name
@ -67,7 +67,7 @@ class MultitaskObjective(Tagger):
self.make_label = target
else:
raise ValueError(Errors.E016)
cfg = {"labels": labels or {}, "target": target}
cfg = {"labels": {}, "target": target}
self.cfg = dict(cfg)
@property
@ -81,15 +81,18 @@ class MultitaskObjective(Tagger):
def set_annotations(self, docs, dep_ids):
pass
def initialize(self, get_examples, nlp=None):
def initialize(self, get_examples, nlp=None, labels=None):
if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
raise ValueError(err)
for example in get_examples():
for token in example.y:
label = self.make_label(token)
if label is not None and label not in self.labels:
self.labels[label] = len(self.labels)
if labels is not None:
self.labels = labels
else:
for example in get_examples():
for token in example.y:
label = self.make_label(token)
if label is not None and label not in self.labels:
self.labels[label] = len(self.labels)
self.model.initialize() # TODO: fix initialization by defining X and Y
def predict(self, docs):

View File

@ -61,14 +61,13 @@ class Tagger(TrainablePipe):
DOCS: https://nightly.spacy.io/api/tagger
"""
def __init__(self, vocab, model, name="tagger", *, labels=None):
def __init__(self, vocab, model, name="tagger"):
"""Initialize a part-of-speech tagger.
vocab (Vocab): The shared vocabulary.
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
labels (List): The set of labels. Defaults to None.
DOCS: https://nightly.spacy.io/api/tagger#init
"""
@ -76,7 +75,7 @@ class Tagger(TrainablePipe):
self.model = model
self.name = name
self._rehearsal_model = None
cfg = {"labels": labels or []}
cfg = {"labels": []}
self.cfg = dict(sorted(cfg.items()))
@property

View File

@ -66,9 +66,6 @@ shortcut for this and instantiate the component using its string name and
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `labels_morph` | Mapping of morph + POS tags to morph labels. ~~Dict[str, str]~~ |
| `labels_pos` | Mapping of morph + POS tags to POS tags. ~~Dict[str, str]~~ |
## Morphologizer.\_\_call\_\_ {#call tag="method"}

View File

@ -21,16 +21,12 @@ architectures and their arguments and hyperparameters.
>
> ```python
> from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
> config = {
> "set_morphology": False,
> "model": DEFAULT_TAGGER_MODEL,
> }
> config = {"model": DEFAULT_TAGGER_MODEL}
> nlp.add_pipe("tagger", config=config)
> ```
| Setting | Description |
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `set_morphology` | Whether to set morphological features. Defaults to `False`. ~~bool~~ |
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
@ -63,8 +59,6 @@ shortcut for this and instantiate the component using its string name and
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `set_morphology` | Whether to set morphological features. ~~bool~~ |
## Tagger.\_\_call\_\_ {#call tag="method"}