Merge pull request #6379 from svlandeg/fix/labels-constructor

This commit is contained in:
Ines Montani 2020-12-08 06:29:56 +01:00 committed by GitHub
commit 82e88f0e3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 15 additions and 27 deletions

View File

@ -67,9 +67,6 @@ class Morphologizer(Tagger):
vocab: Vocab, vocab: Vocab,
model: Model, model: Model,
name: str = "morphologizer", name: str = "morphologizer",
*,
labels_morph: Optional[dict] = None,
labels_pos: Optional[dict] = None,
): ):
"""Initialize a morphologizer. """Initialize a morphologizer.
@ -77,8 +74,6 @@ class Morphologizer(Tagger):
model (thinc.api.Model): The Thinc Model powering the pipeline component. model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
labels_morph (dict): Mapping of morph + POS tags to morph labels.
labels_pos (dict): Mapping of morph + POS tags to POS tags.
DOCS: https://nightly.spacy.io/api/morphologizer#init DOCS: https://nightly.spacy.io/api/morphologizer#init
""" """
@ -90,7 +85,7 @@ class Morphologizer(Tagger):
# store mappings from morph+POS labels to token-level annotations: # store mappings from morph+POS labels to token-level annotations:
# 1) labels_morph stores a mapping from morph+POS->morph # 1) labels_morph stores a mapping from morph+POS->morph
# 2) labels_pos stores a mapping from morph+POS->POS # 2) labels_pos stores a mapping from morph+POS->POS
cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}} cfg = {"labels_morph": {}, "labels_pos": {}}
self.cfg = dict(sorted(cfg.items())) self.cfg = dict(sorted(cfg.items()))
@property @property

View File

@ -47,7 +47,7 @@ class MultitaskObjective(Tagger):
side-objective. side-objective.
""" """
def __init__(self, vocab, model, name="nn_labeller", *, labels, target): def __init__(self, vocab, model, name="nn_labeller", *, target):
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
self.name = name self.name = name
@ -67,7 +67,7 @@ class MultitaskObjective(Tagger):
self.make_label = target self.make_label = target
else: else:
raise ValueError(Errors.E016) raise ValueError(Errors.E016)
cfg = {"labels": labels or {}, "target": target} cfg = {"labels": {}, "target": target}
self.cfg = dict(cfg) self.cfg = dict(cfg)
@property @property
@ -81,15 +81,18 @@ class MultitaskObjective(Tagger):
def set_annotations(self, docs, dep_ids): def set_annotations(self, docs, dep_ids):
pass pass
def initialize(self, get_examples, nlp=None): def initialize(self, get_examples, nlp=None, labels=None):
if not hasattr(get_examples, "__call__"): if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples)) err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
raise ValueError(err) raise ValueError(err)
for example in get_examples(): if labels is not None:
for token in example.y: self.labels = labels
label = self.make_label(token) else:
if label is not None and label not in self.labels: for example in get_examples():
self.labels[label] = len(self.labels) for token in example.y:
label = self.make_label(token)
if label is not None and label not in self.labels:
self.labels[label] = len(self.labels)
self.model.initialize() # TODO: fix initialization by defining X and Y self.model.initialize() # TODO: fix initialization by defining X and Y
def predict(self, docs): def predict(self, docs):

View File

@ -61,14 +61,13 @@ class Tagger(TrainablePipe):
DOCS: https://nightly.spacy.io/api/tagger DOCS: https://nightly.spacy.io/api/tagger
""" """
def __init__(self, vocab, model, name="tagger", *, labels=None): def __init__(self, vocab, model, name="tagger"):
"""Initialize a part-of-speech tagger. """Initialize a part-of-speech tagger.
vocab (Vocab): The shared vocabulary. vocab (Vocab): The shared vocabulary.
model (thinc.api.Model): The Thinc Model powering the pipeline component. model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
labels (List): The set of labels. Defaults to None.
DOCS: https://nightly.spacy.io/api/tagger#init DOCS: https://nightly.spacy.io/api/tagger#init
""" """
@ -76,7 +75,7 @@ class Tagger(TrainablePipe):
self.model = model self.model = model
self.name = name self.name = name
self._rehearsal_model = None self._rehearsal_model = None
cfg = {"labels": labels or []} cfg = {"labels": []}
self.cfg = dict(sorted(cfg.items())) self.cfg = dict(sorted(cfg.items()))
@property @property

View File

@ -66,9 +66,6 @@ shortcut for this and instantiate the component using its string name and
| `vocab` | The shared vocabulary. ~~Vocab~~ | | `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `labels_morph` | Mapping of morph + POS tags to morph labels. ~~Dict[str, str]~~ |
| `labels_pos` | Mapping of morph + POS tags to POS tags. ~~Dict[str, str]~~ |
## Morphologizer.\_\_call\_\_ {#call tag="method"} ## Morphologizer.\_\_call\_\_ {#call tag="method"}

View File

@ -21,16 +21,12 @@ architectures and their arguments and hyperparameters.
> >
> ```python > ```python
> from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL > from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
> config = { > config = {"model": DEFAULT_TAGGER_MODEL}
> "set_morphology": False,
> "model": DEFAULT_TAGGER_MODEL,
> }
> nlp.add_pipe("tagger", config=config) > nlp.add_pipe("tagger", config=config)
> ``` > ```
| Setting | Description | | Setting | Description |
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `set_morphology` | Whether to set morphological features. Defaults to `False`. ~~bool~~ |
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
@ -63,8 +59,6 @@ shortcut for this and instantiate the component using its string name and
| `vocab` | The shared vocabulary. ~~Vocab~~ | | `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `set_morphology` | Whether to set morphological features. ~~bool~~ |
## Tagger.\_\_call\_\_ {#call tag="method"} ## Tagger.\_\_call\_\_ {#call tag="method"}