Merge pull request #6379 from svlandeg/fix/labels-constructor

2025-07-10 16:22:29 +03:00 · 2020-12-08 06:29:56 +01:00 · 2020-12-08 06:29:56 +01:00 · 82e88f0e3b
commit 82e88f0e3b
parent 78085fab1f 73fc1ed963
5 changed files with 15 additions and 27 deletions
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@ -67,9 +67,6 @@ class Morphologizer(Tagger):
        vocab: Vocab,
        model: Model,
        name: str = "morphologizer",
-        *,
-        labels_morph: Optional[dict] = None,
-        labels_pos: Optional[dict] = None,
    ):
        """Initialize a morphologizer.
@ -77,8 +74,6 @@ class Morphologizer(Tagger):
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
-        labels_morph (dict): Mapping of morph + POS tags to morph labels.
-        labels_pos (dict): Mapping of morph + POS tags to POS tags.
        DOCS: https://nightly.spacy.io/api/morphologizer#init
        """
@ -90,7 +85,7 @@ class Morphologizer(Tagger):
        # store mappings from morph+POS labels to token-level annotations:
        # 1) labels_morph stores a mapping from morph+POS->morph
        # 2) labels_pos stores a mapping from morph+POS->POS
-        cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}}
+        cfg = {"labels_morph": {}, "labels_pos": {}}
        self.cfg = dict(sorted(cfg.items()))
    @property
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@ -47,7 +47,7 @@ class MultitaskObjective(Tagger):
    side-objective.
    """
-    def __init__(self, vocab, model, name="nn_labeller", *, labels, target):
+    def __init__(self, vocab, model, name="nn_labeller", *, target):
        self.vocab = vocab
        self.model = model
        self.name = name
@ -67,7 +67,7 @@ class MultitaskObjective(Tagger):
            self.make_label = target
        else:
            raise ValueError(Errors.E016)
-        cfg = {"labels": labels or {}, "target": target}
+        cfg = {"labels": {}, "target": target}
        self.cfg = dict(cfg)
    @property
@ -81,15 +81,18 @@ class MultitaskObjective(Tagger):
    def set_annotations(self, docs, dep_ids):
        pass
-    def initialize(self, get_examples, nlp=None):
+    def initialize(self, get_examples, nlp=None, labels=None):
        if not hasattr(get_examples, "__call__"):
            err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
            raise ValueError(err)
-        for example in get_examples():
-            for token in example.y:
-                label = self.make_label(token)
-                if label is not None and label not in self.labels:
-                    self.labels[label] = len(self.labels)
+        if labels is not None:
+            self.labels = labels
+        else:
+            for example in get_examples():
+                for token in example.y:
+                    label = self.make_label(token)
+                    if label is not None and label not in self.labels:
+                        self.labels[label] = len(self.labels)
        self.model.initialize()   # TODO: fix initialization by defining X and Y
    def predict(self, docs):
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@ -61,14 +61,13 @@ class Tagger(TrainablePipe):
    DOCS: https://nightly.spacy.io/api/tagger
    """
-    def __init__(self, vocab, model, name="tagger", *, labels=None):
+    def __init__(self, vocab, model, name="tagger"):
        """Initialize a part-of-speech tagger.
        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
-        labels (List): The set of labels. Defaults to None.
        DOCS: https://nightly.spacy.io/api/tagger#init
        """
@ -76,7 +75,7 @@ class Tagger(TrainablePipe):
        self.model = model
        self.name = name
        self._rehearsal_model = None
-        cfg = {"labels": labels or []}
+        cfg = {"labels": []}
        self.cfg = dict(sorted(cfg.items()))
    @property
--- a/website/docs/api/morphologizer.md
+++ b/website/docs/api/morphologizer.md
@ -66,9 +66,6 @@ shortcut for this and instantiate the component using its string name and
 | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                     |
 | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
 | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                  |
-| _keyword-only_ |                                                                                                                      |
-| `labels_morph` | Mapping of morph + POS tags to morph labels. ~~Dict[str, str]~~                                                      |
-| `labels_pos`   | Mapping of morph + POS tags to POS tags. ~~Dict[str, str]~~                                                          |
 ## Morphologizer.\_\_call\_\_ {#call tag="method"}
--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@ -21,16 +21,12 @@ architectures and their arguments and hyperparameters.
 >
 > ```python
 > from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
-> config = {
->    "set_morphology": False,
->    "model": DEFAULT_TAGGER_MODEL,
-> }
+> config = {"model": DEFAULT_TAGGER_MODEL}
 > nlp.add_pipe("tagger", config=config)
 > ```
 | Setting          | Description                                                                                                                                                                                                                                                                                            |
 | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `set_morphology` | Whether to set morphological features. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                                   |
 | `model`          | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
 ```python
@ -63,8 +59,6 @@ shortcut for this and instantiate the component using its string name and
 | `vocab`          | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                      |
 | `model`          | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `name`           | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                   |
 | _keyword-only_   |                                                                                                                                                                                                                                                       |
 | `set_morphology` | Whether to set morphological features. ~~bool~~                                                                                                                                                                                                       |
 ## Tagger.\_\_call\_\_ {#call tag="method"}