mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge pull request #6379 from svlandeg/fix/labels-constructor
This commit is contained in:
commit
82e88f0e3b
|
@ -67,9 +67,6 @@ class Morphologizer(Tagger):
|
|||
vocab: Vocab,
|
||||
model: Model,
|
||||
name: str = "morphologizer",
|
||||
*,
|
||||
labels_morph: Optional[dict] = None,
|
||||
labels_pos: Optional[dict] = None,
|
||||
):
|
||||
"""Initialize a morphologizer.
|
||||
|
||||
|
@ -77,8 +74,6 @@ class Morphologizer(Tagger):
|
|||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
labels_morph (dict): Mapping of morph + POS tags to morph labels.
|
||||
labels_pos (dict): Mapping of morph + POS tags to POS tags.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#init
|
||||
"""
|
||||
|
@ -90,7 +85,7 @@ class Morphologizer(Tagger):
|
|||
# store mappings from morph+POS labels to token-level annotations:
|
||||
# 1) labels_morph stores a mapping from morph+POS->morph
|
||||
# 2) labels_pos stores a mapping from morph+POS->POS
|
||||
cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}}
|
||||
cfg = {"labels_morph": {}, "labels_pos": {}}
|
||||
self.cfg = dict(sorted(cfg.items()))
|
||||
|
||||
@property
|
||||
|
|
|
@ -47,7 +47,7 @@ class MultitaskObjective(Tagger):
|
|||
side-objective.
|
||||
"""
|
||||
|
||||
def __init__(self, vocab, model, name="nn_labeller", *, labels, target):
|
||||
def __init__(self, vocab, model, name="nn_labeller", *, target):
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
self.name = name
|
||||
|
@ -67,7 +67,7 @@ class MultitaskObjective(Tagger):
|
|||
self.make_label = target
|
||||
else:
|
||||
raise ValueError(Errors.E016)
|
||||
cfg = {"labels": labels or {}, "target": target}
|
||||
cfg = {"labels": {}, "target": target}
|
||||
self.cfg = dict(cfg)
|
||||
|
||||
@property
|
||||
|
@ -81,15 +81,18 @@ class MultitaskObjective(Tagger):
|
|||
def set_annotations(self, docs, dep_ids):
|
||||
pass
|
||||
|
||||
def initialize(self, get_examples, nlp=None):
|
||||
def initialize(self, get_examples, nlp=None, labels=None):
|
||||
if not hasattr(get_examples, "__call__"):
|
||||
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
|
||||
raise ValueError(err)
|
||||
for example in get_examples():
|
||||
for token in example.y:
|
||||
label = self.make_label(token)
|
||||
if label is not None and label not in self.labels:
|
||||
self.labels[label] = len(self.labels)
|
||||
if labels is not None:
|
||||
self.labels = labels
|
||||
else:
|
||||
for example in get_examples():
|
||||
for token in example.y:
|
||||
label = self.make_label(token)
|
||||
if label is not None and label not in self.labels:
|
||||
self.labels[label] = len(self.labels)
|
||||
self.model.initialize() # TODO: fix initialization by defining X and Y
|
||||
|
||||
def predict(self, docs):
|
||||
|
|
|
@ -61,14 +61,13 @@ class Tagger(TrainablePipe):
|
|||
|
||||
DOCS: https://nightly.spacy.io/api/tagger
|
||||
"""
|
||||
def __init__(self, vocab, model, name="tagger", *, labels=None):
|
||||
def __init__(self, vocab, model, name="tagger"):
|
||||
"""Initialize a part-of-speech tagger.
|
||||
|
||||
vocab (Vocab): The shared vocabulary.
|
||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
labels (List): The set of labels. Defaults to None.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#init
|
||||
"""
|
||||
|
@ -76,7 +75,7 @@ class Tagger(TrainablePipe):
|
|||
self.model = model
|
||||
self.name = name
|
||||
self._rehearsal_model = None
|
||||
cfg = {"labels": labels or []}
|
||||
cfg = {"labels": []}
|
||||
self.cfg = dict(sorted(cfg.items()))
|
||||
|
||||
@property
|
||||
|
|
|
@ -66,9 +66,6 @@ shortcut for this and instantiate the component using its string name and
|
|||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `labels_morph` | Mapping of morph + POS tags to morph labels. ~~Dict[str, str]~~ |
|
||||
| `labels_pos` | Mapping of morph + POS tags to POS tags. ~~Dict[str, str]~~ |
|
||||
|
||||
## Morphologizer.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
|
@ -21,16 +21,12 @@ architectures and their arguments and hyperparameters.
|
|||
>
|
||||
> ```python
|
||||
> from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
|
||||
> config = {
|
||||
> "set_morphology": False,
|
||||
> "model": DEFAULT_TAGGER_MODEL,
|
||||
> }
|
||||
> config = {"model": DEFAULT_TAGGER_MODEL}
|
||||
> nlp.add_pipe("tagger", config=config)
|
||||
> ```
|
||||
|
||||
| Setting | Description |
|
||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `set_morphology` | Whether to set morphological features. Defaults to `False`. ~~bool~~ |
|
||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
|
@ -63,8 +59,6 @@ shortcut for this and instantiate the component using its string name and
|
|||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `set_morphology` | Whether to set morphological features. ~~bool~~ |
|
||||
|
||||
## Tagger.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user