From 1041f68be2fe66c632c23e06472b0748990326f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= Date: Fri, 2 Dec 2022 16:30:03 +0100 Subject: [PATCH] Use cross entropy from `thinc.legacy` --- spacy/pipeline/edit_tree_lemmatizer.py | 7 +++++-- spacy/pipeline/morphologizer.pyx | 5 +++-- spacy/pipeline/senter.pyx | 6 ++++-- spacy/pipeline/tagger.pyx | 7 ++++--- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 9676e2194..0531d4ba5 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -5,8 +5,9 @@ from itertools import islice import numpy as np import srsly -from thinc.api import Config, Model, SequenceCategoricalCrossentropy +from thinc.api import Config, Model from thinc.types import ArrayXd, Floats2d, Ints1d +from thinc.legacy import LegacySequenceCategoricalCrossentropy from ._edit_tree_internals.edit_trees import EditTrees from ._edit_tree_internals.schemas import validate_edit_tree @@ -129,7 +130,9 @@ class EditTreeLemmatizer(TrainablePipe): self, examples: Iterable[Example], scores: List[Floats2d] ) -> Tuple[float, List[Floats2d]]: validate_examples(examples, "EditTreeLemmatizer.get_loss") - loss_func = SequenceCategoricalCrossentropy(normalize=False, missing_value=-1) + loss_func = LegacySequenceCategoricalCrossentropy( + normalize=False, missing_value=-1 + ) truths = [] for eg in examples: diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 782a1dabe..293add9e1 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -1,7 +1,8 @@ # cython: infer_types=True, profile=True, binding=True from typing import Callable, Dict, Iterable, List, Optional, Union import srsly -from thinc.api import SequenceCategoricalCrossentropy, Model, Config +from thinc.api import Model, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy from thinc.types import Floats2d, Ints1d from itertools import islice @@ -290,7 +291,7 @@ class Morphologizer(Tagger): DOCS: https://spacy.io/api/morphologizer#get_loss """ validate_examples(examples, "Morphologizer.get_loss") - loss_func = SequenceCategoricalCrossentropy(names=tuple(self.labels), normalize=False) + loss_func = LegacySequenceCategoricalCrossentropy(names=tuple(self.labels), normalize=False) truths = [] for eg in examples: eg_truths = [] diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 93a7ee796..42feeb277 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -3,7 +3,9 @@ from typing import Dict, Iterable, Optional, Callable, List, Union from itertools import islice import srsly -from thinc.api import Model, SequenceCategoricalCrossentropy, Config +from thinc.api import Model, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy + from thinc.types import Floats2d, Ints1d from ..tokens.doc cimport Doc @@ -161,7 +163,7 @@ class SentenceRecognizer(Tagger): """ validate_examples(examples, "SentenceRecognizer.get_loss") labels = self.labels - loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False) + loss_func = LegacySequenceCategoricalCrossentropy(names=labels, normalize=False) truths = [] for eg in examples: eg_truth = [] diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 3b4715ce5..e12f116af 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -2,7 +2,8 @@ from typing import Callable, Dict, Iterable, List, Optional, Union import numpy import srsly -from thinc.api import Model, set_dropout_rate, SequenceCategoricalCrossentropy, Config +from thinc.api import Model, set_dropout_rate, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy from thinc.types import Floats2d, Ints1d import warnings from itertools import islice @@ -244,7 +245,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#rehearse """ - loss_func = SequenceCategoricalCrossentropy() + loss_func = LegacySequenceCategoricalCrossentropy() if losses is None: losses = {} losses.setdefault(self.name, 0.0) @@ -275,7 +276,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#get_loss """ validate_examples(examples, "Tagger.get_loss") - loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"]) + loss_func = LegacySequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"]) # Convert empty tag "" to missing value None so that both misaligned # tokens and tokens with missing annotation have the default missing # value None.