Annotate TrainablePipe subclasses with NVTX ranges

This commit is contained in:
shademe 2022-06-14 15:29:40 +02:00
parent 26536eb6b8
commit d8684f7372
9 changed files with 76 additions and 9 deletions

View File

@ -12,7 +12,7 @@ from thinc.types import Floats2d, Ints1d, Ints2d
from ._edit_tree_internals.edit_trees import EditTrees from ._edit_tree_internals.edit_trees import EditTrees
from ._edit_tree_internals.schemas import validate_edit_tree from ._edit_tree_internals.schemas import validate_edit_tree
from .lemmatizer import lemmatizer_score from .lemmatizer import lemmatizer_score
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ..errors import Errors from ..errors import Errors
from ..language import Language from ..language import Language
from ..tokens import Doc from ..tokens import Doc
@ -117,6 +117,7 @@ class EditTreeLemmatizer(TrainablePipe):
self.cfg: Dict[str, Any] = {"labels": []} self.cfg: Dict[str, Any] = {"labels": []}
self.scorer = scorer self.scorer = scorer
@trainable_pipe_nvtx_range
def get_loss( def get_loss(
self, examples: Iterable[Example], scores: List[Floats2d] self, examples: Iterable[Example], scores: List[Floats2d]
) -> Tuple[float, List[Floats2d]]: ) -> Tuple[float, List[Floats2d]]:
@ -144,6 +145,7 @@ class EditTreeLemmatizer(TrainablePipe):
return float(loss), d_scores return float(loss), d_scores
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]) -> List[Ints2d]: def predict(self, docs: Iterable[Doc]) -> List[Ints2d]:
n_docs = len(list(docs)) n_docs = len(list(docs))
if not any(len(doc) for doc in docs): if not any(len(doc) for doc in docs):
@ -186,6 +188,7 @@ class EditTreeLemmatizer(TrainablePipe):
return guesses return guesses
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Iterable[Doc], batch_tree_ids): def set_annotations(self, docs: Iterable[Doc], batch_tree_ids):
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
doc_tree_ids = batch_tree_ids[i] doc_tree_ids = batch_tree_ids[i]
@ -224,6 +227,7 @@ class EditTreeLemmatizer(TrainablePipe):
trees.append(tree) trees.append(tree)
return dict(trees=trees, labels=tuple(self.cfg["labels"])) return dict(trees=trees, labels=tuple(self.cfg["labels"]))
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],

View File

@ -12,7 +12,7 @@ from ..ml import empty_kb
from ..tokens import Doc, Span from ..tokens import Doc, Span
from .pipe import deserialize_config from .pipe import deserialize_config
from .legacy.entity_linker import EntityLinker_v1 from .legacy.entity_linker import EntityLinker_v1
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ..language import Language from ..language import Language
from ..vocab import Vocab from ..vocab import Vocab
from ..training import Example, validate_examples, validate_get_examples from ..training import Example, validate_examples, validate_get_examples
@ -208,6 +208,7 @@ class EntityLinker(TrainablePipe):
if len(self.kb) == 0: if len(self.kb) == 0:
raise ValueError(Errors.E139.format(name=self.name)) raise ValueError(Errors.E139.format(name=self.name))
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],
@ -278,6 +279,7 @@ class EntityLinker(TrainablePipe):
return False return False
@trainable_pipe_nvtx_range
def update( def update(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -338,6 +340,7 @@ class EntityLinker(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d): def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
validate_examples(examples, "EntityLinker.get_loss") validate_examples(examples, "EntityLinker.get_loss")
entity_encodings = [] entity_encodings = []
@ -377,6 +380,7 @@ class EntityLinker(TrainablePipe):
loss = loss / len(entity_encodings) loss = loss / len(entity_encodings)
return float(loss), out return float(loss), out
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]) -> List[str]: def predict(self, docs: Iterable[Doc]) -> List[str]:
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
Returns the KB IDs for each entity in each doc, including NIL if there is Returns the KB IDs for each entity in each doc, including NIL if there is
@ -466,6 +470,7 @@ class EntityLinker(TrainablePipe):
raise RuntimeError(err) raise RuntimeError(err)
return final_kb_ids return final_kb_ids
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None: def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None:
"""Modify a batch of documents, using pre-computed scores. """Modify a batch of documents, using pre-computed scores.
@ -573,8 +578,10 @@ class EntityLinker(TrainablePipe):
util.from_disk(path, deserialize, exclude) util.from_disk(path, deserialize, exclude)
return self return self
@trainable_pipe_nvtx_range
def rehearse(self, examples, *, sgd=None, losses=None, **config): def rehearse(self, examples, *, sgd=None, losses=None, **config):
raise NotImplementedError raise NotImplementedError
@trainable_pipe_nvtx_range
def add_label(self, label): def add_label(self, label):
raise NotImplementedError raise NotImplementedError

View File

@ -15,7 +15,7 @@ from ...kb import KnowledgeBase, Candidate
from ...ml import empty_kb from ...ml import empty_kb
from ...tokens import Doc, Span from ...tokens import Doc, Span
from ..pipe import deserialize_config from ..pipe import deserialize_config
-from ..trainable_pipe import TrainablePipe
+from ..trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ...language import Language from ...language import Language
from ...vocab import Vocab from ...vocab import Vocab
from ...training import Example, validate_examples, validate_get_examples from ...training import Example, validate_examples, validate_get_examples
@ -103,6 +103,7 @@ class EntityLinker_v1(TrainablePipe):
if len(self.kb) == 0: if len(self.kb) == 0:
raise ValueError(Errors.E139.format(name=self.name)) raise ValueError(Errors.E139.format(name=self.name))
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],
@ -138,6 +139,7 @@ class EntityLinker_v1(TrainablePipe):
X=doc_sample, Y=self.model.ops.asarray(vector_sample, dtype="float32") X=doc_sample, Y=self.model.ops.asarray(vector_sample, dtype="float32")
) )
@trainable_pipe_nvtx_range
def update( def update(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -203,6 +205,7 @@ class EntityLinker_v1(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d): def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
validate_examples(examples, "EntityLinker_v1.get_loss") validate_examples(examples, "EntityLinker_v1.get_loss")
entity_encodings = [] entity_encodings = []
@ -224,6 +227,7 @@ class EntityLinker_v1(TrainablePipe):
loss = loss / len(entity_encodings) loss = loss / len(entity_encodings)
return float(loss), gradients return float(loss), gradients
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]) -> List[str]: def predict(self, docs: Iterable[Doc]) -> List[str]:
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
Returns the KB IDs for each entity in each doc, including NIL if there is Returns the KB IDs for each entity in each doc, including NIL if there is
@ -312,6 +316,7 @@ class EntityLinker_v1(TrainablePipe):
raise RuntimeError(err) raise RuntimeError(err)
return final_kb_ids return final_kb_ids
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None: def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None:
"""Modify a batch of documents, using pre-computed scores. """Modify a batch of documents, using pre-computed scores.
@ -419,8 +424,10 @@ class EntityLinker_v1(TrainablePipe):
util.from_disk(path, deserialize, exclude) util.from_disk(path, deserialize, exclude)
return self return self
@trainable_pipe_nvtx_range
def rehearse(self, examples, *, sgd=None, losses=None, **config): def rehearse(self, examples, *, sgd=None, losses=None, **config):
raise NotImplementedError raise NotImplementedError
@trainable_pipe_nvtx_range
def add_label(self, label): def add_label(self, label):
raise NotImplementedError raise NotImplementedError

View File

@ -6,7 +6,7 @@ from thinc.api import set_dropout_rate
from ..tokens.doc cimport Doc from ..tokens.doc cimport Doc
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from .tagger import Tagger from .tagger import Tagger
from ..training import validate_examples from ..training import validate_examples
from ..language import Language from ..language import Language
@ -78,9 +78,11 @@ class MultitaskObjective(Tagger):
def labels(self, value): def labels(self, value):
self.cfg["labels"] = value self.cfg["labels"] = value
@trainable_pipe_nvtx_range
def set_annotations(self, docs, dep_ids): def set_annotations(self, docs, dep_ids):
pass pass
@trainable_pipe_nvtx_range
def initialize(self, get_examples, nlp=None, labels=None): def initialize(self, get_examples, nlp=None, labels=None):
if not hasattr(get_examples, "__call__"): if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples)) err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
@ -95,11 +97,13 @@ class MultitaskObjective(Tagger):
self.labels[label] = len(self.labels) self.labels[label] = len(self.labels)
self.model.initialize() # TODO: fix initialization by defining X and Y self.model.initialize() # TODO: fix initialization by defining X and Y
@trainable_pipe_nvtx_range
def predict(self, docs): def predict(self, docs):
tokvecs = self.model.get_ref("tok2vec")(docs) tokvecs = self.model.get_ref("tok2vec")(docs)
scores = self.model.get_ref("softmax")(tokvecs) scores = self.model.get_ref("softmax")(tokvecs)
return tokvecs, scores return tokvecs, scores
@trainable_pipe_nvtx_range
def get_loss(self, examples, scores): def get_loss(self, examples, scores):
cdef int idx = 0 cdef int idx = 0
correct = numpy.zeros((scores.shape[0],), dtype="i") correct = numpy.zeros((scores.shape[0],), dtype="i")
@ -174,19 +178,23 @@ class ClozeMultitask(TrainablePipe):
self.cfg = cfg self.cfg = cfg
self.distance = CosineDistance(ignore_zeros=True, normalize=False) # TODO: in config self.distance = CosineDistance(ignore_zeros=True, normalize=False) # TODO: in config
@trainable_pipe_nvtx_range
def set_annotations(self, docs, dep_ids): def set_annotations(self, docs, dep_ids):
pass pass
@trainable_pipe_nvtx_range
def initialize(self, get_examples, nlp=None): def initialize(self, get_examples, nlp=None):
self.model.initialize() # TODO: fix initialization by defining X and Y self.model.initialize() # TODO: fix initialization by defining X and Y
X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO"))) X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
self.model.output_layer.initialize(X) self.model.output_layer.initialize(X)
@trainable_pipe_nvtx_range
def predict(self, docs): def predict(self, docs):
tokvecs = self.model.get_ref("tok2vec")(docs) tokvecs = self.model.get_ref("tok2vec")(docs)
vectors = self.model.get_ref("output_layer")(tokvecs) vectors = self.model.get_ref("output_layer")(tokvecs)
return tokvecs, vectors return tokvecs, vectors
@trainable_pipe_nvtx_range
def get_loss(self, examples, vectors, prediction): def get_loss(self, examples, vectors, prediction):
validate_examples(examples, "ClozeMultitask.get_loss") validate_examples(examples, "ClozeMultitask.get_loss")
# The simplest way to implement this would be to vstack the # The simplest way to implement this would be to vstack the
@ -199,9 +207,11 @@ class ClozeMultitask(TrainablePipe):
loss = self.distance.get_loss(prediction, target) loss = self.distance.get_loss(prediction, target)
return float(loss), gradient return float(loss), gradient
@trainable_pipe_nvtx_range
def update(self, examples, *, drop=0., sgd=None, losses=None): def update(self, examples, *, drop=0., sgd=None, losses=None):
pass pass
@trainable_pipe_nvtx_range
def rehearse(self, examples, drop=0., sgd=None, losses=None): def rehearse(self, examples, drop=0., sgd=None, losses=None):
if losses is not None and self.name not in losses: if losses is not None and self.name not in losses:
losses[self.name] = 0. losses[self.name] = 0.
@ -217,5 +227,6 @@ class ClozeMultitask(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def add_label(self, label): def add_label(self, label):
raise NotImplementedError raise NotImplementedError

View File

@ -8,7 +8,7 @@ import numpy
from ..compat import Protocol, runtime_checkable from ..compat import Protocol, runtime_checkable
from ..scorer import Scorer from ..scorer import Scorer
from ..language import Language from ..language import Language
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ..tokens import Doc, SpanGroup, Span from ..tokens import Doc, SpanGroup, Span
from ..vocab import Vocab from ..vocab import Vocab
from ..training import Example, validate_examples from ..training import Example, validate_examples
@ -227,6 +227,7 @@ class SpanCategorizer(TrainablePipe):
""" """
return str(self.cfg["spans_key"]) return str(self.cfg["spans_key"])
@trainable_pipe_nvtx_range
def add_label(self, label: str) -> int: def add_label(self, label: str) -> int:
"""Add a new label to the pipe. """Add a new label to the pipe.
@ -260,6 +261,7 @@ class SpanCategorizer(TrainablePipe):
""" """
return list(self.labels) return list(self.labels)
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]): def predict(self, docs: Iterable[Doc]):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
@ -272,6 +274,7 @@ class SpanCategorizer(TrainablePipe):
scores = self.model.predict((docs, indices)) # type: ignore scores = self.model.predict((docs, indices)) # type: ignore
return indices, scores return indices, scores
@trainable_pipe_nvtx_range
def set_candidates( def set_candidates(
self, docs: Iterable[Doc], *, candidates_key: str = "candidates" self, docs: Iterable[Doc], *, candidates_key: str = "candidates"
) -> None: ) -> None:
@ -290,6 +293,7 @@ class SpanCategorizer(TrainablePipe):
for index in candidates.dataXd: for index in candidates.dataXd:
doc.spans[candidates_key].append(doc[index[0] : index[1]]) doc.spans[candidates_key].append(doc[index[0] : index[1]])
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None: def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None:
"""Modify a batch of Doc objects, using pre-computed scores. """Modify a batch of Doc objects, using pre-computed scores.
@ -308,6 +312,7 @@ class SpanCategorizer(TrainablePipe):
) )
offset += indices.lengths[i] offset += indices.lengths[i]
@trainable_pipe_nvtx_range
def update( def update(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -349,6 +354,7 @@ class SpanCategorizer(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def get_loss( def get_loss(
self, examples: Iterable[Example], spans_scores: Tuple[Ragged, Floats2d] self, examples: Iterable[Example], spans_scores: Tuple[Ragged, Floats2d]
) -> Tuple[float, float]: ) -> Tuple[float, float]:
@ -399,6 +405,7 @@ class SpanCategorizer(TrainablePipe):
loss = float((d_scores**2).sum()) loss = float((d_scores**2).sum())
return loss, d_scores return loss, d_scores
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],

View File

@ -11,7 +11,7 @@ from ..tokens.doc cimport Doc
from ..morphology cimport Morphology from ..morphology cimport Morphology
from ..vocab cimport Vocab from ..vocab cimport Vocab
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from .pipe import deserialize_config from .pipe import deserialize_config
from ..language import Language from ..language import Language
from ..attrs import POS, ID from ..attrs import POS, ID
@ -126,6 +126,7 @@ class Tagger(TrainablePipe):
"""Data about the labels currently added to the component.""" """Data about the labels currently added to the component."""
return tuple(self.cfg["labels"]) return tuple(self.cfg["labels"])
@trainable_pipe_nvtx_range
def predict(self, docs): def predict(self, docs):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
@ -155,6 +156,7 @@ class Tagger(TrainablePipe):
guesses.append(doc_guesses) guesses.append(doc_guesses)
return guesses return guesses
@trainable_pipe_nvtx_range
def set_annotations(self, docs, batch_tag_ids): def set_annotations(self, docs, batch_tag_ids):
"""Modify a batch of documents, using pre-computed scores. """Modify a batch of documents, using pre-computed scores.
@ -177,6 +179,7 @@ class Tagger(TrainablePipe):
if doc.c[j].tag == 0 or overwrite: if doc.c[j].tag == 0 or overwrite:
doc.c[j].tag = self.vocab.strings[labels[tag_id]] doc.c[j].tag = self.vocab.strings[labels[tag_id]]
@trainable_pipe_nvtx_range
def update(self, examples, *, drop=0., sgd=None, losses=None): def update(self, examples, *, drop=0., sgd=None, losses=None):
"""Learn from a batch of documents and gold-standard information, """Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict and get_loss. updating the pipe's model. Delegates to predict and get_loss.
@ -210,6 +213,7 @@ class Tagger(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def rehearse(self, examples, *, drop=0., sgd=None, losses=None): def rehearse(self, examples, *, drop=0., sgd=None, losses=None):
"""Perform a "rehearsal" update from a batch of data. Rehearsal updates """Perform a "rehearsal" update from a batch of data. Rehearsal updates
teach the current model to make predictions similar to an initial model, teach the current model to make predictions similar to an initial model,
@ -245,6 +249,7 @@ class Tagger(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def get_loss(self, examples, scores): def get_loss(self, examples, scores):
"""Find the loss and gradient of loss for the batch of documents and """Find the loss and gradient of loss for the batch of documents and
their predicted scores. their predicted scores.
@ -269,6 +274,7 @@ class Tagger(TrainablePipe):
raise ValueError(Errors.E910.format(name=self.name)) raise ValueError(Errors.E910.format(name=self.name))
return float(loss), d_scores return float(loss), d_scores
@trainable_pipe_nvtx_range
def initialize(self, get_examples, *, nlp=None, labels=None): def initialize(self, get_examples, *, nlp=None, labels=None):
"""Initialize the pipe for training, using a representative set """Initialize the pipe for training, using a representative set
of data examples. of data examples.
@ -307,6 +313,7 @@ class Tagger(TrainablePipe):
assert len(label_sample) > 0, Errors.E923.format(name=self.name) assert len(label_sample) > 0, Errors.E923.format(name=self.name)
self.model.initialize(X=doc_sample, Y=label_sample) self.model.initialize(X=doc_sample, Y=label_sample)
@trainable_pipe_nvtx_range
def add_label(self, label): def add_label(self, label):
"""Add a new label to the pipe. """Add a new label to the pipe.

View File

@ -4,7 +4,7 @@ from thinc.types import Floats2d
import numpy import numpy
from itertools import islice from itertools import islice
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ..language import Language from ..language import Language
from ..training import Example, validate_examples, validate_get_examples from ..training import Example, validate_examples, validate_get_examples
from ..errors import Errors from ..errors import Errors
@ -181,6 +181,7 @@ class TextCategorizer(TrainablePipe):
""" """
return self.labels # type: ignore[return-value] return self.labels # type: ignore[return-value]
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]): def predict(self, docs: Iterable[Doc]):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
@ -199,6 +200,7 @@ class TextCategorizer(TrainablePipe):
scores = self.model.ops.asarray(scores) scores = self.model.ops.asarray(scores)
return scores return scores
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Iterable[Doc], scores) -> None: def set_annotations(self, docs: Iterable[Doc], scores) -> None:
"""Modify a batch of Doc objects, using pre-computed scores. """Modify a batch of Doc objects, using pre-computed scores.
@ -211,6 +213,7 @@ class TextCategorizer(TrainablePipe):
for j, label in enumerate(self.labels): for j, label in enumerate(self.labels):
doc.cats[label] = float(scores[i, j]) doc.cats[label] = float(scores[i, j])
@trainable_pipe_nvtx_range
def update( def update(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -248,6 +251,7 @@ class TextCategorizer(TrainablePipe):
losses[self.name] += loss losses[self.name] += loss
return losses return losses
@trainable_pipe_nvtx_range
def rehearse( def rehearse(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -306,6 +310,7 @@ class TextCategorizer(TrainablePipe):
truths = self.model.ops.asarray(truths) # type: ignore truths = self.model.ops.asarray(truths) # type: ignore
return truths, not_missing # type: ignore return truths, not_missing # type: ignore
@trainable_pipe_nvtx_range
def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]: def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
"""Find the loss and gradient of loss for the batch of documents and """Find the loss and gradient of loss for the batch of documents and
their predicted scores. their predicted scores.
@ -325,6 +330,7 @@ class TextCategorizer(TrainablePipe):
mean_square_error = (d_scores**2).mean() mean_square_error = (d_scores**2).mean()
return float(mean_square_error), d_scores return float(mean_square_error), d_scores
@trainable_pipe_nvtx_range
def add_label(self, label: str) -> int: def add_label(self, label: str) -> int:
"""Add a new label to the pipe. """Add a new label to the pipe.
@ -344,6 +350,7 @@ class TextCategorizer(TrainablePipe):
self.vocab.strings.add(label) self.vocab.strings.add(label)
return 1 return 1
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],

View File

@ -2,7 +2,7 @@ from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any
from thinc.api import Model, set_dropout_rate, Optimizer, Config from thinc.api import Model, set_dropout_rate, Optimizer, Config
from itertools import islice from itertools import islice
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ..training import Example, validate_examples, validate_get_examples from ..training import Example, validate_examples, validate_get_examples
from ..tokens import Doc from ..tokens import Doc
from ..vocab import Vocab from ..vocab import Vocab
@ -109,6 +109,7 @@ class Tok2Vec(TrainablePipe):
if isinstance(node, Tok2VecListener) and node.upstream_name in names: if isinstance(node, Tok2VecListener) and node.upstream_name in names:
self.add_listener(node, component.name) self.add_listener(node, component.name)
@trainable_pipe_nvtx_range
def predict(self, docs: Iterable[Doc]): def predict(self, docs: Iterable[Doc]):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
Returns a single tensor for a batch of documents. Returns a single tensor for a batch of documents.
@ -128,6 +129,7 @@ class Tok2Vec(TrainablePipe):
listener.receive(batch_id, tokvecs, _empty_backprop) listener.receive(batch_id, tokvecs, _empty_backprop)
return tokvecs return tokvecs
@trainable_pipe_nvtx_range
def set_annotations(self, docs: Sequence[Doc], tokvecses) -> None: def set_annotations(self, docs: Sequence[Doc], tokvecses) -> None:
"""Modify a batch of documents, using pre-computed scores. """Modify a batch of documents, using pre-computed scores.
@ -140,6 +142,7 @@ class Tok2Vec(TrainablePipe):
assert tokvecs.shape[0] == len(doc) assert tokvecs.shape[0] == len(doc)
doc.tensor = tokvecs doc.tensor = tokvecs
@trainable_pipe_nvtx_range
def update( def update(
self, self,
examples: Iterable[Example], examples: Iterable[Example],
@ -194,9 +197,11 @@ class Tok2Vec(TrainablePipe):
self.listeners[-1].receive(batch_id, tokvecs, backprop) self.listeners[-1].receive(batch_id, tokvecs, backprop)
return losses return losses
@trainable_pipe_nvtx_range
def get_loss(self, examples, scores) -> None: def get_loss(self, examples, scores) -> None:
pass pass
@trainable_pipe_nvtx_range
def initialize( def initialize(
self, self,
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],

View File

@ -21,7 +21,7 @@ from ..ml.parser_model cimport predict_states, arg_max_if_valid
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
from ..ml.parser_model cimport get_c_weights, get_c_sizes from ..ml.parser_model cimport get_c_weights, get_c_sizes
from ..tokens.doc cimport Doc from ..tokens.doc cimport Doc
-from .trainable_pipe import TrainablePipe
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
from ._parser_internals cimport _beam_utils from ._parser_internals cimport _beam_utils
from ._parser_internals import _beam_utils from ._parser_internals import _beam_utils
@ -159,6 +159,7 @@ cdef class Parser(TrainablePipe):
def incorrect_spans_key(self): def incorrect_spans_key(self):
return self.cfg["incorrect_spans_key"] return self.cfg["incorrect_spans_key"]
@trainable_pipe_nvtx_range
def add_label(self, label): def add_label(self, label):
resized = False resized = False
for action in self.moves.action_types: for action in self.moves.action_types:
@ -214,6 +215,7 @@ cdef class Parser(TrainablePipe):
with self.model.use_params(params): with self.model.use_params(params):
yield yield
@trainable_pipe_nvtx_range
def pipe(self, docs, *, int batch_size=256): def pipe(self, docs, *, int batch_size=256):
"""Process a stream of documents. """Process a stream of documents.
@ -240,6 +242,7 @@ cdef class Parser(TrainablePipe):
error_handler(self.name, self, batch_in_order, e) error_handler(self.name, self, batch_in_order, e)
@trainable_pipe_nvtx_range
def predict(self, docs): def predict(self, docs):
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
@ -256,6 +259,7 @@ cdef class Parser(TrainablePipe):
beam_density=self.cfg["beam_density"] beam_density=self.cfg["beam_density"]
) )
@trainable_pipe_nvtx_range
def greedy_parse(self, docs, drop=0.): def greedy_parse(self, docs, drop=0.):
cdef vector[StateC*] states cdef vector[StateC*] states
cdef StateClass state cdef StateClass state
@ -280,6 +284,7 @@ cdef class Parser(TrainablePipe):
del model del model
return batch return batch
@trainable_pipe_nvtx_range
def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.): def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.):
cdef Beam beam cdef Beam beam
cdef Doc doc cdef Doc doc
@ -321,6 +326,7 @@ cdef class Parser(TrainablePipe):
unfinished.clear() unfinished.clear()
free_activations(&activations) free_activations(&activations)
@trainable_pipe_nvtx_range
def set_annotations(self, docs, states_or_beams): def set_annotations(self, docs, states_or_beams):
cdef StateClass state cdef StateClass state
cdef Beam beam cdef Beam beam
@ -331,6 +337,7 @@ cdef class Parser(TrainablePipe):
for hook in self.postprocesses: for hook in self.postprocesses:
hook(doc) hook(doc)
@trainable_pipe_nvtx_range
def transition_states(self, states, float[:, ::1] scores): def transition_states(self, states, float[:, ::1] scores):
cdef StateClass state cdef StateClass state
cdef float* c_scores = &scores[0, 0] cdef float* c_scores = &scores[0, 0]
@ -360,6 +367,7 @@ cdef class Parser(TrainablePipe):
action.do(states[i], action.label) action.do(states[i], action.label)
free(is_valid) free(is_valid)
@trainable_pipe_nvtx_range
def update(self, examples, *, drop=0., sgd=None, losses=None): def update(self, examples, *, drop=0., sgd=None, losses=None):
cdef StateClass state cdef StateClass state
if losses is None: if losses is None:
@ -432,6 +440,7 @@ cdef class Parser(TrainablePipe):
del model del model
return losses return losses
@trainable_pipe_nvtx_range
def rehearse(self, examples, sgd=None, losses=None, **cfg): def rehearse(self, examples, sgd=None, losses=None, **cfg):
"""Perform a "rehearsal" update, to prevent catastrophic forgetting.""" """Perform a "rehearsal" update, to prevent catastrophic forgetting."""
if losses is None: if losses is None:
@ -481,6 +490,7 @@ cdef class Parser(TrainablePipe):
del tutor del tutor
return losses return losses
@trainable_pipe_nvtx_range
def update_beam(self, examples, *, beam_width, def update_beam(self, examples, *, beam_width,
drop=0., sgd=None, losses=None, beam_density=0.0): drop=0., sgd=None, losses=None, beam_density=0.0):
states, golds, _ = self.moves.init_gold_batch(examples) states, golds, _ = self.moves.init_gold_batch(examples)
@ -502,6 +512,7 @@ cdef class Parser(TrainablePipe):
if sgd is not None: if sgd is not None:
self.finish_update(sgd) self.finish_update(sgd)
@trainable_pipe_nvtx_range
def get_batch_loss(self, states, golds, float[:, ::1] scores, losses): def get_batch_loss(self, states, golds, float[:, ::1] scores, losses):
cdef StateClass state cdef StateClass state
cdef Pool mem = Pool() cdef Pool mem = Pool()
@ -535,6 +546,7 @@ cdef class Parser(TrainablePipe):
def set_output(self, nO): def set_output(self, nO):
self.model.attrs["resize_output"](self.model, nO) self.model.attrs["resize_output"](self.model, nO)
@trainable_pipe_nvtx_range
def initialize(self, get_examples, nlp=None, labels=None): def initialize(self, get_examples, nlp=None, labels=None):
validate_get_examples(get_examples, "Parser.initialize") validate_get_examples(get_examples, "Parser.initialize")
util.check_lexeme_norms(self.vocab, "parser or NER") util.check_lexeme_norms(self.vocab, "parser or NER")