Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-24 08:14:15 +03:00)
Annotate TrainablePipe subclasses with NVTX ranges
This commit is contained in:
parent 26536eb6b8
commit d8684f7372
|
@ -12,7 +12,7 @@ from thinc.types import Floats2d, Ints1d, Ints2d
|
|||
from ._edit_tree_internals.edit_trees import EditTrees
|
||||
from ._edit_tree_internals.schemas import validate_edit_tree
|
||||
from .lemmatizer import lemmatizer_score
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ..errors import Errors
|
||||
from ..language import Language
|
||||
from ..tokens import Doc
|
||||
|
@ -117,6 +117,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
|||
self.cfg: Dict[str, Any] = {"labels": []}
|
||||
self.scorer = scorer
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(
|
||||
self, examples: Iterable[Example], scores: List[Floats2d]
|
||||
) -> Tuple[float, List[Floats2d]]:
|
||||
|
@ -144,6 +145,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
|||
|
||||
return float(loss), d_scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]) -> List[Ints2d]:
|
||||
n_docs = len(list(docs))
|
||||
if not any(len(doc) for doc in docs):
|
||||
|
@ -186,6 +188,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
|||
|
||||
return guesses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Iterable[Doc], batch_tree_ids):
|
||||
for i, doc in enumerate(docs):
|
||||
doc_tree_ids = batch_tree_ids[i]
|
||||
|
@ -224,6 +227,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
|||
trees.append(tree)
|
||||
return dict(trees=trees, labels=tuple(self.cfg["labels"]))
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
|
|
@ -12,7 +12,7 @@ from ..ml import empty_kb
|
|||
from ..tokens import Doc, Span
|
||||
from .pipe import deserialize_config
|
||||
from .legacy.entity_linker import EntityLinker_v1
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ..language import Language
|
||||
from ..vocab import Vocab
|
||||
from ..training import Example, validate_examples, validate_get_examples
|
||||
|
@ -208,6 +208,7 @@ class EntityLinker(TrainablePipe):
|
|||
if len(self.kb) == 0:
|
||||
raise ValueError(Errors.E139.format(name=self.name))
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
@ -278,6 +279,7 @@ class EntityLinker(TrainablePipe):
|
|||
|
||||
return False
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -338,6 +340,7 @@ class EntityLinker(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
|
||||
validate_examples(examples, "EntityLinker.get_loss")
|
||||
entity_encodings = []
|
||||
|
@ -377,6 +380,7 @@ class EntityLinker(TrainablePipe):
|
|||
loss = loss / len(entity_encodings)
|
||||
return float(loss), out
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]) -> List[str]:
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
Returns the KB IDs for each entity in each doc, including NIL if there is
|
||||
|
@ -466,6 +470,7 @@ class EntityLinker(TrainablePipe):
|
|||
raise RuntimeError(err)
|
||||
return final_kb_ids
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None:
|
||||
"""Modify a batch of documents, using pre-computed scores.
|
||||
|
||||
|
@ -573,8 +578,10 @@ class EntityLinker(TrainablePipe):
|
|||
util.from_disk(path, deserialize, exclude)
|
||||
return self
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(self, examples, *, sgd=None, losses=None, **config):
|
||||
raise NotImplementedError
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label):
|
||||
raise NotImplementedError
|
||||
|
|
|
@ -15,7 +15,7 @@ from ...kb import KnowledgeBase, Candidate
|
|||
from ...ml import empty_kb
|
||||
from ...tokens import Doc, Span
|
||||
from ..pipe import deserialize_config
|
||||
-from ..trainable_pipe import TrainablePipe
|
||||
+from ..trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ...language import Language
|
||||
from ...vocab import Vocab
|
||||
from ...training import Example, validate_examples, validate_get_examples
|
||||
|
@ -103,6 +103,7 @@ class EntityLinker_v1(TrainablePipe):
|
|||
if len(self.kb) == 0:
|
||||
raise ValueError(Errors.E139.format(name=self.name))
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
@ -138,6 +139,7 @@ class EntityLinker_v1(TrainablePipe):
|
|||
X=doc_sample, Y=self.model.ops.asarray(vector_sample, dtype="float32")
|
||||
)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -203,6 +205,7 @@ class EntityLinker_v1(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
|
||||
validate_examples(examples, "EntityLinker_v1.get_loss")
|
||||
entity_encodings = []
|
||||
|
@ -224,6 +227,7 @@ class EntityLinker_v1(TrainablePipe):
|
|||
loss = loss / len(entity_encodings)
|
||||
return float(loss), gradients
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]) -> List[str]:
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
Returns the KB IDs for each entity in each doc, including NIL if there is
|
||||
|
@ -312,6 +316,7 @@ class EntityLinker_v1(TrainablePipe):
|
|||
raise RuntimeError(err)
|
||||
return final_kb_ids
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Iterable[Doc], kb_ids: List[str]) -> None:
|
||||
"""Modify a batch of documents, using pre-computed scores.
|
||||
|
||||
|
@ -419,8 +424,10 @@ class EntityLinker_v1(TrainablePipe):
|
|||
util.from_disk(path, deserialize, exclude)
|
||||
return self
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(self, examples, *, sgd=None, losses=None, **config):
|
||||
raise NotImplementedError
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label):
|
||||
raise NotImplementedError
|
||||
|
|
|
@ -6,7 +6,7 @@ from thinc.api import set_dropout_rate
|
|||
|
||||
from ..tokens.doc cimport Doc
|
||||
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from .tagger import Tagger
|
||||
from ..training import validate_examples
|
||||
from ..language import Language
|
||||
|
@ -78,9 +78,11 @@ class MultitaskObjective(Tagger):
|
|||
def labels(self, value):
|
||||
self.cfg["labels"] = value
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs, dep_ids):
|
||||
pass
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(self, get_examples, nlp=None, labels=None):
|
||||
if not hasattr(get_examples, "__call__"):
|
||||
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
|
||||
|
@ -95,11 +97,13 @@ class MultitaskObjective(Tagger):
|
|||
self.labels[label] = len(self.labels)
|
||||
self.model.initialize() # TODO: fix initialization by defining X and Y
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs):
|
||||
tokvecs = self.model.get_ref("tok2vec")(docs)
|
||||
scores = self.model.get_ref("softmax")(tokvecs)
|
||||
return tokvecs, scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples, scores):
|
||||
cdef int idx = 0
|
||||
correct = numpy.zeros((scores.shape[0],), dtype="i")
|
||||
|
@ -174,19 +178,23 @@ class ClozeMultitask(TrainablePipe):
|
|||
self.cfg = cfg
|
||||
self.distance = CosineDistance(ignore_zeros=True, normalize=False) # TODO: in config
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs, dep_ids):
|
||||
pass
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(self, get_examples, nlp=None):
|
||||
self.model.initialize() # TODO: fix initialization by defining X and Y
|
||||
X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
|
||||
self.model.output_layer.initialize(X)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs):
|
||||
tokvecs = self.model.get_ref("tok2vec")(docs)
|
||||
vectors = self.model.get_ref("output_layer")(tokvecs)
|
||||
return tokvecs, vectors
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples, vectors, prediction):
|
||||
validate_examples(examples, "ClozeMultitask.get_loss")
|
||||
# The simplest way to implement this would be to vstack the
|
||||
|
@ -199,9 +207,11 @@ class ClozeMultitask(TrainablePipe):
|
|||
loss = self.distance.get_loss(prediction, target)
|
||||
return float(loss), gradient
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||
pass
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(self, examples, drop=0., sgd=None, losses=None):
|
||||
if losses is not None and self.name not in losses:
|
||||
losses[self.name] = 0.
|
||||
|
@ -217,5 +227,6 @@ class ClozeMultitask(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label):
|
||||
raise NotImplementedError
|
||||
|
|
|
@ -8,7 +8,7 @@ import numpy
|
|||
from ..compat import Protocol, runtime_checkable
|
||||
from ..scorer import Scorer
|
||||
from ..language import Language
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ..tokens import Doc, SpanGroup, Span
|
||||
from ..vocab import Vocab
|
||||
from ..training import Example, validate_examples
|
||||
|
@ -227,6 +227,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
"""
|
||||
return str(self.cfg["spans_key"])
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label: str) -> int:
|
||||
"""Add a new label to the pipe.
|
||||
|
||||
|
@ -260,6 +261,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
"""
|
||||
return list(self.labels)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]):
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
|
||||
|
@ -272,6 +274,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
scores = self.model.predict((docs, indices)) # type: ignore
|
||||
return indices, scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_candidates(
|
||||
self, docs: Iterable[Doc], *, candidates_key: str = "candidates"
|
||||
) -> None:
|
||||
|
@ -290,6 +293,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
for index in candidates.dataXd:
|
||||
doc.spans[candidates_key].append(doc[index[0] : index[1]])
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None:
|
||||
"""Modify a batch of Doc objects, using pre-computed scores.
|
||||
|
||||
|
@ -308,6 +312,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
)
|
||||
offset += indices.lengths[i]
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -349,6 +354,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(
|
||||
self, examples: Iterable[Example], spans_scores: Tuple[Ragged, Floats2d]
|
||||
) -> Tuple[float, float]:
|
||||
|
@ -399,6 +405,7 @@ class SpanCategorizer(TrainablePipe):
|
|||
loss = float((d_scores**2).sum())
|
||||
return loss, d_scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
|
|
@ -11,7 +11,7 @@ from ..tokens.doc cimport Doc
|
|||
from ..morphology cimport Morphology
|
||||
from ..vocab cimport Vocab
|
||||
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from .pipe import deserialize_config
|
||||
from ..language import Language
|
||||
from ..attrs import POS, ID
|
||||
|
@ -126,6 +126,7 @@ class Tagger(TrainablePipe):
|
|||
"""Data about the labels currently added to the component."""
|
||||
return tuple(self.cfg["labels"])
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs):
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
|
||||
|
@ -155,6 +156,7 @@ class Tagger(TrainablePipe):
|
|||
guesses.append(doc_guesses)
|
||||
return guesses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs, batch_tag_ids):
|
||||
"""Modify a batch of documents, using pre-computed scores.
|
||||
|
||||
|
@ -177,6 +179,7 @@ class Tagger(TrainablePipe):
|
|||
if doc.c[j].tag == 0 or overwrite:
|
||||
doc.c[j].tag = self.vocab.strings[labels[tag_id]]
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||
"""Learn from a batch of documents and gold-standard information,
|
||||
updating the pipe's model. Delegates to predict and get_loss.
|
||||
|
@ -210,6 +213,7 @@ class Tagger(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(self, examples, *, drop=0., sgd=None, losses=None):
|
||||
"""Perform a "rehearsal" update from a batch of data. Rehearsal updates
|
||||
teach the current model to make predictions similar to an initial model,
|
||||
|
@ -245,6 +249,7 @@ class Tagger(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples, scores):
|
||||
"""Find the loss and gradient of loss for the batch of documents and
|
||||
their predicted scores.
|
||||
|
@ -269,6 +274,7 @@ class Tagger(TrainablePipe):
|
|||
raise ValueError(Errors.E910.format(name=self.name))
|
||||
return float(loss), d_scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(self, get_examples, *, nlp=None, labels=None):
|
||||
"""Initialize the pipe for training, using a representative set
|
||||
of data examples.
|
||||
|
@ -307,6 +313,7 @@ class Tagger(TrainablePipe):
|
|||
assert len(label_sample) > 0, Errors.E923.format(name=self.name)
|
||||
self.model.initialize(X=doc_sample, Y=label_sample)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label):
|
||||
"""Add a new label to the pipe.
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ from thinc.types import Floats2d
|
|||
import numpy
|
||||
from itertools import islice
|
||||
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ..language import Language
|
||||
from ..training import Example, validate_examples, validate_get_examples
|
||||
from ..errors import Errors
|
||||
|
@ -181,6 +181,7 @@ class TextCategorizer(TrainablePipe):
|
|||
"""
|
||||
return self.labels # type: ignore[return-value]
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]):
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
|
||||
|
@ -199,6 +200,7 @@ class TextCategorizer(TrainablePipe):
|
|||
scores = self.model.ops.asarray(scores)
|
||||
return scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Iterable[Doc], scores) -> None:
|
||||
"""Modify a batch of Doc objects, using pre-computed scores.
|
||||
|
||||
|
@ -211,6 +213,7 @@ class TextCategorizer(TrainablePipe):
|
|||
for j, label in enumerate(self.labels):
|
||||
doc.cats[label] = float(scores[i, j])
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -248,6 +251,7 @@ class TextCategorizer(TrainablePipe):
|
|||
losses[self.name] += loss
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -306,6 +310,7 @@ class TextCategorizer(TrainablePipe):
|
|||
truths = self.model.ops.asarray(truths) # type: ignore
|
||||
return truths, not_missing # type: ignore
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
|
||||
"""Find the loss and gradient of loss for the batch of documents and
|
||||
their predicted scores.
|
||||
|
@ -325,6 +330,7 @@ class TextCategorizer(TrainablePipe):
|
|||
mean_square_error = (d_scores**2).mean()
|
||||
return float(mean_square_error), d_scores
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label: str) -> int:
|
||||
"""Add a new label to the pipe.
|
||||
|
||||
|
@ -344,6 +350,7 @@ class TextCategorizer(TrainablePipe):
|
|||
self.vocab.strings.add(label)
|
||||
return 1
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
|
|
@ -2,7 +2,7 @@ from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any
|
|||
from thinc.api import Model, set_dropout_rate, Optimizer, Config
|
||||
from itertools import islice
|
||||
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ..training import Example, validate_examples, validate_get_examples
|
||||
from ..tokens import Doc
|
||||
from ..vocab import Vocab
|
||||
|
@ -109,6 +109,7 @@ class Tok2Vec(TrainablePipe):
|
|||
if isinstance(node, Tok2VecListener) and node.upstream_name in names:
|
||||
self.add_listener(node, component.name)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs: Iterable[Doc]):
|
||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
||||
Returns a single tensor for a batch of documents.
|
||||
|
@ -128,6 +129,7 @@ class Tok2Vec(TrainablePipe):
|
|||
listener.receive(batch_id, tokvecs, _empty_backprop)
|
||||
return tokvecs
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs: Sequence[Doc], tokvecses) -> None:
|
||||
"""Modify a batch of documents, using pre-computed scores.
|
||||
|
||||
|
@ -140,6 +142,7 @@ class Tok2Vec(TrainablePipe):
|
|||
assert tokvecs.shape[0] == len(doc)
|
||||
doc.tensor = tokvecs
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
|
@ -194,9 +197,11 @@ class Tok2Vec(TrainablePipe):
|
|||
self.listeners[-1].receive(batch_id, tokvecs, backprop)
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_loss(self, examples, scores) -> None:
|
||||
pass
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(
|
||||
self,
|
||||
get_examples: Callable[[], Iterable[Example]],
|
||||
|
|
|
@ -21,7 +21,7 @@ from ..ml.parser_model cimport predict_states, arg_max_if_valid
|
|||
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
|
||||
from ..ml.parser_model cimport get_c_weights, get_c_sizes
|
||||
from ..tokens.doc cimport Doc
|
||||
-from .trainable_pipe import TrainablePipe
|
||||
+from .trainable_pipe import TrainablePipe, trainable_pipe_nvtx_range
|
||||
from ._parser_internals cimport _beam_utils
|
||||
from ._parser_internals import _beam_utils
|
||||
|
||||
|
@ -159,6 +159,7 @@ cdef class Parser(TrainablePipe):
|
|||
def incorrect_spans_key(self):
|
||||
return self.cfg["incorrect_spans_key"]
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def add_label(self, label):
|
||||
resized = False
|
||||
for action in self.moves.action_types:
|
||||
|
@ -214,6 +215,7 @@ cdef class Parser(TrainablePipe):
|
|||
with self.model.use_params(params):
|
||||
yield
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def pipe(self, docs, *, int batch_size=256):
|
||||
"""Process a stream of documents.
|
||||
|
||||
|
@ -240,6 +242,7 @@ cdef class Parser(TrainablePipe):
|
|||
error_handler(self.name, self, batch_in_order, e)
|
||||
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def predict(self, docs):
|
||||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
|
@ -256,6 +259,7 @@ cdef class Parser(TrainablePipe):
|
|||
beam_density=self.cfg["beam_density"]
|
||||
)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def greedy_parse(self, docs, drop=0.):
|
||||
cdef vector[StateC*] states
|
||||
cdef StateClass state
|
||||
|
@ -280,6 +284,7 @@ cdef class Parser(TrainablePipe):
|
|||
del model
|
||||
return batch
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.):
|
||||
cdef Beam beam
|
||||
cdef Doc doc
|
||||
|
@ -321,6 +326,7 @@ cdef class Parser(TrainablePipe):
|
|||
unfinished.clear()
|
||||
free_activations(&activations)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def set_annotations(self, docs, states_or_beams):
|
||||
cdef StateClass state
|
||||
cdef Beam beam
|
||||
|
@ -331,6 +337,7 @@ cdef class Parser(TrainablePipe):
|
|||
for hook in self.postprocesses:
|
||||
hook(doc)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def transition_states(self, states, float[:, ::1] scores):
|
||||
cdef StateClass state
|
||||
cdef float* c_scores = &scores[0, 0]
|
||||
|
@ -360,6 +367,7 @@ cdef class Parser(TrainablePipe):
|
|||
action.do(states[i], action.label)
|
||||
free(is_valid)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update(self, examples, *, drop=0., sgd=None, losses=None):
|
||||
cdef StateClass state
|
||||
if losses is None:
|
||||
|
@ -432,6 +440,7 @@ cdef class Parser(TrainablePipe):
|
|||
del model
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def rehearse(self, examples, sgd=None, losses=None, **cfg):
|
||||
"""Perform a "rehearsal" update, to prevent catastrophic forgetting."""
|
||||
if losses is None:
|
||||
|
@ -481,6 +490,7 @@ cdef class Parser(TrainablePipe):
|
|||
del tutor
|
||||
return losses
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def update_beam(self, examples, *, beam_width,
|
||||
drop=0., sgd=None, losses=None, beam_density=0.0):
|
||||
states, golds, _ = self.moves.init_gold_batch(examples)
|
||||
|
@ -502,6 +512,7 @@ cdef class Parser(TrainablePipe):
|
|||
if sgd is not None:
|
||||
self.finish_update(sgd)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def get_batch_loss(self, states, golds, float[:, ::1] scores, losses):
|
||||
cdef StateClass state
|
||||
cdef Pool mem = Pool()
|
||||
|
@ -535,6 +546,7 @@ cdef class Parser(TrainablePipe):
|
|||
def set_output(self, nO):
|
||||
self.model.attrs["resize_output"](self.model, nO)
|
||||
|
||||
@trainable_pipe_nvtx_range
|
||||
def initialize(self, get_examples, nlp=None, labels=None):
|
||||
validate_get_examples(get_examples, "Parser.initialize")
|
||||
util.check_lexeme_norms(self.vocab, "parser or NER")
|
||||
|
|
Loading…
Reference in New Issue
Block a user