Annotate pipe before update

As discussed internally, we want a pipe to annotate the examples before
it is updated with gold/silver data. Otherwise, the pipe's annotations
may be (too) informed by the gold/silver data.
This commit is contained in:
Daniël de Kok 2023-01-20 14:59:47 +01:00
parent 3ce9a3d2c4
commit f470672972

View File

@@ -1078,6 +1078,19 @@ class Language:
teacher_pipes = dict(teacher.pipeline) teacher_pipes = dict(teacher.pipeline)
for name, student_proc in self.pipeline: for name, student_proc in self.pipeline:
if name in annotates:
for doc, eg in zip(
_pipe(
(eg.predicted for eg in examples),
proc=student_proc,
name=name,
default_error_handler=self.default_error_handler,
kwargs=pipe_kwargs[name],
),
examples,
):
eg.predicted = doc
if ( if (
name not in exclude name not in exclude
and isinstance(student_proc, ty.DistillableComponent) and isinstance(student_proc, ty.DistillableComponent)
@@ -1096,18 +1109,7 @@ class Language:
losses=losses, losses=losses,
**component_cfg[name], **component_cfg[name],
) )
if name in annotates:
for doc, eg in zip(
_pipe(
(eg.predicted for eg in examples),
proc=student_proc,
name=name,
default_error_handler=self.default_error_handler,
kwargs=pipe_kwargs[name],
),
examples,
):
eg.predicted = doc
return losses return losses
def disable_pipes(self, *names) -> "DisabledPipes": def disable_pipes(self, *names) -> "DisabledPipes":