Sync missing and misaligned values in Tagger loss (#6689)

Use `None` for both missing and misaligned annotation in
`Tagger.get_loss`, reverting to the default missing value in the loss
function.
This commit is contained in:
Adriane Boyd 2021-01-10 01:30:37 +01:00 committed by GitHub
parent c04bab6bae
commit ad43cbb042
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 2 deletions

View File

@ -256,8 +256,14 @@ class Tagger(TrainablePipe):
DOCS: https://nightly.spacy.io/api/tagger#get_loss
"""
validate_examples(examples, "Tagger.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, missing_value="")
truths = [eg.get_aligned("TAG", as_string=True) for eg in examples]
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
# Convert the empty tag "" to the missing value None so that both misaligned
# tokens and tokens with missing annotation carry the loss function's
# default missing value.
truths = []
for eg in examples:
    # Compare by value (!=), not identity: `is not ""` only works when the
    # string happens to be interned and triggers a SyntaxWarning on 3.8+.
    eg_truths = [tag if tag != "" else None for tag in eg.get_aligned("TAG", as_string=True)]
    truths.append(eg_truths)
d_scores, loss = loss_func(scores, truths)
if self.model.ops.xp.isnan(loss):
raise ValueError(Errors.E910.format(name=self.name))

View File

@ -37,7 +37,16 @@ TRAIN_DATA = [
]
PARTIAL_DATA = [
    # Partial annotation: an empty tag string marks a token that has no
    # gold tag.
    ("I like green eggs", {"tags": ["", "V", "J", ""]}),
    # Misaligned partial annotation: the supplied "words" split "hates" into
    # "hate" + "s", which presumably differs from the pipeline's own
    # tokenization of the text (verify against the tokenizer in use).
    (
        "He hates green eggs",
        {
            "words": ["He", "hate", "s", "green", "eggs"],
            "tags": ["", "V", "S", "J", ""],
        },
    ),
]