spaCy (mirror of https://github.com/explosion/spaCy.git)

commit ee5332eba5 (parent b0929271a8)

bool -> float: change the tagger's label_smoothing setting from a boolean flag to a float weight (default 0.05), and drop the np alias for numpy in debug_data.
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -7,7 +7,7 @@ import srsly
 from wasabi import Printer, MESSAGES, msg
 import typer
 import math
-import numpy as np
+import numpy
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli, _format_number
@@ -524,9 +524,9 @@ def debug_data(
     msg.divider("Part-of-speech Tagging")
     label_list, counts = zip(*gold_train_data["tags"].items())
     msg.info(f"{len(label_list)} label(s) in train data")
-    p = np.array(counts)
+    p = numpy.array(counts)
     p = p / p.sum()
-    norm_entropy = (-p * np.log2(p)).sum() / np.log2(len(label_list))
+    norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
     msg.info(f"{norm_entropy} is the normalised label entropy")
     model_labels = _get_labels_from_model(nlp, "tagger")
     labels = set(label_list)
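The normalisation above divides the Shannon entropy of the tag distribution by log2(len(label_list)), its maximum possible value, so the reported number always lies in [0, 1]: values near 1.0 mean the tags are close to uniformly distributed, values near 0.0 mean one tag dominates. A minimal standalone sketch of the same computation with made-up counts (the tag names and numbers are illustrative only):

import numpy

# Hypothetical tag frequencies from a training corpus.
tag_counts = {"NOUN": 50, "VERB": 30, "ADJ": 20}
label_list, counts = zip(*tag_counts.items())

p = numpy.array(counts)
p = p / p.sum()  # relative frequencies: [0.5, 0.3, 0.2]

# Shannon entropy in bits, scaled by the maximum entropy for 3 labels.
norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
print(norm_entropy)  # ~0.937: the tag distribution is close to uniform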
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "tagger",
     assigns=["token.tag"],
-    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": False},
+    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.05},
     default_score_weights={"tag_acc": 1.0},
 )
 def make_tagger(
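With this factory change, label_smoothing is passed through directly as the smoothing weight: 0.0 disables smoothing, and the new default is 0.05. A short usage sketch (standard nlp.add_pipe API; the 0.1 value is just an example):

import spacy

nlp = spacy.blank("en")

# Override the default of 0.05 with an explicit float weight;
# use 0.0 to turn label smoothing off entirely.
tagger = nlp.add_pipe("tagger", config={"label_smoothing": 0.1})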
@@ -55,7 +55,7 @@ def make_tagger(
     overwrite: bool,
     scorer: Optional[Callable],
     neg_prefix: str,
-    label_smoothing: bool,
+    label_smoothing: float,
 ):
     """Construct a part-of-speech tagger component.
 
@@ -90,7 +90,7 @@ class Tagger(TrainablePipe):
         overwrite=BACKWARD_OVERWRITE,
         scorer=tagger_score,
         neg_prefix="!",
-        label_smoothing=False,
+        label_smoothing=0.05,
     ):
         """Initialize a part-of-speech tagger.
 
@@ -258,7 +258,6 @@ class Tagger(TrainablePipe):
         DOCS: https://spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        self.cfg["label_smoothing"] = 0.05 if self.cfg["label_smoothing"] else 0.0
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"])
         # Convert empty tag "" to missing value None so that both misaligned
         # tokens and tokens with missing annotation have the default missing
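Since the config value is now already a float, the deleted line above (which mapped True to 0.05 and False to 0.0) is redundant: the value goes straight into SequenceCategoricalCrossentropy. Conceptually, label smoothing moves probability mass off the gold tag and spreads it evenly over the other labels. A hedged numpy sketch of the smoothed one-hot target for one token and three labels (this mirrors the intended behaviour, not thinc's exact implementation):

import numpy

n_labels = 3
label_smoothing = 0.05

one_hot = numpy.array([1.0, 0.0, 0.0])  # gold tag is the first label

# Take `label_smoothing` mass from the gold label and share it
# equally among the remaining labels.
smoothed = one_hot * (1.0 - label_smoothing)
smoothed[one_hot == 0.0] = label_smoothing / (n_labels - 1)
print(smoothed)  # [0.95, 0.025, 0.025]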
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -70,10 +70,10 @@ PARTIAL_DATA = [
 def test_label_smoothing():
     nlp = Language()
     tagger_no_ls = nlp.add_pipe(
-        "tagger", "no_label_smoothing", config=dict(label_smoothing=False)
+        "tagger", "no_label_smoothing", config=dict(label_smoothing=0.0)
     )
     tagger_ls = nlp.add_pipe(
-        "tagger", "label_smoothing", config=dict(label_smoothing=True)
+        "tagger", "label_smoothing"
     )
     train_examples = []
     losses = {}
|
@ -87,9 +87,9 @@ def test_label_smoothing():
|
||||||
tag_scores, bp_tag_scores = tagger_ls.model.begin_update(
|
tag_scores, bp_tag_scores = tagger_ls.model.begin_update(
|
||||||
[eg.predicted for eg in train_examples]
|
[eg.predicted for eg in train_examples]
|
||||||
)
|
)
|
||||||
no_ls_probs = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
|
no_ls_grads= tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
ls_probs = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
|
ls_grads= tagger_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
assert_array_almost_equal((ls_probs - no_ls_probs)[0], [0.05, -0.025, -0.025])
|
assert_array_almost_equal((ls_grads - no_ls_grads)[0], [0.05, -0.025, -0.025])
|
||||||
|
|
||||||
|
|
||||||
def test_no_label():
|
def test_no_label():
|
||||||
|
|
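The expected vector in the assertion follows from the cross-entropy gradient: with softmax outputs, the per-token gradient is guesses - targets, so for identical guesses the two taggers' gradients differ by exactly targets_no_smoothing - targets_smoothing. A quick numpy check of that arithmetic, using the smoothed target from the sketch above:

import numpy
from numpy.testing import assert_array_almost_equal

one_hot = numpy.array([1.0, 0.0, 0.0])        # unsmoothed target
smoothed = numpy.array([0.95, 0.025, 0.025])  # target with label_smoothing=0.05

# (guesses - smoothed) - (guesses - one_hot) == one_hot - smoothed
assert_array_almost_equal(one_hot - smoothed, [0.05, -0.025, -0.025])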