mirror of https://github.com/explosion/spaCy.git
Remove omit_extra_lookups
This commit is contained in:
parent 945f795a3e
commit 0fcd352179
@@ -41,7 +41,6 @@ def init_model_cli(
     truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
     vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
     model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
-    omit_extra_lookups: bool = Opt(False, "--omit-extra-lookups", "-OEL", help="Don't include extra lookups in model"),
     base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base model (for languages with custom tokenizers)")
     # fmt: on
 ):
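
For readers unfamiliar with the pattern above: Opt is spaCy's alias for typer.Option. A minimal, self-contained sketch of the same declaration style, where the demo command and its single flag are hypothetical, for illustration only:

    # A minimal sketch of the CLI option pattern above, assuming Opt is an
    # alias for typer.Option as in spaCy's CLI helpers. The "demo" command
    # and its flag are hypothetical.
    from typing import Optional
    import typer

    Opt = typer.Option
    app = typer.Typer()

    @app.command()
    def demo(
        model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
    ):
        # Run as: python demo.py --model-name my_model
        typer.echo(f"model_name={model_name}")

    if __name__ == "__main__":
        app()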
@@ -60,7 +59,6 @@ def init_model_cli(
         truncate_vectors=truncate_vectors,
         vectors_name=vectors_name,
         model_name=model_name,
-        omit_extra_lookups=omit_extra_lookups,
         base_model=base_model,
         silent=False,
     )
@@ -77,7 +75,6 @@ def init_model(
     truncate_vectors: int = 0,
     vectors_name: Optional[str] = None,
     model_name: Optional[str] = None,
-    omit_extra_lookups: bool = False,
     base_model: Optional[str] = None,
     silent: bool = True,
 ) -> Language:
@@ -109,13 +106,6 @@ def init_model(
     with msg.loading("Creating model..."):
         nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model)
-
-    # Create empty extra lexeme tables so the data from spacy-lookups-data
-    # isn't loaded if these features are accessed
-    if omit_extra_lookups:
-        nlp.vocab.lookups.remove_table("lexeme_cluster")
-        nlp.vocab.lookups.remove_table("lexeme_prob")
-        nlp.vocab.lookups.remove_table("lexeme_settings")

     msg.good("Successfully created model")
     if vectors_loc is not None:
         add_vectors(
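
With the omit_extra_lookups flag gone, the behavior removed above can still be reproduced by hand through spaCy's public Lookups API. A minimal sketch, assuming spacy-lookups-data is installed so the tables are present:

    # Manually drop the extra lexeme tables that the removed
    # omit_extra_lookups branch used to delete. remove_table() raises
    # KeyError for a missing table, so membership is checked first.
    import spacy

    nlp = spacy.blank("en")
    for table in ("lexeme_cluster", "lexeme_prob", "lexeme_settings"):
        if table in nlp.vocab.lookups:
            nlp.vocab.lookups.remove_table(table)
    print(nlp.vocab.lookups.tables)  # remaining table names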
@@ -120,13 +120,6 @@ def train(
     # Load morph rules
     nlp.vocab.morphology.load_morph_exceptions(morph_rules)
-
-    # Create empty extra lexeme tables so the data from spacy-lookups-data
-    # isn't loaded if these features are accessed
-    if config["training"]["omit_extra_lookups"]:
-        nlp.vocab.lookups.remove_table("lexeme_cluster")
-        nlp.vocab.lookups.remove_table("lexeme_prob")
-        nlp.vocab.lookups.remove_table("lexeme_settings")

     # Load a pretrained tok2vec model - cf. CLI command 'pretrain'
     if weights_data is not None:
         tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None)
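
Aside from the removal, the surviving context line shows a defensive read of a nested, possibly absent config entry. The same pattern on a plain dict, with invented sample values:

    # Chained .get() calls: a missing "pretraining" section falls back to
    # an empty dict, and a missing "tok2vec_model" key falls back to None,
    # so neither absence raises KeyError.
    config = {"training": {"batch_by": "sequences"}}  # invented sample
    tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None)
    assert tok2vec_path is None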
@@ -47,7 +47,6 @@ score_weights = {"tags_acc": 0.2, "las": 0.4, "ents_f": 0.4}
 # These settings are invalid for the transformer models.
 init_tok2vec = null
 discard_oversize = false
-omit_extra_lookups = false
 batch_by = "sequences"
 raw_text = null
 tag_map = null
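
The hunk above edits a Thinc-style config file. A minimal sketch of how such a block parses, assuming the surrounding section is [training]; Thinc's Config reads values as JSON, so null becomes None and false becomes False:

    # Parse a Thinc-style config block with thinc.api.Config. The
    # [training] section header is assumed from context.
    from thinc.api import Config

    cfg_text = """
    [training]
    init_tok2vec = null
    discard_oversize = false
    batch_by = "sequences"
    """
    config = Config().from_str(cfg_text)
    assert config["training"]["init_tok2vec"] is None
    assert config["training"]["batch_by"] == "sequences"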
@@ -211,7 +211,6 @@ class ConfigSchemaTraining(BaseModel):
     score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Weights of each score type for selecting final model")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
     discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")
-    omit_extra_lookups: StrictBool = Field(..., title="Don't include extra lookups in model")
     batch_by: StrictStr = Field(..., title="Batch examples by type")
     raw_text: Optional[StrictStr] = Field(..., title="Raw text")
     tag_map: Optional[StrictStr] = Field(..., title="Path to JSON-formatted tag map")
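
For context on the schema line removed above: these config schemas are pydantic models, where Field(...) marks a setting as required and StrictBool rejects truthy non-booleans. A minimal sketch, assuming pydantic v1 semantics and a hypothetical DemoSchema:

    # Required-StrictBool pattern, assuming pydantic v1. DemoSchema is
    # hypothetical; the field mirrors one kept by the diff above.
    from pydantic import BaseModel, Field, StrictBool, ValidationError

    class DemoSchema(BaseModel):
        discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")

    print(DemoSchema(discard_oversize=False))
    try:
        DemoSchema(discard_oversize="false")  # not a real bool, rejected
    except ValidationError as err:
        print(err)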