Remove omit_extra_lookups

This commit is contained in:
Ines Montani 2020-07-22 16:01:17 +02:00
parent 945f795a3e
commit 0fcd352179
4 changed files with 0 additions and 19 deletions

View File

@ -41,7 +41,6 @@ def init_model_cli(
truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"), truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"), vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"), model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
omit_extra_lookups: bool = Opt(False, "--omit-extra-lookups", "-OEL", help="Don't include extra lookups in model"),
base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base model (for languages with custom tokenizers)") base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base model (for languages with custom tokenizers)")
# fmt: on # fmt: on
): ):
@ -60,7 +59,6 @@ def init_model_cli(
truncate_vectors=truncate_vectors, truncate_vectors=truncate_vectors,
vectors_name=vectors_name, vectors_name=vectors_name,
model_name=model_name, model_name=model_name,
omit_extra_lookups=omit_extra_lookups,
base_model=base_model, base_model=base_model,
silent=False, silent=False,
) )
@ -77,7 +75,6 @@ def init_model(
truncate_vectors: int = 0, truncate_vectors: int = 0,
vectors_name: Optional[str] = None, vectors_name: Optional[str] = None,
model_name: Optional[str] = None, model_name: Optional[str] = None,
omit_extra_lookups: bool = False,
base_model: Optional[str] = None, base_model: Optional[str] = None,
silent: bool = True, silent: bool = True,
) -> Language: ) -> Language:
@ -109,13 +106,6 @@ def init_model(
with msg.loading("Creating model..."): with msg.loading("Creating model..."):
nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model) nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model)
# Create empty extra lexeme tables so the data from spacy-lookups-data
# isn't loaded if these features are accessed
if omit_extra_lookups:
nlp.vocab.lookups.remove_table("lexeme_cluster")
nlp.vocab.lookups.remove_table("lexeme_prob")
nlp.vocab.lookups.remove_table("lexeme_settings")
msg.good("Successfully created model") msg.good("Successfully created model")
if vectors_loc is not None: if vectors_loc is not None:
add_vectors( add_vectors(

View File

@ -120,13 +120,6 @@ def train(
# Load morph rules # Load morph rules
nlp.vocab.morphology.load_morph_exceptions(morph_rules) nlp.vocab.morphology.load_morph_exceptions(morph_rules)
# Create empty extra lexeme tables so the data from spacy-lookups-data
# isn't loaded if these features are accessed
if config["training"]["omit_extra_lookups"]:
nlp.vocab.lookups.remove_table("lexeme_cluster")
nlp.vocab.lookups.remove_table("lexeme_prob")
nlp.vocab.lookups.remove_table("lexeme_settings")
# Load a pretrained tok2vec model - cf. CLI command 'pretrain' # Load a pretrained tok2vec model - cf. CLI command 'pretrain'
if weights_data is not None: if weights_data is not None:
tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None) tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None)

View File

@ -47,7 +47,6 @@ score_weights = {"tags_acc": 0.2, "las": 0.4, "ents_f": 0.4}
# These settings are invalid for the transformer models. # These settings are invalid for the transformer models.
init_tok2vec = null init_tok2vec = null
discard_oversize = false discard_oversize = false
omit_extra_lookups = false
batch_by = "sequences" batch_by = "sequences"
raw_text = null raw_text = null
tag_map = null tag_map = null

View File

@ -211,7 +211,6 @@ class ConfigSchemaTraining(BaseModel):
score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Weights of each score type for selecting final model") score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Weights of each score type for selecting final model")
init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights") init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size") discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")
omit_extra_lookups: StrictBool = Field(..., title="Don't include extra lookups in model")
batch_by: StrictStr = Field(..., title="Batch examples by type") batch_by: StrictStr = Field(..., title="Batch examples by type")
raw_text: Optional[StrictStr] = Field(..., title="Raw text") raw_text: Optional[StrictStr] = Field(..., title="Raw text")
tag_map: Optional[StrictStr] = Field(..., title="Path to JSON-formatted tag map") tag_map: Optional[StrictStr] = Field(..., title="Path to JSON-formatted tag map")