mirror of https://github.com/explosion/spaCy.git
Remove omit_extra_lookups
This commit is contained in:
parent 945f795a3e
commit 0fcd352179
@@ -41,7 +41,6 @@ def init_model_cli(
     truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
     vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
     model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
-    omit_extra_lookups: bool = Opt(False, "--omit-extra-lookups", "-OEL", help="Don't include extra lookups in model"),
     base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base model (for languages with custom tokenizers)")
     # fmt: on
 ):
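
For readers unfamiliar with the pattern above: Opt is spaCy's alias for typer.Option. A minimal, self-contained sketch of the same declaration style, where the demo command and its single flag are hypothetical, for illustration only:

    # A minimal sketch of the CLI option pattern above, assuming Opt is an
    # alias for typer.Option as in spaCy's CLI helpers. The "demo" command
    # and its flag are hypothetical.
    from typing import Optional
    import typer

    Opt = typer.Option
    app = typer.Typer()

    @app.command()
    def demo(
        model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the model meta"),
    ):
        # Run as: python demo.py --model-name my_model
        typer.echo(f"model_name={model_name}")

    if __name__ == "__main__":
        app()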
@@ -60,7 +59,6 @@ def init_model_cli(
         truncate_vectors=truncate_vectors,
         vectors_name=vectors_name,
         model_name=model_name,
-        omit_extra_lookups=omit_extra_lookups,
         base_model=base_model,
         silent=False,
     )
@@ -77,7 +75,6 @@ def init_model(
     truncate_vectors: int = 0,
     vectors_name: Optional[str] = None,
     model_name: Optional[str] = None,
-    omit_extra_lookups: bool = False,
     base_model: Optional[str] = None,
     silent: bool = True,
 ) -> Language:
@@ -109,13 +106,6 @@ def init_model(
     with msg.loading("Creating model..."):
         nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model)
-
-    # Create empty extra lexeme tables so the data from spacy-lookups-data
-    # isn't loaded if these features are accessed
-    if omit_extra_lookups:
-        nlp.vocab.lookups.remove_table("lexeme_cluster")
-        nlp.vocab.lookups.remove_table("lexeme_prob")
-        nlp.vocab.lookups.remove_table("lexeme_settings")

     msg.good("Successfully created model")
     if vectors_loc is not None:
         add_vectors(
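
With the omit_extra_lookups flag gone, the behavior removed above can still be reproduced by hand through spaCy's public Lookups API. A minimal sketch, assuming spacy-lookups-data is installed so the tables are present:

    # Manually drop the extra lexeme tables that the removed
    # omit_extra_lookups branch used to delete. remove_table() raises
    # KeyError for a missing table, so membership is checked first.
    import spacy

    nlp = spacy.blank("en")
    for table in ("lexeme_cluster", "lexeme_prob", "lexeme_settings"):
        if table in nlp.vocab.lookups:
            nlp.vocab.lookups.remove_table(table)
    print(nlp.vocab.lookups.tables)  # remaining table names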
@@ -120,13 +120,6 @@ def train(
     # Load morph rules
     nlp.vocab.morphology.load_morph_exceptions(morph_rules)
-
-    # Create empty extra lexeme tables so the data from spacy-lookups-data
-    # isn't loaded if these features are accessed
-    if config["training"]["omit_extra_lookups"]:
-        nlp.vocab.lookups.remove_table("lexeme_cluster")
-        nlp.vocab.lookups.remove_table("lexeme_prob")
-        nlp.vocab.lookups.remove_table("lexeme_settings")

     # Load a pretrained tok2vec model - cf. CLI command 'pretrain'
     if weights_data is not None:
         tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None)
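
Aside from the removal, the surviving context line shows a defensive read of a nested, possibly absent config entry. The same pattern on a plain dict, with invented sample values:

    # Chained .get() calls: a missing "pretraining" section falls back to
    # an empty dict, and a missing "tok2vec_model" key falls back to None,
    # so neither absence raises KeyError.
    config = {"training": {"batch_by": "sequences"}}  # invented sample
    tok2vec_path = config.get("pretraining", {}).get("tok2vec_model", None)
    assert tok2vec_path is None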
@@ -47,7 +47,6 @@ score_weights = {"tags_acc": 0.2, "las": 0.4, "ents_f": 0.4}
 # These settings are invalid for the transformer models.
 init_tok2vec = null
 discard_oversize = false
-omit_extra_lookups = false
 batch_by = "sequences"
 raw_text = null
 tag_map = null
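
The hunk above edits a Thinc-style config file. A minimal sketch of how such a block parses, assuming the surrounding section is [training]; Thinc's Config reads values as JSON, so null becomes None and false becomes False:

    # Parse a Thinc-style config block with thinc.api.Config. The
    # [training] section header is assumed from context.
    from thinc.api import Config

    cfg_text = """
    [training]
    init_tok2vec = null
    discard_oversize = false
    batch_by = "sequences"
    """
    config = Config().from_str(cfg_text)
    assert config["training"]["init_tok2vec"] is None
    assert config["training"]["batch_by"] == "sequences"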
@@ -211,7 +211,6 @@ class ConfigSchemaTraining(BaseModel):
     score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Weights of each score type for selecting final model")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
     discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")
-    omit_extra_lookups: StrictBool = Field(..., title="Don't include extra lookups in model")
     batch_by: StrictStr = Field(..., title="Batch examples by type")
     raw_text: Optional[StrictStr] = Field(..., title="Raw text")
     tag_map: Optional[StrictStr] = Field(..., title="Path to JSON-formatted tag map")
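
For context on the schema line removed above: these config schemas are pydantic models, where Field(...) marks a setting as required and StrictBool rejects truthy non-booleans. A minimal sketch, assuming pydantic v1 semantics and a hypothetical DemoSchema:

    # Required-StrictBool pattern, assuming pydantic v1. DemoSchema is
    # hypothetical; the field mirrors one kept by the diff above.
    from pydantic import BaseModel, Field, StrictBool, ValidationError

    class DemoSchema(BaseModel):
        discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")

    print(DemoSchema(discard_oversize=False))
    try:
        DemoSchema(discard_oversize="false")  # not a real bool, rejected
    except ValidationError as err:
        print(err)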