diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index debecd0b1..bf3749c9e 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -87,6 +87,7 @@ def train(
     sourced_components = get_sourced_components(config)
     with show_validation_error(config_path):
         nlp, config = util.load_model_from_config(config)
+    util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"])
     if config["training"]["vectors"] is not None:
         util.load_vectors_into_model(nlp, config["training"]["vectors"])
     raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index f4a453f2a..5cd97a0eb 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -12,7 +12,6 @@ gpu_allocator = null
 lang = null
 pipeline = []
 disabled = []
-load_vocab_data = true
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
@@ -59,6 +58,7 @@ accumulate_gradient = 1
 init_tok2vec = ${paths.init_tok2vec}
 raw_text = ${paths.raw}
 vectors = null
+lookups = null
 # Controls early-stopping. 0 or -1 mean unlimited.
 patience = 1600
 max_epochs = 0
diff --git a/spacy/language.py b/spacy/language.py
index d530e6b92..7d463731a 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -31,6 +31,7 @@ from .schemas import ConfigSchema
 from .git_info import GIT_VERSION
 from . import util
 from . import about
+from .lookups import load_lookups


 # This is the base config will all settings (training etc.)
@@ -86,6 +87,13 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
     return tokenizer_factory


+@registry.misc("spacy.LookupsDataLoader.v1")
+def load_lookups_data(lang, tables):
+    util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
+    lookups = load_lookups(lang=lang, tables=tables)
+    return lookups
+
+
 class Language:
     """A text-processing pipeline. Usually you'll load this once per process,
     and pass the instance around your application.
@@ -152,7 +160,6 @@ class Language:
                 self.lang,
                 self.Defaults,
                 vectors_name=vectors_name,
-                load_data=self._config["nlp"]["load_vocab_data"],
             )
         else:
             if (self.lang and vocab.lang) and (self.lang != vocab.lang):
diff --git a/spacy/schemas.py b/spacy/schemas.py
index db71af9ca..60655da8c 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -8,6 +8,7 @@ from collections import defaultdict
 from thinc.api import Optimizer

 from .attrs import NAMES
+from .lookups import Lookups

 if TYPE_CHECKING:
     # This lets us add type hints for mypy etc. without causing circular imports
@@ -198,6 +199,7 @@ class ModelMetaSchema(BaseModel):
 class ConfigSchemaTraining(BaseModel):
     # fmt: off
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
+    lookups: Optional[Lookups] = Field(..., title="Vocab lookups")
     dev_corpus: StrictStr = Field(..., title="Path in the config to the dev data")
     train_corpus: StrictStr = Field(..., title="Path in the config to the training data")
     batcher: Batcher = Field(..., title="Batcher for the training data")
@@ -228,7 +230,6 @@ class ConfigSchemaNlp(BaseModel):
     pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order")
     disabled: List[StrictStr] = Field(..., title="Pipeline components to disable by default")
     tokenizer: Callable = Field(..., title="The tokenizer to use")
-    load_vocab_data: StrictBool = Field(..., title="Whether to load additional vocab data from spacy-lookups-data")
     before_creation: Optional[Callable[[Type["Language"]], Type["Language"]]] = Field(..., title="Optional callback to modify Language class before initialization")
     after_creation: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after creation and before the pipeline is constructed")
     after_pipeline_creation: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after the pipeline is constructed")
diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py
index 1f073ab32..8c931d31e 100644
--- a/spacy/tests/test_util.py
+++ b/spacy/tests/test_util.py
@@ -69,7 +69,6 @@ def test_util_dot_section():
     [nlp]
     lang = "en"
     pipeline = ["textcat"]
-    load_vocab_data = false

     [components]

@@ -95,15 +94,13 @@ def test_util_dot_section():
     # not exclusive_classes
     assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
     # Test that default values got overwritten
-    assert not en_config["nlp"]["load_vocab_data"]
-    assert nl_config["nlp"]["load_vocab_data"]  # default value True
+    assert en_config["nlp"]["pipeline"] == ["textcat"]
+    assert nl_config["nlp"]["pipeline"] == []  # default value []
     # Test proper functioning of 'dot_to_object'
     with pytest.raises(KeyError):
         dot_to_object(en_config, "nlp.pipeline.tagger")
     with pytest.raises(KeyError):
         dot_to_object(en_config, "nlp.unknownattribute")
-    assert not dot_to_object(en_config, "nlp.load_vocab_data")
-    assert dot_to_object(nl_config, "nlp.load_vocab_data")
     assert isinstance(dot_to_object(nl_config, "training.optimizer"), Optimizer)


diff --git a/spacy/util.py b/spacy/util.py
index 18b34e4d6..88162b23a 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -253,6 +253,14 @@ def load_vectors_into_model(
                 nlp.vocab.strings.add(vectors_nlp.vocab.strings[key])


+def load_vocab_data_into_model(
+    nlp: "Language", *, lookups: Optional["Lookups"] = None
+) -> None:
+    """Load vocab data."""
+    if lookups:
+        nlp.vocab.lookups = lookups
+
+
 def load_model(
     name: Union[str, Path],
     *,
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 69cec7d3d..7d8dfd5d6 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -28,7 +28,7 @@ cdef class Vocab:
     cpdef readonly StringStore strings
     cpdef public Morphology morphology
     cpdef public object vectors
-    cpdef public object lookups
+    cpdef public object _lookups
     cpdef public object writing_system
     cpdef public object get_noun_chunks
     cdef readonly int length
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index ef0847e54..ce104d9db 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -22,14 +22,9 @@ from .lang.norm_exceptions import BASE_NORMS
 from .lang.lex_attrs import LEX_ATTRS, is_stop, get_lang


-def create_vocab(lang, defaults, vectors_name=None, load_data=True):
+def create_vocab(lang, defaults, vectors_name=None):
     # If the spacy-lookups-data package is installed, we pre-populate the lookups
     # with lexeme data, if available
-    if load_data:
-        tables = ["lexeme_norm", "lexeme_prob", "lexeme_cluster", "lexeme_settings"]
-        lookups = load_lookups(lang, tables=tables, strict=False)
-    else:
-        lookups = Lookups()
     lex_attrs = {**LEX_ATTRS, **defaults.lex_attr_getters}
     # This is messy, but it's the minimal working fix to Issue #639.
     lex_attrs[IS_STOP] = functools.partial(is_stop, stops=defaults.stop_words)
@@ -38,11 +33,9 @@ def create_vocab(lang, defaults, vectors_name=None, load_data=True):
     lex_attrs[NORM] = util.add_lookups(
         lex_attrs.get(NORM, LEX_ATTRS[NORM]),
         BASE_NORMS,
-        lookups.get_table("lexeme_norm", {}),
     )
     return Vocab(
         lex_attr_getters=lex_attrs,
-        lookups=lookups,
         writing_system=defaults.writing_system,
         get_noun_chunks=defaults.syntax_iterators.get("noun_chunks"),
         vectors_name=vectors_name,
@@ -424,6 +417,19 @@ cdef class Vocab:
             orth = self.strings.add(orth)
         return orth in self.vectors

+    property lookups:
+        def __get__(self):
+            return self._lookups
+
+        def __set__(self, lookups):
+            self._lookups = lookups
+            if lookups.has_table("lexeme_norm"):
+                self.lex_attr_getters[NORM] = util.add_lookups(
+                    self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
+                    self.lookups.get_table("lexeme_norm"),
+                )
+
     def to_disk(self, path, *, exclude=tuple()):
         """Save the current state to a directory.
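
Beyond the patch itself: with the boolean `load_vocab_data` switch removed from `[nlp]`, lookup tables are now injected through the `training.lookups` slot, which can reference the `spacy.LookupsDataLoader.v1` function registered in language.py above. A minimal sketch of such a config block, assuming spacy-lookups-data is installed; the table list here is an assumption, reusing the table names from the removed `create_vocab` branch:

[training.lookups]
@misc = "spacy.LookupsDataLoader.v1"
lang = ${nlp.lang}
tables = ["lexeme_norm", "lexeme_prob", "lexeme_cluster", "lexeme_settings"]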
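
The other half of the change is the new `Vocab.lookups` property in vocab.pyx: assigning a `Lookups` object that carries a `lexeme_norm` table rewires the NORM lexeme attribute getter through `util.add_lookups`, so norms resolve against the table before falling back to the default getter. A minimal Python sketch exercising the setter, with a hypothetical table entry:

from spacy.lookups import Lookups
from spacy.vocab import Vocab

vocab = Vocab()
lookups = Lookups()
lookups.add_table("lexeme_norm", {"whom": "who"})  # hypothetical norm entry
vocab.lookups = lookups  # __set__ installs the table into lex_attr_getters[NORM]
assert vocab["whom"].norm_ == "who"  # NORM now resolves via the lookup table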