diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index 7ba008fb6..f1786e04b 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -1,6 +1,5 @@
 [nlp]
 lang = null
-vocab_data = {}
 pipeline = []
 
 [nlp.tokenizer]
diff --git a/spacy/errors.py b/spacy/errors.py
index 04d831c41..07c3df686 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -83,7 +83,7 @@ class Warnings:
             "doesn't have a normalization table, please ignore this warning. "
             "If this is surprising, make sure you have the spacy-lookups-data "
             "package installed. The languages with lexeme normalization tables "
-            "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.")
+            "are currently: {langs}")
 
     # TODO: fix numbering after merging develop into master
     W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index e4250b932..dfbb943f8 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -184,8 +184,10 @@ class Tagger(Pipe):
         lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
         if not any(table in self.vocab.lookups for table in lemma_tables):
             warnings.warn(Warnings.W022)
-        if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0:
-            warnings.warn(Warnings.W033.format(model="part-of-speech tagger"))
+        lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
+        if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
+            langs = ", ".join(util.LEXEME_NORM_LANGS)
+            warnings.warn(Warnings.W033.format(model="part-of-speech tagger", langs=langs))
         orig_tag_map = dict(self.vocab.morphology.tag_map)
         new_tag_map = {}
         for example in get_examples():
diff --git a/spacy/schemas.py b/spacy/schemas.py
index ad16f3233..e55123e14 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -239,7 +239,6 @@ class ConfigSchemaNlp(BaseModel):
     pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order")
     tokenizer: Callable = Field(..., title="The tokenizer to use")
     lemmatizer: Callable = Field(..., title="The lemmatizer to use")
-    vocab_data: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., title="Vocabulary data, e.g. lexeme normalization tables")
     # fmt: on
 
     class Config:
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 09616ee75..f640e2e8d 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -431,8 +431,10 @@ cdef class Parser:
 
     def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs):
         self.cfg.update(kwargs)
-        if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0:
-            warnings.warn(Warnings.W033.format(model="parser or NER"))
+        lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
+        if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
+            langs = ", ".join(util.LEXEME_NORM_LANGS)
+            warnings.warn(Warnings.W033.format(model="parser or NER", langs=langs))
         if not hasattr(get_examples, '__call__'):
             gold_tuples = get_examples
             get_examples = lambda: gold_tuples
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 71539fe60..4a6bf73a5 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -342,7 +342,8 @@ def test_overfitting_IO():
 
 
 def test_ner_warns_no_lookups():
-    nlp = Language()
+    nlp = English()
+    assert nlp.lang in util.LEXEME_NORM_LANGS
     nlp.vocab.lookups = Lookups()
     assert not len(nlp.vocab.lookups)
     nlp.add_pipe("ner")
diff --git a/spacy/util.py b/spacy/util.py
index 0d732034f..18ce7e474 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -53,6 +53,7 @@ if TYPE_CHECKING:
 
 _PRINT_ENV = False
 OOV_RANK = numpy.iinfo(numpy.uint64).max
+LEXEME_NORM_LANGS = ["da", "de", "el", "en", "id", "lb", "pt", "ru", "sr", "ta", "th"]
 
 
 class registry(thinc.registry):
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 0f99a45f5..56e62834a 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -17,13 +17,20 @@ from .lemmatizer import Lemmatizer
 from .attrs import intify_attrs, NORM, IS_STOP
 from .vectors import Vectors
 from .util import link_vectors_to_models, registry
-from .lookups import Lookups
+from .lookups import Lookups, load_lookups
 from . import util
 from .lang.norm_exceptions import BASE_NORMS
 from .lang.lex_attrs import LEX_ATTRS, is_stop, get_lang
 
 
-def create_vocab(lang, defaults, lemmatizer=None, vocab_data={}, vectors_name=None):
+def create_vocab(lang, defaults, lemmatizer=None, vectors_name=None, load_lookups_data=True):
+    # If the spacy-lookups-data package is installed, we pre-populate the lookups
+    # with lexeme data, if available
+    if load_lookups_data:
+        tables = ["lexeme_norm", "lexeme_prob", "lexeme_cluster", "lexeme_settings"]
+        lookups = load_lookups(lang, tables=tables, strict=False)
+    else:
+        lookups = Lookups()
     lex_attrs = {**LEX_ATTRS, **defaults.lex_attr_getters}
     # This is messy, but it's the minimal working fix to Issue #639.
     lex_attrs[IS_STOP] = functools.partial(is_stop, stops=defaults.stop_words)
@@ -32,13 +39,8 @@ def create_vocab(lang, defaults, lemmatizer=None, vectors_name=No
     lex_attrs[NORM] = util.add_lookups(
         lex_attrs.get(NORM, LEX_ATTRS[NORM]),
         BASE_NORMS,
-        vocab_data.get("lexeme_norm", {}),
+        lookups.get_table("lexeme_norm", {}),
     )
-    lookups = Lookups()
-    for name, data in vocab_data.items():
-        if name not in lookups:
-            data = data if data is not None else {}
-            lookups.add_table(name, data)
     return Vocab(
         lex_attr_getters=lex_attrs,
         lemmatizer=lemmatizer,
@@ -49,7 +51,6 @@ def create_vocab(lang, defaults, lemmatizer=None, vocab_data={}, vectors_name=No
     )
 
 
-
 cdef class Vocab:
     """A look-up table that allows you to access `Lexeme` objects.
     The `Vocab` instance also provides access to the `StringStore`, and owns underlying