diff --git a/spacy/errors.py b/spacy/errors.py
index 881a697f6..4edd1cbae 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -477,6 +477,8 @@ class Errors:
     E201 = ("Span index out of range.")
 
     # TODO: fix numbering after merging develop into master
+    E912 = ("Failed to initialize lemmatizer. Missing lemmatizer table(s) found "
+            "for mode '{mode}'. Required tables: {tables}. Found: {found}.")
     E913 = ("Corpus path can't be None. Maybe you forgot to define it in your "
             "config.cfg or override it on the CLI?")
     E914 = ("Executing {name} callback failed. Expected the function to "
@@ -556,10 +558,10 @@ class Errors:
     E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
     E954 = ("The Tok2Vec listener did not receive any valid input from an upstream "
             "component.")
-    E955 = ("Can't find table(s) '{table}' for language '{lang}' in "
-            "spacy-lookups-data. If you want to initialize a blank nlp object, "
-            "make sure you have the spacy-lookups-data package installed or "
-            "remove the [initialize.lookups] block from your config.")
+    E955 = ("Can't find table(s) {table} for language '{lang}' in "
+            "spacy-lookups-data. Make sure you have the package installed or "
+            "provide your own lookup tables if no default lookups are available "
+            "for your language.")
     E956 = ("Can't find component '{name}' in [components] block in the config. "
             "Available components: {opts}")
     E957 = ("Writing directly to Language.factories isn't needed anymore in "
@@ -685,9 +687,8 @@ class Errors:
     E1002 = ("Span index out of range.")
     E1003 = ("Unsupported lemmatizer mode '{mode}'.")
     E1004 = ("Missing lemmatizer table(s) found for lemmatizer mode '{mode}'. "
-             "Required tables '{tables}', found '{found}'. If you are not "
-             "providing custom lookups, make sure you have the package "
-             "spacy-lookups-data installed.")
+             "Required tables: {tables}. Found: {found}. Maybe you forgot to "
+             "call nlp.initialize() to load in the data?")
     E1005 = ("Unable to set attribute '{attr}' in tokenizer exception for "
              "'{chunk}'. Tokenizer exceptions are only allowed to specify "
              "`ORTH` and `NORM`.")
diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py
index 923e29a17..879229888 100644
--- a/spacy/lang/bn/__init__.py
+++ b/spacy/lang/bn/__init__.py
@@ -4,7 +4,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
 from .stop_words import STOP_WORDS
 from ...language import Language
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 
 
@@ -24,18 +23,11 @@ class Bengali(Language):
 @Bengali.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return Lemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return Lemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Bengali"]
diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py
index 1a7b19914..53069334e 100644
--- a/spacy/lang/el/__init__.py
+++ b/spacy/lang/el/__init__.py
@@ -7,7 +7,6 @@ from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
 from .lemmatizer import GreekLemmatizer
-from ...lookups import Lookups
 from ...language import Language
 
 
@@ -29,18 +28,11 @@ class Greek(Language):
 @Greek.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = GreekLemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return GreekLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return GreekLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Greek"]
diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index cc01f1aea..3a3ebeefd 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -8,7 +8,6 @@ from .syntax_iterators import SYNTAX_ITERATORS
 from .punctuation import TOKENIZER_INFIXES
 from .lemmatizer import EnglishLemmatizer
 from ...language import Language
-from ...lookups import Lookups
 
 
 class EnglishDefaults(Language.Defaults):
@@ -27,18 +26,11 @@ class English(Language):
 @English.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = EnglishLemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return EnglishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return EnglishLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["English"]
diff --git a/spacy/lang/fa/__init__.py b/spacy/lang/fa/__init__.py
index f3a6635dc..77ee3bca3 100644
--- a/spacy/lang/fa/__init__.py
+++ b/spacy/lang/fa/__init__.py
@@ -6,7 +6,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .punctuation import TOKENIZER_SUFFIXES
 from .syntax_iterators import SYNTAX_ITERATORS
 from ...language import Language
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 
 
@@ -27,18 +26,11 @@ class Persian(Language):
 @Persian.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return Lemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return Lemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Persian"]
diff --git a/spacy/lang/fr/__init__.py b/spacy/lang/fr/__init__.py
index 72e641d1f..1e0011fba 100644
--- a/spacy/lang/fr/__init__.py
+++ b/spacy/lang/fr/__init__.py
@@ -9,7 +9,6 @@ from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .lemmatizer import FrenchLemmatizer
-from ...lookups import Lookups
 from ...language import Language
 
 
@@ -32,18 +31,11 @@ class French(Language):
 @French.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = FrenchLemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return FrenchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return FrenchLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["French"]
diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py
index 9672dfd6e..62d7707f3 100644
--- a/spacy/lang/nb/__init__.py
+++ b/spacy/lang/nb/__init__.py
@@ -6,7 +6,6 @@ from .punctuation import TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
 from ...language import Language
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 
 
@@ -27,18 +26,11 @@ class Norwegian(Language):
 @Norwegian.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return Lemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return Lemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Norwegian"]
diff --git a/spacy/lang/nl/__init__.py b/spacy/lang/nl/__init__.py
index 15b6b9de2..a3591f1bf 100644
--- a/spacy/lang/nl/__init__.py
+++ b/spacy/lang/nl/__init__.py
@@ -1,5 +1,4 @@
 from typing import Optional
-
 from thinc.api import Model
 
 from .stop_words import STOP_WORDS
@@ -8,7 +7,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
 from .punctuation import TOKENIZER_SUFFIXES
 from .lemmatizer import DutchLemmatizer
-from ...lookups import Lookups
 from ...language import Language
 
 
@@ -29,18 +27,11 @@ class Dutch(Language):
 @Dutch.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = DutchLemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return DutchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return DutchLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Dutch"]
diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py
index 573dbc6f9..f7be8a6c2 100644
--- a/spacy/lang/pl/__init__.py
+++ b/spacy/lang/pl/__init__.py
@@ -34,18 +34,11 @@ class Polish(Language):
 @Polish.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "pos_lookup", "lookups": None},
+    default_config={"model": None, "mode": "pos_lookup"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = PolishLemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return PolishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return PolishLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Polish"]
diff --git a/spacy/lang/ru/__init__.py b/spacy/lang/ru/__init__.py
index 6436ae0c7..1d59ca043 100644
--- a/spacy/lang/ru/__init__.py
+++ b/spacy/lang/ru/__init__.py
@@ -6,7 +6,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
 from .lemmatizer import RussianLemmatizer
 from ...language import Language
-from ...lookups import Lookups
 
 
 class RussianDefaults(Language.Defaults):
@@ -23,17 +22,11 @@ class Russian(Language):
 @Russian.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "pymorphy2", "lookups": None},
+    default_config={"model": None, "mode": "pymorphy2"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    return RussianLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return RussianLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Russian"]
diff --git a/spacy/lang/sv/__init__.py b/spacy/lang/sv/__init__.py
index ea314f487..2490eb9ec 100644
--- a/spacy/lang/sv/__init__.py
+++ b/spacy/lang/sv/__init__.py
@@ -5,7 +5,6 @@ from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from ...language import Language
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 
 
@@ -30,18 +29,11 @@ class Swedish(Language):
 @Swedish.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "rule", "lookups": None},
+    default_config={"model": None, "mode": "rule"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return Lemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return Lemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Swedish"]
diff --git a/spacy/lang/uk/__init__.py b/spacy/lang/uk/__init__.py
index 006a1cf7f..73c065379 100644
--- a/spacy/lang/uk/__init__.py
+++ b/spacy/lang/uk/__init__.py
@@ -7,7 +7,6 @@ from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .lemmatizer import UkrainianLemmatizer
 from ...language import Language
-from ...lookups import Lookups
 
 
 class UkrainianDefaults(Language.Defaults):
@@ -24,17 +23,11 @@ class Ukrainian(Language):
 @Ukrainian.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={"model": None, "mode": "pymorphy2", "lookups": None},
+    default_config={"model": None, "mode": "pymorphy2"},
     default_score_weights={"lemma_acc": 1.0},
 )
-def make_lemmatizer(
-    nlp: Language,
-    model: Optional[Model],
-    name: str,
-    mode: str,
-    lookups: Optional[Lookups],
-):
-    return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
+    return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode)
 
 
 __all__ = ["Ukrainian"]
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 391769604..9be596868 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,26 +1,25 @@
-from typing import Optional, List, Dict, Any
+from typing import Optional, List, Dict, Any, Callable, Iterable, Iterator, Union
+from typing import Tuple
 from thinc.api import Model
+from pathlib import Path
 
 from .pipe import Pipe
 from ..errors import Errors
 from ..language import Language
+from ..training import Example
 from ..lookups import Lookups, load_lookups
 from ..scorer import Scorer
 from ..tokens import Doc, Token
 from ..vocab import Vocab
 from ..training import validate_examples
+from ..util import logger, SimpleFrozenList
 from .. import util
 
 
 @Language.factory(
     "lemmatizer",
     assigns=["token.lemma"],
-    default_config={
-        "model": None,
-        "mode": "lookup",
-        "lookups": None,
-        "overwrite": False,
-    },
+    default_config={"model": None, "mode": "lookup", "overwrite": False},
     default_score_weights={"lemma_acc": 1.0},
 )
 def make_lemmatizer(
@@ -28,13 +27,9 @@ def make_lemmatizer(
     model: Optional[Model],
     name: str,
     mode: str,
-    lookups: Optional[Lookups],
     overwrite: bool = False,
 ):
-    lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
-    return Lemmatizer(
-        nlp.vocab, model, name, mode=mode, lookups=lookups, overwrite=overwrite
-    )
+    return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
 
 
 class Lemmatizer(Pipe):
@@ -46,59 +41,19 @@ class Lemmatizer(Pipe):
     """
 
     @classmethod
-    def get_lookups_config(cls, mode: str) -> Dict:
+    def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
         """Returns the lookups configuration settings for a given mode for use
         in Lemmatizer.load_lookups.
 
         mode (str): The lemmatizer mode.
-        RETURNS (dict): The lookups configuration settings for this mode.
-
-        DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
+        RETURNS (Tuple[List[str], List[str]]): The required and optional
+            lookup tables for this mode.
         """
         if mode == "lookup":
-            return {
-                "required_tables": ["lemma_lookup"],
-            }
+            return (["lemma_lookup"], [])
         elif mode == "rule":
-            return {
-                "required_tables": ["lemma_rules"],
-                "optional_tables": ["lemma_exc", "lemma_index"],
-            }
-        return {}
-
-    @classmethod
-    def load_lookups(cls, lang: str, mode: str, lookups: Optional[Lookups]) -> Lookups:
-        """Load and validate lookups tables. If the provided lookups is None,
-        load the default lookups tables according to the language and mode
-        settings. Confirm that all required tables for the language and mode
-        are present.
-
-        lang (str): The language code.
-        mode (str): The lemmatizer mode.
-        lookups (Lookups): The provided lookups, may be None if the default
-            lookups should be loaded.
-        RETURNS (Lookups): The Lookups object.
-
-        DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
-        """
-        config = cls.get_lookups_config(mode)
-        required_tables = config.get("required_tables", [])
-        optional_tables = config.get("optional_tables", [])
-        if lookups is None:
-            lookups = load_lookups(lang=lang, tables=required_tables)
-            optional_lookups = load_lookups(
-                lang=lang, tables=optional_tables, strict=False
-            )
-            for table in optional_lookups.tables:
-                lookups.set_table(table, optional_lookups.get_table(table))
-        for table in required_tables:
-            if table not in lookups:
-                raise ValueError(
-                    Errors.E1004.format(
-                        mode=mode, tables=required_tables, found=lookups.tables
-                    )
-                )
-        return lookups
+            return (["lemma_rules"], ["lemma_exc", "lemma_index"])
+        return ([], [])
 
     def __init__(
         self,
@@ -107,7 +62,6 @@ class Lemmatizer(Pipe):
         name: str = "lemmatizer",
         *,
         mode: str = "lookup",
-        lookups: Optional[Lookups] = None,
         overwrite: bool = False,
     ) -> None:
         """Initialize a Lemmatizer.
@@ -116,9 +70,6 @@ class Lemmatizer(Pipe):
         model (Model): A model (not yet implemented).
         name (str): The component name. Defaults to "lemmatizer".
         mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
-        lookups (Lookups): The lookups object containing the (optional) tables
-            such as "lemma_rules", "lemma_index", "lemma_exc" and
-            "lemma_lookup". Defaults to None
         overwrite (bool): Whether to overwrite existing lemmas. Defaults to
             `False`.
 
@@ -128,8 +79,9 @@ class Lemmatizer(Pipe):
         self.model = model
         self.name = name
         self._mode = mode
-        self.lookups = lookups if lookups is not None else Lookups()
+        self.lookups = Lookups()
         self.overwrite = overwrite
+        self._validated = False
         if self.mode == "lookup":
             self.lemmatize = self.lookup_lemmatize
         elif self.mode == "rule":
@@ -153,12 +105,56 @@ class Lemmatizer(Pipe):
 
         DOCS: https://nightly.spacy.io/api/lemmatizer#call
         """
+        if not self._validated:
+            self._validate_tables(Errors.E1004)
         for token in doc:
             if self.overwrite or token.lemma == 0:
                 token.lemma_ = self.lemmatize(token)[0]
         return doc
 
-    def pipe(self, stream, *, batch_size=128):
+    def initialize(
+        self,
+        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+        *,
+        nlp: Optional[Language] = None,
+        lookups: Optional[Lookups] = None,
+    ):
+        """Initialize the lemmatizer and load in data.
+
+        get_examples (Callable[[], Iterable[Example]]): Function that
+            returns a representative sample of gold-standard Example objects.
+        nlp (Language): The current nlp object the component is part of.
+        lookups (Lookups): The lookups object containing the (optional) tables
+            such as "lemma_rules", "lemma_index", "lemma_exc" and
+            "lemma_lookup". Defaults to None.
+        """
+        required_tables, optional_tables = self.get_lookups_config(self.mode)
+        if lookups is None:
+            logger.debug("Lemmatizer: loading tables from spacy-lookups-data")
+            lookups = load_lookups(lang=self.vocab.lang, tables=required_tables)
+            optional_lookups = load_lookups(
+                lang=self.vocab.lang, tables=optional_tables, strict=False
+            )
+            for table in optional_lookups.tables:
+                lookups.set_table(table, optional_lookups.get_table(table))
+        self.lookups = lookups
+        self._validate_tables(Errors.E1004)
+
+    def _validate_tables(self, error_message: str = Errors.E912) -> None:
+        """Check that the lookups are correct for the current mode."""
+        required_tables, optional_tables = self.get_lookups_config(self.mode)
+        for table in required_tables:
+            if table not in self.lookups:
+                raise ValueError(
+                    error_message.format(
+                        mode=self.mode,
+                        tables=required_tables,
+                        found=self.lookups.tables,
+                    )
+                )
+        self._validated = True
+
+    def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
         """Apply the pipe to a stream of documents. This usually happens under
         the hood when the nlp object is called on a text and all components are
         applied to the Doc.
@@ -263,7 +259,7 @@ class Lemmatizer(Pipe):
         """
         return False
 
-    def score(self, examples, **kwargs) -> Dict[str, Any]:
+    def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
         """Score a batch of examples.
 
         examples (Iterable[Example]): The examples to score.
@@ -274,58 +270,66 @@ class Lemmatizer(Pipe):
         validate_examples(examples, "Lemmatizer.score")
         return Scorer.score_token_attr(examples, "lemma", **kwargs)
 
-    def to_disk(self, path, *, exclude=tuple()):
-        """Save the current state to a directory.
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
+    ):
+        """Serialize the pipe to disk.
 
-        path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist.
-        exclude (list): String names of serialization fields to exclude.
+        path (str / Path): Path to a directory.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
 
-        DOCS: https://nightly.spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk
         """
         serialize = {}
         serialize["vocab"] = lambda p: self.vocab.to_disk(p)
         serialize["lookups"] = lambda p: self.lookups.to_disk(p)
         util.to_disk(path, serialize, exclude)
 
-    def from_disk(self, path, *, exclude=tuple()):
-        """Loads state from a directory. Modifies the object in place and
-        returns it.
+    def from_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
+    ) -> "Lemmatizer":
+        """Load the pipe from disk. Modifies the object in place and returns it.
 
-        path (unicode or Path): A path to a directory.
-        exclude (list): String names of serialization fields to exclude.
-        RETURNS (Vocab): The modified `Vocab` object.
+        path (str / Path): Path to a directory.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (Lemmatizer): The modified Lemmatizer object.
 
-        DOCS: https://nightly.spacy.io/api/vocab#to_disk
+        DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk
         """
         deserialize = {}
         deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
         deserialize["lookups"] = lambda p: self.lookups.from_disk(p)
         util.from_disk(path, deserialize, exclude)
+        self._validate_tables()
+        return self
 
-    def to_bytes(self, *, exclude=tuple()) -> bytes:
-        """Serialize the current state to a binary string.
+    def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
+        """Serialize the pipe to a bytestring.
 
-        exclude (list): String names of serialization fields to exclude.
-        RETURNS (bytes): The serialized form of the `Vocab` object.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (bytes): The serialized object.
 
-        DOCS: https://nightly.spacy.io/api/vocab#to_bytes
+        DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes
         """
         serialize = {}
         serialize["vocab"] = self.vocab.to_bytes
         serialize["lookups"] = self.lookups.to_bytes
         return util.to_bytes(serialize, exclude)
 
-    def from_bytes(self, bytes_data: bytes, *, exclude=tuple()):
-        """Load state from a binary string.
+    def from_bytes(
+        self, bytes_data: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
+    ) -> "Lemmatizer":
+        """Load the pipe from a bytestring.
 
-        bytes_data (bytes): The data to load from.
-        exclude (list): String names of serialization fields to exclude.
-        RETURNS (Vocab): The `Vocab` object.
+        bytes_data (bytes): The serialized pipe.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (Lemmatizer): The loaded Lemmatizer.
 
-        DOCS: https://nightly.spacy.io/api/vocab#from_bytes
+        DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes
         """
         deserialize = {}
         deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
         deserialize["lookups"] = lambda b: self.lookups.from_bytes(b)
         util.from_bytes(bytes_data, deserialize, exclude)
+        self._validate_tables()
+        return self
diff --git a/spacy/tests/lang/test_lemmatizers.py b/spacy/tests/lang/test_lemmatizers.py
index 6e7f82341..5f45664eb 100644
--- a/spacy/tests/lang/test_lemmatizers.py
+++ b/spacy/tests/lang/test_lemmatizers.py
@@ -17,16 +17,31 @@ def test_lemmatizer_initialize(lang, capfd):
     @registry.misc("lemmatizer_init_lookups")
     def lemmatizer_init_lookups():
         lookups = Lookups()
-        lookups.add_table("lemma_lookup", {"cope": "cope"})
+        lookups.add_table("lemma_lookup", {"cope": "cope", "x": "y"})
         lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
         lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
         lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
         return lookups
 
-    """Test that languages can be initialized."""
+    # Test that languages can be initialized
     nlp = get_lang_class(lang)()
-    nlp.add_pipe("lemmatizer", config={"lookups": {"@misc": "lemmatizer_init_lookups"}})
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
+    assert not lemmatizer.lookups.tables
+    nlp.config["initialize"]["components"]["lemmatizer"] = {
+        "lookups": {"@misc": "lemmatizer_init_lookups"}
+    }
+    with pytest.raises(ValueError):
+        nlp("x")
+    nlp.initialize()
+    assert lemmatizer.lookups.tables
+    doc = nlp("x")
     # Check for stray print statements (see #3342)
-    doc = nlp("test")  # noqa: F841
     captured = capfd.readouterr()
     assert not captured.out
+    assert doc[0].lemma_ == "y"
+
+    # Test initialization by calling .initialize() directly
+    nlp = get_lang_class(lang)()
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
+    lemmatizer.initialize(lookups=lemmatizer_init_lookups())
+    assert nlp("x")[0].lemma_ == "y"
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 05e15bc16..d37c87059 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -8,61 +8,52 @@ from ..util import make_tempdir
 
 @pytest.fixture
 def nlp():
-    return English()
-
-
-@pytest.fixture
-def lemmatizer(nlp):
     @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
-        lookups.add_table("lemma_lookup", {"cope": "cope"})
+        lookups.add_table("lemma_lookup", {"cope": "cope", "coped": "cope"})
         lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
         lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
         lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
         return lookups
 
-    lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
-    )
-    return lemmatizer
+    nlp = English()
+    nlp.config["initialize"]["components"]["lemmatizer"] = {
+        "lookups": {"@misc": "cope_lookups"}
+    }
+    return nlp
 
 
 def test_lemmatizer_init(nlp):
-    @registry.misc("cope_lookups")
-    def cope_lookups():
-        lookups = Lookups()
-        lookups.add_table("lemma_lookup", {"cope": "cope"})
-        lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
-        lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
-        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
-        return lookups
-
-    lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "lookup", "lookups": {"@misc": "cope_lookups"}}
-    )
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
     assert isinstance(lemmatizer.lookups, Lookups)
+    assert not lemmatizer.lookups.tables
     assert lemmatizer.mode == "lookup"
+    with pytest.raises(ValueError):
+        nlp("test")
+    nlp.initialize()
+    assert lemmatizer.lookups.tables
+    assert nlp("cope")[0].lemma_ == "cope"
+    assert nlp("coped")[0].lemma_ == "cope"
     # replace any tables from spacy-lookups-data
     lemmatizer.lookups = Lookups()
-    doc = nlp("coping")
     # lookup with no tables sets text as lemma
-    assert doc[0].lemma_ == "coping"
-
+    assert nlp("cope")[0].lemma_ == "cope"
+    assert nlp("coped")[0].lemma_ == "coped"
     nlp.remove_pipe("lemmatizer")
-
-    @registry.misc("empty_lookups")
-    def empty_lookups():
-        return Lookups()
-
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
     with pytest.raises(ValueError):
-        nlp.add_pipe(
-            "lemmatizer",
-            config={"mode": "lookup", "lookups": {"@misc": "empty_lookups"}},
-        )
+        # Can't initialize without required tables
+        lemmatizer.initialize(lookups=Lookups())
+    lookups = Lookups()
+    lookups.add_table("lemma_lookup", {})
+    lemmatizer.initialize(lookups=lookups)
 
 
-def test_lemmatizer_config(nlp, lemmatizer):
+def test_lemmatizer_config(nlp):
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "rule"})
+    nlp.initialize()
+
     doc = nlp.make_doc("coping")
     doc[0].pos_ = "VERB"
     assert doc[0].lemma_ == ""
@@ -78,20 +69,21 @@ def test_lemmatizer_config(nlp):
     assert doc[0].lemma_ == "cope"
 
 
-def test_lemmatizer_serialize(nlp, lemmatizer):
-    @registry.misc("cope_lookups")
+def test_lemmatizer_serialize(nlp):
+    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "rule"})
+    nlp.initialize()
+
     def cope_lookups():
         lookups = Lookups()
-        lookups.add_table("lemma_lookup", {"cope": "cope"})
+        lookups.add_table("lemma_lookup", {"cope": "cope", "coped": "cope"})
         lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
         lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
         lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
         return lookups
 
     nlp2 = English()
-    lemmatizer2 = nlp2.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
-    )
+    lemmatizer2 = nlp2.add_pipe("lemmatizer", config={"mode": "rule"})
+    lemmatizer2.initialize(lookups=cope_lookups())
     lemmatizer2.from_bytes(lemmatizer.to_bytes())
     assert lemmatizer.to_bytes() == lemmatizer2.to_bytes()
    assert lemmatizer.lookups.tables == lemmatizer2.lookups.tables
@@ -100,9 +92,9 @@ def test_lemmatizer_serialize(nlp, lemmatizer):
     with make_tempdir() as tmp_dir:
         nlp.to_disk(tmp_dir)
         nlp2 = util.load_model_from_path(tmp_dir)
-    doc2 = nlp2.make_doc("coping")
-    doc2[0].pos_ = "VERB"
-    assert doc2[0].lemma_ == ""
-    doc2 = lemmatizer(doc2)
-    assert doc2[0].text == "coping"
-    assert doc2[0].lemma_ == "cope"
+        doc2 = nlp2.make_doc("coping")
+        doc2[0].pos_ = "VERB"
+        assert doc2[0].lemma_ == ""
+        doc2 = lemmatizer(doc2)
+        assert doc2[0].text == "coping"
+        assert doc2[0].lemma_ == "cope"
diff --git a/website/docs/api/lemmatizer.md b/website/docs/api/lemmatizer.md
index f980756e5..27ea04432 100644
--- a/website/docs/api/lemmatizer.md
+++ b/website/docs/api/lemmatizer.md
@@ -48,12 +48,11 @@ data format used by the lookup and rule-based lemmatizers, see
 > nlp.add_pipe("lemmatizer", config=config)
 > ```
 
-| Setting     | Description |
-| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `mode`      | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `"lookup"`. ~~str~~ |
-| `lookups`   | The lookups object containing the tables such as `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. If `None`, default tables are loaded from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). Defaults to `None`. ~~Optional[Lookups]~~ |
-| `overwrite` | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~ |
-| `model`     | **Not yet implemented:** the model to use. ~~Model~~ |
+| Setting     | Description |
+| ----------- | --------------------------------------------------------------------------------- |
+| `mode`      | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `"lookup"`. ~~str~~ |
+| `overwrite` | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~ |
+| `model`     | **Not yet implemented:** the model to use. ~~Model~~ |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/lemmatizer.py
@@ -76,15 +75,14 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#add_pipe).
 
-| Name           | Description |
-| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `vocab`        | The shared vocabulary. ~~Vocab~~ |
-| `model`        | **Not yet implemented:** The model to use. ~~Model~~ |
-| `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
-| _keyword-only_ | |
-| mode           | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `"lookup"`. ~~str~~ |
-| lookups        | A lookups object containing the tables such as `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. Defaults to `None`. ~~Optional[Lookups]~~ |
-| overwrite      | Whether to overwrite existing lemmas. ~~bool~ |
+| Name           | Description |
+| -------------- | --------------------------------------------------------------------------------------------------- |
+| `vocab`        | The shared vocabulary. ~~Vocab~~ |
+| `model`        | **Not yet implemented:** The model to use. ~~Model~~ |
+| `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
+| _keyword-only_ | |
+| mode           | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `"lookup"`. ~~str~~ |
+| overwrite      | Whether to overwrite existing lemmas. ~~bool~ |
 
 ## Lemmatizer.\_\_call\_\_ {#call tag="method"}
 
@@ -127,6 +125,37 @@ applied to the `Doc` in order.
 | `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ |
 | **YIELDS**   | The processed documents in order. ~~Doc~~ |
 
+## Lemmatizer.initialize {#initialize tag="method"}
+
+Initialize the lemmatizer and load any data resources. This method is typically
+called by [`Language.initialize`](/api/language#initialize) and lets you
+customize arguments it receives via the
+[`[initialize.components]`](/api/data-formats#config-initialize) block in the
+config. The loading only happens during initialization, typically before
+training. At runtime, all data is loaded from disk.
+
+> #### Example
+>
+> ```python
+> lemmatizer = nlp.add_pipe("lemmatizer")
+> lemmatizer.initialize(lookups=lookups)
+> ```
+>
+> ```ini
+> ### config.cfg
+> [initialize.components.lemmatizer]
+>
+> [initialize.components.lemmatizer.lookups]
+> @misc = "load_my_lookups.v1"
+> ```
+
+| Name           | Description |
+| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. Defaults to `None`. ~~Optional[Callable[[], Iterable[Example]]]~~ |
+| _keyword-only_ | |
+| `nlp`          | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
+| `lookups`      | The lookups object containing the tables such as `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. If `None`, default tables are loaded from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). Defaults to `None`. ~~Optional[Lookups]~~ |
+
 ## Lemmatizer.lookup_lemmatize {#lookup_lemmatize tag="method"}
 
 Lemmatize a token using a lookup-based approach. If no lemma is found, the
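
The net effect of the patch on usage: lookup tables are no longer passed to the lemmatizer factory via a `lookups` config setting but are loaded when the component is initialized, either from `spacy-lookups-data` or from a user-provided `Lookups` object. Below is a minimal sketch of the resulting workflow, pieced together from the tests and docs in the patch; the registry name and table contents are illustrative, not part of the patch:

```python
import spacy
from spacy import registry
from spacy.lookups import Lookups


@registry.misc("my_lemma_lookups.v1")  # illustrative name, not from the patch
def load_my_lookups() -> Lookups:
    lookups = Lookups()
    # "lookup" mode only requires the "lemma_lookup" table
    lookups.add_table("lemma_lookup", {"coping": "cope"})
    return lookups


nlp = spacy.blank("en")
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
assert not lemmatizer.lookups.tables  # the component starts out empty

# Option 1: wire the lookups through the [initialize.components] config block
nlp.config["initialize"]["components"]["lemmatizer"] = {
    "lookups": {"@misc": "my_lemma_lookups.v1"}
}
nlp.initialize()

# Option 2: call initialize() on the component directly
# lemmatizer.initialize(lookups=load_my_lookups())

assert nlp("coping")[0].lemma_ == "cope"
```

Running the pipeline before `initialize()` now raises a `ValueError` with `E1004`; the new `_validated` flag ensures the table check runs once per component rather than on every call.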