diff --git a/spacy/about.py b/spacy/about.py
index eddbeea09..eb85e6af3 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -5,3 +5,5 @@ __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"
 __projects_branch__ = "v3"
+__lookups_tag__ = "v1.0.3"
+__lookups_url__ = f"https://raw.githubusercontent.com/explosion/spacy-lookups-data/{__lookups_tag__}/spacy_lookups_data/data/"
diff --git a/spacy/errors.py b/spacy/errors.py
index eadbf63d6..56cdde409 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -1,6 +1,8 @@
 from typing import Literal
 import warnings
 
+from . import about
+
 
 class ErrorsWithCodes(type):
     def __getattribute__(self, code):
@@ -103,13 +105,14 @@ class Warnings(metaclass=ErrorsWithCodes):
             "table. This may degrade the performance of the model to some "
             "degree. If this is intentional or the language you're using "
             "doesn't have a normalization table, please ignore this warning. "
-            "If this is surprising, make sure you have the spacy-lookups-data "
-            "package installed and load the table in your config. The "
-            "languages with lexeme normalization tables are currently: "
-            "{langs}\n\nLoad the table in your config with:\n\n"
+            "If this is surprising, make sure you are loading the table in "
+            "your config. The languages with lexeme normalization tables are "
+            "currently: {langs}\n\nAn example of how to load a table in "
+            "your config:\n\n"
             "[initialize.lookups]\n"
-            "@misc = \"spacy.LookupsDataLoader.v1\"\n"
+            "@misc = \"spacy.LookupsDataLoaderFromURL.v1\"\n"
             "lang = ${{nlp.lang}}\n"
+            f'url = "{about.__lookups_url__}"\n'
             "tables = [\"lexeme_norm\"]\n")
     W035 = ("Discarding subpattern '{pattern}' due to an unrecognized "
             "attribute or operator.")
@@ -961,6 +964,18 @@ class Errors(metaclass=ErrorsWithCodes):
     E4003 = ("Training examples for distillation must have the exact same tokens in the "
              "reference and predicted docs.")
     E4004 = ("Backprop is not supported when is_train is not set.")
+    E4005 = ("Required lemmatizer table(s) {missing_tables} not found in "
+             "[initialize] or in registered lookups (spacy-lookups-data). An "
+             "example of how to load lemmatizer tables in [initialize]:\n\n"
+             "[initialize.components]\n\n"
+             "[initialize.components.{pipe_name}]\n\n"
+             "[initialize.components.{pipe_name}.lookups]\n"
+             '@misc = "spacy.LookupsDataLoaderFromURL.v1"\n'
+             "lang = ${{nlp.lang}}\n"
+             f'url = "{about.__lookups_url__}"\n'
+             "tables = {tables}\n"
+             "# or required tables only: tables = {required_tables}\n")
+    E4006 = ("Server error ({status_code}), couldn't fetch {url}")
 
 
 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}
diff --git a/spacy/language.py b/spacy/language.py
index 13a3d101a..c5750ea85 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -104,13 +104,6 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
     return tokenizer_factory
 
 
-@registry.misc("spacy.LookupsDataLoader.v1")
-def load_lookups_data(lang, tables):
-    util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
-    lookups = load_lookups(lang=lang, tables=tables)
-    return lookups
-
-
 class Language:
     """A text-processing pipeline. Usually you'll load this once per process,
     and pass the instance around your application.
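Taken together, the `about.py` and `errors.py` changes point the W033 hint at a pinned download URL instead of requiring the `spacy-lookups-data` package. A minimal sketch (not part of the patch) of how the new constants compose into a concrete table URL, following the `url + lang + "_" + table + ".json"` scheme used by the loader introduced below; `"en"` and `"lexeme_norm"` are example inputs:

```python
# Sketch: how about.__lookups_tag__ / __lookups_url__ expand into a table URL.
# The filename scheme mirrors load_lookups_data_from_url() in spacy/lookups.py.
lookups_tag = "v1.0.3"
lookups_url = (
    "https://raw.githubusercontent.com/explosion/spacy-lookups-data/"
    f"{lookups_tag}/spacy_lookups_data/data/"
)
table_url = lookups_url + "en" + "_" + "lexeme_norm" + ".json"
print(table_url)
# https://raw.githubusercontent.com/explosion/spacy-lookups-data/v1.0.3/spacy_lookups_data/data/en_lexeme_norm.json
```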
diff --git a/spacy/lookups.py b/spacy/lookups.py
index d7cc44fb3..0e6fb3b7c 100644
--- a/spacy/lookups.py
+++ b/spacy/lookups.py
@@ -1,17 +1,42 @@
 from typing import Any, List, Union, Optional, Dict
 from pathlib import Path
+import requests
 import srsly
 from preshed.bloom import BloomFilter
 from collections import OrderedDict
 
 from .errors import Errors
 from .util import SimpleFrozenDict, ensure_path, registry, load_language_data
+from .util import logger
 from .strings import get_string_id
 
 
 UNSET = object()
 
 
+@registry.misc("spacy.LookupsDataLoader.v1")
+def load_lookups_data(lang, tables):
+    logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
+    lookups = load_lookups(lang=lang, tables=tables)
+    return lookups
+
+
+@registry.misc("spacy.LookupsDataLoaderFromURL.v1")
+def load_lookups_data_from_url(lang, tables, url):
+    logger.debug(f"Loading lookups from {url}: {tables}")
+    lookups = Lookups()
+    for table in tables:
+        table_url = url + lang + "_" + table + ".json"
+        r = requests.get(table_url)
+        if r.status_code != 200:
+            raise ValueError(
+                Errors.E4006.format(status_code=r.status_code, url=table_url)
+            )
+        table_data = r.json()
+        lookups.add_table(table, table_data)
+    return lookups
+
+
 def load_lookups(lang: str, tables: List[str], strict: bool = True) -> "Lookups":
     """Load the data from the spacy-lookups-data package for a given language,
     if available. Returns an empty `Lookups` container if there's no data or if the package
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 9c2fc2f09..03495ba74 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,5 +1,6 @@
 from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
 from thinc.api import Model
+import srsly
 from pathlib import Path
 import warnings
 
@@ -155,8 +156,24 @@
         """
         required_tables, optional_tables = self.get_lookups_config(self.mode)
         if lookups is None:
-            logger.debug("Lemmatizer: loading tables from spacy-lookups-data")
-            lookups = load_lookups(lang=self.vocab.lang, tables=required_tables)
+            logger.debug(
+                "Lemmatizer: no lemmatizer lookups tables provided, "
+                "trying to load tables from registered lookups (usually "
+                "spacy-lookups-data)"
+            )
+            lookups = load_lookups(
+                lang=self.vocab.lang, tables=required_tables, strict=False
+            )
+            missing_tables = set(required_tables) - set(lookups.tables)
+            if len(missing_tables) > 0:
+                raise ValueError(
+                    Errors.E4005.format(
+                        missing_tables=list(missing_tables),
+                        pipe_name=self.name,
+                        required_tables=srsly.json_dumps(required_tables),
+                        tables=srsly.json_dumps(required_tables + optional_tables),
+                    )
+                )
         optional_lookups = load_lookups(
             lang=self.vocab.lang, tables=optional_tables, strict=False
         )
diff --git a/spacy/util.py b/spacy/util.py
index e2ca0e6a4..d653e0305 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -55,7 +55,7 @@ if TYPE_CHECKING:
 # fmt: off
 OOV_RANK = numpy.iinfo(numpy.uint64).max
 DEFAULT_OOV_PROB = -20
-LEXEME_NORM_LANGS = ["cs", "da", "de", "el", "en", "id", "lb", "mk", "pt", "ru", "sr", "ta", "th"]
+LEXEME_NORM_LANGS = ["cs", "da", "de", "el", "en", "grc", "id", "lb", "mk", "pt", "ru", "sr", "ta", "th"]
 
 # Default order of sections in the config file. Not all sections needs to exist,
 # and additional sections are added at the end, in alphabetical order.
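A hedged usage sketch of the new registered loader, calling it directly rather than through the config system. It assumes this patch is applied and that network access is available; the URL value mirrors `about.__lookups_url__`:

```python
# Sketch: direct call to the new @registry.misc loader. A non-200 response for
# any requested table raises ValueError via the new E4006 error code.
from spacy.lookups import load_lookups_data_from_url

lookups = load_lookups_data_from_url(
    lang="en",
    tables=["lexeme_norm"],
    url=(
        "https://raw.githubusercontent.com/explosion/spacy-lookups-data/"
        "v1.0.3/spacy_lookups_data/data/"
    ),
)
print(lookups.tables)  # ['lexeme_norm']
```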
diff --git a/website/docs/api/lemmatizer.mdx b/website/docs/api/lemmatizer.mdx
index f6657dbf4..5bd0112e2 100644
--- a/website/docs/api/lemmatizer.mdx
+++ b/website/docs/api/lemmatizer.mdx
@@ -14,7 +14,7 @@ implement their own lemmatizer components via
 [language-specific factories](/usage/processing-pipelines#factories-language).
 The default data used is provided by the
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data)
-extension package.
+repository.
 
 For a trainable lemmatizer, see [`EditTreeLemmatizer`](/api/edittreelemmatizer).
 
@@ -174,6 +174,8 @@ training. At runtime, all data is loaded from disk.
 >
 > ```python
 > lemmatizer = nlp.add_pipe("lemmatizer")
+> req_tables, opt_tables = lemmatizer.get_lookups_config(mode=lemmatizer.mode)
+> lookups = load_lookups(nlp.lang, req_tables + opt_tables)
 > lemmatizer.initialize(lookups=lookups)
 > ```
 >
diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx
index b13a6d28b..01690f161 100644
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@@ -9,6 +9,7 @@ menu:
   - ['Batchers', 'batchers']
   - ['Augmenters', 'augmenters']
   - ['Callbacks', 'callbacks']
+  - ['Miscellaneous', 'misc']
   - ['Training & Alignment', 'gold']
   - ['Utility Functions', 'util']
 ---
@@ -931,6 +932,54 @@ methods are wrapped: `pipe`, `predict`, `set_annotations`, `update`, `rehearse`,
 | `additional_pipe_functions` | Additional pipeline methods to wrap. Keys are pipeline names and values are lists of method identifiers. Defaults to `None`. ~~Optional[Dict[str, List[str]]]~~ |
 | **CREATES**                 | A function that takes the current `nlp` and wraps pipe models and methods in NVTX ranges. ~~Callable[[Language], Language]~~                                    |
 
+## Miscellaneous {id="misc",version="3"}
+
+### spacy.LookupsDataLoader.v1 {id="lookups_data_reader",tag="registered function"}
+
+> #### Example config
+>
+> ```ini
+> [initialize.lookups]
+> @misc = "spacy.LookupsDataLoader.v1"
+> lang = ${nlp.lang}
+> tables = ["lexeme_prob"]
+> ```
+
+Load the specified tables from the [`lookups` registry](#registry), which are
+provided by a package such as
+[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data).
+
+| Name        | Description                                                                                       |
+| ----------- | ------------------------------------------------------------------------------------------------ |
+| `lang`      | The language. ~~str~~                                                                             |
+| `tables`    | The tables to load. ~~List[str]~~                                                                 |
+| **CREATES** | A function that loads the specified tables from the lookups registry. ~~Callable[[], Lookups]~~   |
+
+### spacy.LookupsDataLoaderFromURL.v1 {id="lookups_data_reader_from_url",tag="registered function",version="4"}
+
+> #### Example config
+>
+> ```ini
+> [initialize.components.lemmatizer.lookups]
+> @misc = "spacy.LookupsDataLoaderFromURL.v1"
+> lang = ${nlp.lang}
+> url = "https://raw.githubusercontent.com/explosion/spacy-lookups-data/v1.0.3/spacy_lookups_data/data/"
+> tables = ["lemma_rules","lemma_exc","lemma_index"]
+> ```
+
+Load the specified tables from the provided URL. The individual tables are
+expected to have filenames in the format `{lang}_{table}.json` under the
+specified URL directory, as in the
+[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data/tree/master/spacy_lookups_data/data)
+repository.
+
+| Name        | Description                                                                                   |
+| ----------- | --------------------------------------------------------------------------------------------- |
+| `lang`      | The language. ~~str~~                                                                          |
+| `url`       | The URL for the directory where the tables can be downloaded. ~~str~~                          |
+| `tables`    | The tables to load. ~~List[str]~~                                                              |
+| **CREATES** | A function that loads the specified tables from the provided URL. ~~Callable[[], Lookups]~~    |
+
 ## Training data and alignment {id="gold",source="spacy/training"}
 
 ### training.offsets_to_biluo_tags {id="offsets_to_biluo_tags",tag="function"}
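The registered function documented above can also be resolved by hand through the registry, which is what the config system does while resolving `[initialize]`. A sketch, assuming this patch is installed; the table names match the lemmatizer example config:

```python
# Sketch: resolve the @misc function the same way the config system would.
from spacy.util import registry

loader = registry.misc.get("spacy.LookupsDataLoaderFromURL.v1")
lookups = loader(
    lang="en",
    tables=["lemma_rules", "lemma_exc", "lemma_index"],
    url=(
        "https://raw.githubusercontent.com/explosion/spacy-lookups-data/"
        "v1.0.3/spacy_lookups_data/data/"
    ),
)
```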
diff --git a/website/docs/usage/index.mdx b/website/docs/usage/index.mdx
index 07f2bd282..b283d117e 100644
--- a/website/docs/usage/index.mdx
+++ b/website/docs/usage/index.mdx
@@ -59,19 +59,18 @@ $ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
 ```
 
 spaCy also lets you install extra dependencies by specifying the following
-keywords in brackets, e.g. `spacy[ja]` or `spacy[lookups,transformers]` (with
+keywords in brackets, e.g. `spacy[ja]` or `spacy[apple,transformers]` (with
 multiple comma-separated extras). See the `[options.extras_require]` section in
 spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
 
 > #### Example
 >
 > ```bash
-> $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
+> $ pip install %%SPACY_PKG_NAME[apple,transformers]%%SPACY_PKG_FLAGS
 > ```
 
 | Name             | Description                                                                                                                                                                                                                                                    |
 | ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `lookups`        | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
 | `transformers`   | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline.                                                                                    |
 | `cuda`, ...      | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options.                                                                                |
 | `apple`          | Install [`thinc-apple-ops`](https://github.com/explosion/thinc-apple-ops) to improve performance on an Apple M1.                                                                                                                                               |
@@ -174,7 +173,7 @@ $ pip install --no-build-isolation --editable .  # compile and install spaCy
 To install with extras:
 
 ```bash
-$ pip install --no-build-isolation --editable .[lookups,cuda102]
+$ pip install --no-build-isolation --editable .[ja,cuda102]
 ```
 
 How to install compilers and related build tools:
diff --git a/website/docs/usage/linguistic-features.mdx b/website/docs/usage/linguistic-features.mdx
index 55d5680fe..add27de07 100644
--- a/website/docs/usage/linguistic-features.mdx
+++ b/website/docs/usage/linguistic-features.mdx
@@ -148,11 +148,11 @@ component.
 
-The data for spaCy's lemmatizers is distributed in the package
+The data for spaCy's lemmatizers is distributed in the repository
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). The
 provided trained pipelines already include all the required tables, but if you
-are creating new pipelines, you'll probably want to install `spacy-lookups-data`
-to provide the data when the lemmatizer is initialized.
+are creating new pipelines, you can load data from the repository in the
+lemmatizer initialization.
 
 ### Lookup lemmatizer {id="lemmatizer-lookup"}
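To make the revised usage docs concrete, a sketch of initializing a lookup lemmatizer on a fresh pipeline with tables fetched at initialization time, combining `get_lookups_config` (as in the `lemmatizer.mdx` example) with the URL loader. Assumes the patch is applied and the network is reachable:

```python
# Sketch: initialize a lookup lemmatizer without spacy-lookups-data installed.
import spacy
from spacy.lookups import load_lookups_data_from_url

nlp = spacy.blank("en")
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
req_tables, opt_tables = lemmatizer.get_lookups_config(mode=lemmatizer.mode)
lookups = load_lookups_data_from_url(
    nlp.lang,
    req_tables + opt_tables,
    url=(
        "https://raw.githubusercontent.com/explosion/spacy-lookups-data/"
        "v1.0.3/spacy_lookups_data/data/"
    ),
)
lemmatizer.initialize(lookups=lookups)
print(nlp("cats")[0].lemma_)
```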
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index b6c8b9b4c..081040ceb 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -46,7 +46,6 @@ const QuickstartInstall = ({ id, title }) => {
     const pipExtras = [
         hardware === 'gpu' && (platform !== 'arm' || os === 'linux') && cuda,
         train && 'transformers',
-        train && 'lookups',
        apple && 'apple',
         ...modelExtras,
     ]
@@ -210,9 +209,6 @@ const QuickstartInstall = ({ id, title }) => {
                         # packages only available via pip
                     </QS>
-                    <QS>
-                        pip install spacy-lookups-data
-                    </QS>
                     {languages.map(({ code, models: modelOptions }) => {
                         const pkg = modelOptions[efficiency ? 0 : modelOptions.length - 1]
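Finally, the failure path guarded by the new E4006 code is worth illustrating: requesting a table that does not exist under the URL directory surfaces the HTTP status and the offending URL instead of failing silently. A sketch with a deliberately bogus table name (hypothetical, chosen to force a 404):

```python
# Sketch: the E4006 path. A missing table raises ValueError with status + URL.
from spacy.lookups import load_lookups_data_from_url

try:
    load_lookups_data_from_url(
        lang="en",
        tables=["no_such_table"],  # hypothetical table name to force a 404
        url=(
            "https://raw.githubusercontent.com/explosion/spacy-lookups-data/"
            "v1.0.3/spacy_lookups_data/data/"
        ),
    )
except ValueError as err:
    print(err)  # Server error (404), couldn't fetch .../en_no_such_table.json
```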