💫 Make serialization methods consistent (#3385)

* Make serialization methods consistent

Use an `exclude` keyword argument instead of arbitrary named keyword arguments, and add deprecation handling for the old API

* Update docs and add section on serialization fields
Ines Montani 2019-03-10 19:16:45 +01:00 committed by Matthew Honnibal
parent 9a8f169e5c
commit 7ba3a5d95c
25 changed files with 598 additions and 314 deletions

View File

@ -70,6 +70,12 @@ class Warnings(object):
W013 = ("As of v2.1.0, {obj}.merge is deprecated. Please use the more "
"efficient and less error-prone Doc.retokenize context manager "
"instead.")
W014 = ("As of v2.1.0, the `disable` keyword argument on the serialization "
"methods is and should be replaced with `exclude`. This makes it "
"consistent with the other objects serializable.")
W015 = ("As of v2.1.0, the use of keyword arguments to exclude fields from "
"being serialized or deserialized is deprecated. Please use the "
"`exclude` argument instead. For example: exclude=['{arg}'].")
@add_codes
@ -348,7 +354,10 @@ class Errors(object):
"This is likely a bug in spaCy, so feel free to open an issue.")
E127 = ("Cannot create phrase pattern representation for length 0. This "
"is likely a bug in spaCy.")
E128 = ("Unsupported serialization argument: '{arg}'. The use of keyword "
"arguments to exclude fields from being serialized or deserialized "
"is now deprecated. Please use the `exclude` argument instead. "
"For example: exclude=['{arg}'].")
@add_codes
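Taken together, W014 covers the renamed `disable` argument, while W015/E128 cover the old per-field keyword arguments. A minimal sketch of how they surface to users (hypothetical snippet, assuming spaCy v2.1 and a blank English pipeline):

```python
import spacy

nlp = spacy.blank("en")
doc = nlp("hello world")

# Renamed argument: emits the W014 deprecation warning, then behaves
# like exclude=["ner"]
nlp_bytes = nlp.to_bytes(disable=["ner"])

# Old-style field keyword: now raises E128 as a ValueError
doc.to_bytes(tensor=False)
```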

View File

@ -28,7 +28,7 @@ from .lang.punctuation import TOKENIZER_INFIXES
from .lang.tokenizer_exceptions import TOKEN_MATCH
from .lang.tag_map import TAG_MAP
from .lang.lex_attrs import LEX_ATTRS, is_stop
from .errors import Errors
from .errors import Errors, Warnings, deprecation_warning
from . import util
from . import about
@ -699,124 +699,114 @@ class Language(object):
self.tokenizer._reset_cache(keys)
nr_seen = 0
def to_disk(self, path, disable=tuple()):
def to_disk(self, path, exclude=tuple(), disable=None):
"""Save the current state to a directory. If a model is loaded, this
will include the model.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be strings or `Path`-like objects.
disable (list): Names of pipeline components to disable and prevent
from being saved.
path (unicode or Path): Path to a directory, which will be created if
it doesn't exist.
exclude (list): Names of components or serialization fields to exclude.
EXAMPLE:
>>> nlp.to_disk('/path/to/models')
DOCS: https://spacy.io/api/language#to_disk
"""
if disable is not None:
deprecation_warning(Warnings.W014)
exclude = disable
path = util.ensure_path(path)
serializers = OrderedDict(
(
("tokenizer", lambda p: self.tokenizer.to_disk(p, vocab=False)),
("meta.json", lambda p: p.open("w").write(srsly.json_dumps(self.meta))),
)
)
serializers = OrderedDict()
serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(p, exclude=["vocab"])
serializers["meta.json"] = lambda p: p.open("w").write(srsly.json_dumps(self.meta))
for name, proc in self.pipeline:
if not hasattr(proc, "name"):
continue
if name in disable:
if name in exclude:
continue
if not hasattr(proc, "to_disk"):
continue
serializers[name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])
serializers["vocab"] = lambda p: self.vocab.to_disk(p)
util.to_disk(path, serializers, {p: False for p in disable})
util.to_disk(path, serializers, exclude)
def from_disk(self, path, disable=tuple()):
def from_disk(self, path, exclude=tuple(), disable=None):
"""Loads state from a directory. Modifies the object in place and
returns it. If the saved `Language` object contains a model, the
model will be loaded.
path (unicode or Path): A path to a directory. Paths may be either
strings or `Path`-like objects.
disable (list): Names of the pipeline components to disable.
path (unicode or Path): A path to a directory.
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The modified `Language` object.
EXAMPLE:
>>> from spacy.language import Language
>>> nlp = Language().from_disk('/path/to/models')
DOCS: https://spacy.io/api/language#from_disk
"""
if disable is not None:
deprecation_warning(Warnings.W014)
exclude = disable
path = util.ensure_path(path)
deserializers = OrderedDict(
(
("meta.json", lambda p: self.meta.update(srsly.read_json(p))),
(
"vocab",
lambda p: (
self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self)
),
),
("tokenizer", lambda p: self.tokenizer.from_disk(p, vocab=False)),
)
)
deserializers = OrderedDict()
deserializers["meta.json"] = lambda p: self.meta.update(srsly.read_json(p))
deserializers["vocab"] = lambda p: self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self)
deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(p, exclude=["vocab"])
for name, proc in self.pipeline:
if name in disable:
if name in exclude:
continue
if not hasattr(proc, "from_disk"):
continue
deserializers[name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
exclude = {p: False for p in disable}
if not (path / "vocab").exists():
exclude["vocab"] = True
deserializers[name] = lambda p, proc=proc: proc.from_disk(p, exclude=["vocab"])
if not (path / "vocab").exists() and "vocab" not in exclude:
# Convert to list here in case exclude is (default) tuple
exclude = list(exclude) + ["vocab"]
util.from_disk(path, deserializers, exclude)
self._path = path
return self
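A usage sketch of the reworked disk round-trip (hypothetical paths; the pretrained package name is an assumption for illustration):

```python
import spacy
from spacy.language import Language

nlp = spacy.load("en_core_web_sm")  # assumed installed, for illustration

# Skip the "ner" component when saving
nlp.to_disk("/tmp/model", exclude=["ner"])

# Excluded names match either pipeline components or serialization
# fields such as "vocab" or "tokenizer"
nlp2 = Language().from_disk("/tmp/model", exclude=["parser"])
```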
def to_bytes(self, disable=[], **exclude):
def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
"""Serialize the current state to a binary string.
disable (list): Names of pipeline components to disable and prevent
from being serialized.
exclude (list): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Language` object.
DOCS: https://spacy.io/api/language#to_bytes
"""
serializers = OrderedDict(
(
("vocab", lambda: self.vocab.to_bytes()),
("tokenizer", lambda: self.tokenizer.to_bytes(vocab=False)),
("meta", lambda: srsly.json_dumps(self.meta)),
)
)
for i, (name, proc) in enumerate(self.pipeline):
if name in disable:
if disable is not None:
deprecation_warning(Warnings.W014)
exclude = disable
serializers = OrderedDict()
serializers["vocab"] = lambda: self.vocab.to_bytes()
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])
serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
for name, proc in self.pipeline:
if name in exclude:
continue
if not hasattr(proc, "to_bytes"):
continue
serializers[i] = lambda proc=proc: proc.to_bytes(vocab=False)
serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])
exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
return util.to_bytes(serializers, exclude)
def from_bytes(self, bytes_data, disable=[]):
def from_bytes(self, bytes_data, exclude=tuple(), disable=None, **kwargs):
"""Load state from a binary string.
bytes_data (bytes): The data to load from.
disable (list): Names of the pipeline components to disable.
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The `Language` object.
DOCS: https://spacy.io/api/language#from_bytes
"""
deserializers = OrderedDict(
(
("meta", lambda b: self.meta.update(srsly.json_loads(b))),
(
"vocab",
lambda b: (
self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self)
),
),
("tokenizer", lambda b: self.tokenizer.from_bytes(b, vocab=False)),
)
)
for i, (name, proc) in enumerate(self.pipeline):
if name in disable:
if disable is not None:
deprecation_warning(Warnings.W014)
exclude = disable
deserializers = OrderedDict()
deserializers["meta.json"] = lambda b: self.meta.update(srsly.json_loads(b))
deserializers["vocab"] = lambda b: self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self)
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(b, exclude=["vocab"])
for name, proc in self.pipeline:
if name in exclude:
continue
if not hasattr(proc, "from_bytes"):
continue
deserializers[i] = lambda b, proc=proc: proc.from_bytes(b, vocab=False)
util.from_bytes(bytes_data, deserializers, {})
deserializers[name] = lambda b, proc=proc: proc.from_bytes(b, exclude=["vocab"])
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
util.from_bytes(bytes_data, deserializers, exclude)
return self
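And the equivalent bytes round-trip, continuing the sketch above; the legacy `disable` argument still works for now but warns:

```python
nlp_bytes = nlp.to_bytes(exclude=["tokenizer"])
nlp2 = Language().from_bytes(nlp_bytes, exclude=["meta"])

# Pre-v2.1 style: emits W014 and is treated as exclude=["ner"]
legacy_bytes = nlp.to_bytes(disable=["ner"])
```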

View File

@ -141,16 +141,21 @@ class Pipe(object):
with self.model.use_params(params):
yield
def to_bytes(self, **exclude):
"""Serialize the pipe to a bytestring."""
def to_bytes(self, exclude=tuple(), **kwargs):
"""Serialize the pipe to a bytestring.
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
"""
serialize = OrderedDict()
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
if self.model not in (True, False, None):
serialize["model"] = self.model.to_bytes
serialize["vocab"] = self.vocab.to_bytes
exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
return util.to_bytes(serialize, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
"""Load the pipe from a bytestring."""
def load_model(b):
@ -161,26 +166,25 @@ class Pipe(object):
self.model = self.Model(**self.cfg)
self.model.from_bytes(b)
deserialize = OrderedDict(
(
("cfg", lambda b: self.cfg.update(srsly.json_loads(b))),
("vocab", lambda b: self.vocab.from_bytes(b)),
("model", load_model),
)
)
deserialize = OrderedDict()
deserialize["cfg"] = lambda b: self.cfg.update(srsly.json_loads(b))
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
deserialize["model"] = load_model
exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
util.from_bytes(bytes_data, deserialize, exclude)
return self
def to_disk(self, path, **exclude):
def to_disk(self, path, exclude=tuple(), **kwargs):
"""Serialize the pipe to disk."""
serialize = OrderedDict()
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
if self.model not in (None, True, False):
serialize["model"] = lambda p: p.open("wb").write(self.model.to_bytes())
exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
util.to_disk(path, serialize, exclude)
def from_disk(self, path, **exclude):
def from_disk(self, path, exclude=tuple(), **kwargs):
"""Load the pipe from disk."""
def load_model(p):
@ -191,13 +195,11 @@ class Pipe(object):
self.model = self.Model(**self.cfg)
self.model.from_bytes(p.open("rb").read())
deserialize = OrderedDict(
(
("cfg", lambda p: self.cfg.update(_load_cfg(p))),
("vocab", lambda p: self.vocab.from_disk(p)),
("model", load_model),
)
)
deserialize = OrderedDict()
deserialize["cfg"] = lambda p: self.cfg.update(_load_cfg(p))
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
deserialize["model"] = load_model
exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
util.from_disk(path, deserialize, exclude)
return self
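Every `Pipe` subclass now serializes the same three fields (`cfg`, `model`, `vocab`), so excluding any of them works uniformly. A sketch with an untrained tagger and a fresh vocab (assumptions, for illustration only):

```python
from spacy.pipeline import Tagger
from spacy.vocab import Vocab

vocab = Vocab()
tagger = Tagger(vocab)
# The vocab is shared across components, so it is commonly excluded
tagger_bytes = tagger.to_bytes(exclude=["vocab"])
new_tagger = Tagger(vocab).from_bytes(tagger_bytes)
```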
@ -537,7 +539,7 @@ class Tagger(Pipe):
with self.model.use_params(params):
yield
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
serialize = OrderedDict()
if self.model not in (None, True, False):
serialize["model"] = self.model.to_bytes
@ -545,9 +547,10 @@ class Tagger(Pipe):
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
serialize["tag_map"] = lambda: srsly.msgpack_dumps(tag_map)
exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
return util.to_bytes(serialize, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
def load_model(b):
# TODO: Remove this once we don't have to handle previous models
if self.cfg.get("pretrained_dims") and "pretrained_vectors" not in self.cfg:
@ -572,20 +575,22 @@ class Tagger(Pipe):
("cfg", lambda b: self.cfg.update(srsly.json_loads(b))),
("model", lambda b: load_model(b)),
))
exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
util.from_bytes(bytes_data, deserialize, exclude)
return self
def to_disk(self, path, **exclude):
def to_disk(self, path, exclude=tuple(), **kwargs):
tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
serialize = OrderedDict((
('vocab', lambda p: self.vocab.to_disk(p)),
('tag_map', lambda p: srsly.write_msgpack(p, tag_map)),
('model', lambda p: p.open("wb").write(self.model.to_bytes())),
('cfg', lambda p: srsly.write_json(p, self.cfg))
("vocab", lambda p: self.vocab.to_disk(p)),
("tag_map", lambda p: srsly.write_msgpack(p, tag_map)),
("model", lambda p: p.open("wb").write(self.model.to_bytes())),
("cfg", lambda p: srsly.write_json(p, self.cfg))
))
exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
util.to_disk(path, serialize, exclude)
def from_disk(self, path, **exclude):
def from_disk(self, path, exclude=tuple(), **kwargs):
def load_model(p):
# TODO: Remove this once we don't have to handle previous models
if self.cfg.get("pretrained_dims") and "pretrained_vectors" not in self.cfg:
@ -608,6 +613,7 @@ class Tagger(Pipe):
("tag_map", load_tag_map),
("model", load_model),
))
exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
util.from_disk(path, deserialize, exclude)
return self

View File

@ -236,19 +236,17 @@ cdef class StringStore:
self.add(word)
return self
def to_bytes(self, **exclude):
def to_bytes(self, **kwargs):
"""Serialize the current state to a binary string.
**exclude: Named attributes to prevent from being serialized.
RETURNS (bytes): The serialized form of the `StringStore` object.
"""
return srsly.json_dumps(list(self))
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, **kwargs):
"""Load state from a binary string.
bytes_data (bytes): The data to load from.
**exclude: Named attributes to prevent from being loaded.
RETURNS (StringStore): The `StringStore` object.
"""
strings = srsly.json_loads(bytes_data)
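`StringStore` drops exclusion support entirely, since it serializes only a single field; the round-trip is simply (sketch):

```python
from spacy.strings import StringStore

store = StringStore(["apple", "orange"])
new_store = StringStore().from_bytes(store.to_bytes())
assert "apple" in new_store
```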

View File

@ -228,7 +228,7 @@ cdef class Parser:
self.set_annotations(subbatch, parse_states, tensors=None)
for doc in batch_in_order:
yield doc
def require_model(self):
"""Raise an error if the component's model is not initialized."""
if getattr(self, 'model', None) in (None, True, False):
@ -272,7 +272,7 @@ cdef class Parser:
beams = self.moves.init_beams(docs, beam_width, beam_density=beam_density)
# This is pretty dirty, but the NER can resize itself in init_batch,
# if labels are missing. We therefore have to check whether we need to
# expand our model output.
self.model.resize_output(self.moves.n_moves)
model = self.model(docs)
token_ids = numpy.zeros((len(docs) * beam_width, self.nr_feature),
@ -442,7 +442,7 @@ cdef class Parser:
if self._rehearsal_model is None:
return None
losses.setdefault(self.name, 0.)
states = self.moves.init_batch(docs)
# This is pretty dirty, but the NER can resize itself in init_batch,
# if labels are missing. We therefore have to check whether we need to
@ -603,22 +603,24 @@ cdef class Parser:
self.cfg.update(cfg)
return sgd
def to_disk(self, path, **exclude):
def to_disk(self, path, exclude=tuple(), **kwargs):
serializers = {
'model': lambda p: (self.model.to_disk(p) if self.model is not True else True),
'vocab': lambda p: self.vocab.to_disk(p),
'moves': lambda p: self.moves.to_disk(p, strings=False),
'moves': lambda p: self.moves.to_disk(p, exclude=["strings"]),
'cfg': lambda p: srsly.write_json(p, self.cfg)
}
exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
util.to_disk(path, serializers, exclude)
def from_disk(self, path, **exclude):
def from_disk(self, path, exclude=tuple(), **kwargs):
deserializers = {
'vocab': lambda p: self.vocab.from_disk(p),
'moves': lambda p: self.moves.from_disk(p, strings=False),
'moves': lambda p: self.moves.from_disk(p, exclude=["strings"]),
'cfg': lambda p: self.cfg.update(srsly.read_json(p)),
'model': lambda p: None
}
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
util.from_disk(path, deserializers, exclude)
if 'model' not in exclude:
path = util.ensure_path(path)
@ -632,22 +634,24 @@ cdef class Parser:
self.cfg.update(cfg)
return self
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
serializers = OrderedDict((
('model', lambda: (self.model.to_bytes() if self.model is not True else True)),
('vocab', lambda: self.vocab.to_bytes()),
('moves', lambda: self.moves.to_bytes(strings=False)),
('moves', lambda: self.moves.to_bytes(exclude=["strings"])),
('cfg', lambda: srsly.json_dumps(self.cfg, indent=2, sort_keys=True))
))
exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
return util.to_bytes(serializers, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
deserializers = OrderedDict((
('vocab', lambda b: self.vocab.from_bytes(b)),
('moves', lambda b: self.moves.from_bytes(b, strings=False)),
('moves', lambda b: self.moves.from_bytes(b, exclude=["strings"])),
('cfg', lambda b: self.cfg.update(srsly.json_loads(b))),
('model', lambda b: None)
))
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
msg = util.from_bytes(bytes_data, deserializers, exclude)
if 'model' not in exclude:
# TODO: Remove this once we don't have to handle previous models

View File

@ -208,30 +208,32 @@ cdef class TransitionSystem:
self.labels[action][label_name] = new_freq-1
return 1
def to_disk(self, path, **exclude):
def to_disk(self, path, **kwargs):
with path.open('wb') as file_:
file_.write(self.to_bytes(**exclude))
file_.write(self.to_bytes(**kwargs))
def from_disk(self, path, **exclude):
def from_disk(self, path, **kwargs):
with path.open('rb') as file_:
byte_data = file_.read()
self.from_bytes(byte_data, **exclude)
self.from_bytes(byte_data, **kwargs)
return self
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
transitions = []
serializers = {
'moves': lambda: srsly.json_dumps(self.labels),
'strings': lambda: self.strings.to_bytes()
}
exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
return util.to_bytes(serializers, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
labels = {}
deserializers = {
'moves': lambda b: labels.update(srsly.json_loads(b)),
'strings': lambda b: self.strings.from_bytes(b)
}
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
msg = util.from_bytes(bytes_data, deserializers, exclude)
self.initialize_actions(labels)
return self

View File

@ -113,14 +113,14 @@ def test_doc_api_serialize(en_tokenizer, text):
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
new_tokens = Doc(tokens.vocab).from_bytes(
tokens.to_bytes(tensor=False), tensor=False
tokens.to_bytes(exclude=["tensor"]), exclude=["tensor"]
)
assert tokens.text == new_tokens.text
assert [t.text for t in tokens] == [t.text for t in new_tokens]
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
new_tokens = Doc(tokens.vocab).from_bytes(
tokens.to_bytes(sentiment=False), sentiment=False
tokens.to_bytes(exclude=["sentiment"]), exclude=["sentiment"]
)
assert tokens.text == new_tokens.text
assert [t.text for t in tokens] == [t.text for t in new_tokens]

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import pytest
from spacy.tokens import Doc
from spacy.compat import path2str
@ -41,3 +42,18 @@ def test_serialize_doc_roundtrip_disk_str_path(en_vocab):
doc.to_disk(file_path)
doc_d = Doc(en_vocab).from_disk(file_path)
assert doc.to_bytes() == doc_d.to_bytes()
def test_serialize_doc_exclude(en_vocab):
doc = Doc(en_vocab, words=["hello", "world"])
doc.user_data["foo"] = "bar"
new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
assert new_doc.user_data["foo"] == "bar"
new_doc = Doc(en_vocab).from_bytes(doc.to_bytes(), exclude=["user_data"])
assert not new_doc.user_data
new_doc = Doc(en_vocab).from_bytes(doc.to_bytes(exclude=["user_data"]))
assert not new_doc.user_data
with pytest.raises(ValueError):
doc.to_bytes(user_data=False)
with pytest.raises(ValueError):
Doc(en_vocab).from_bytes(doc.to_bytes(), tensor=False)

View File

@ -52,3 +52,19 @@ def test_serialize_with_custom_tokenizer():
nlp.tokenizer = custom_tokenizer(nlp)
with make_tempdir() as d:
nlp.to_disk(d)
def test_serialize_language_exclude(meta_data):
name = "name-in-fixture"
nlp = Language(meta=meta_data)
assert nlp.meta["name"] == name
new_nlp = Language().from_bytes(nlp.to_bytes())
assert new_nlp.meta["name"] == name
new_nlp = Language().from_bytes(nlp.to_bytes(), exclude=["meta"])
assert new_nlp.meta["name"] != name
new_nlp = Language().from_bytes(nlp.to_bytes(exclude=["meta"]))
assert new_nlp.meta["name"] != name
with pytest.raises(ValueError):
nlp.to_bytes(meta=False)
with pytest.raises(ValueError):
Language().from_bytes(nlp.to_bytes(), meta=False)

View File

@ -55,7 +55,9 @@ def test_serialize_parser_roundtrip_disk(en_vocab, Parser):
parser_d = Parser(en_vocab)
parser_d.model, _ = parser_d.Model(0)
parser_d = parser_d.from_disk(file_path)
assert parser.to_bytes(model=False) == parser_d.to_bytes(model=False)
parser_bytes = parser.to_bytes(exclude=["model"])
parser_d_bytes = parser_d.to_bytes(exclude=["model"])
assert parser_bytes == parser_d_bytes
def test_to_from_bytes(parser, blank_parser):
@ -114,3 +116,25 @@ def test_serialize_textcat_empty(en_vocab):
# See issue #1105
textcat = TextCategorizer(en_vocab, labels=["ENTITY", "ACTION", "MODIFIER"])
textcat.to_bytes()
@pytest.mark.parametrize("Parser", test_parsers)
def test_serialize_pipe_exclude(en_vocab, Parser):
def get_new_parser():
new_parser = Parser(en_vocab)
new_parser.model, _ = new_parser.Model(0)
return new_parser
parser = Parser(en_vocab)
parser.model, _ = parser.Model(0)
parser.cfg["foo"] = "bar"
new_parser = get_new_parser().from_bytes(parser.to_bytes())
assert "foo" in new_parser.cfg
new_parser = get_new_parser().from_bytes(parser.to_bytes(), exclude=["cfg"])
assert "foo" not in new_parser.cfg
new_parser = get_new_parser().from_bytes(parser.to_bytes(exclude=["cfg"]))
assert "foo" not in new_parser.cfg
with pytest.raises(ValueError):
parser.to_bytes(cfg=False)
with pytest.raises(ValueError):
get_new_parser().from_bytes(parser.to_bytes(), cfg=False)

View File

@ -360,36 +360,37 @@ cdef class Tokenizer:
self._cache.set(key, cached)
self._rules[string] = substrings
def to_disk(self, path, **exclude):
def to_disk(self, path, **kwargs):
"""Save the current state to a directory.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or Path-like objects.
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/tokenizer#to_disk
"""
with path.open("wb") as file_:
file_.write(self.to_bytes(**exclude))
file_.write(self.to_bytes(**kwargs))
def from_disk(self, path, **exclude):
def from_disk(self, path, **kwargs):
"""Loads state from a directory. Modifies the object in place and
returns it.
path (unicode or Path): A path to a directory. Paths may be either
strings or `Path`-like objects.
path (unicode or Path): A path to a directory.
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The modified `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#from_disk
"""
with path.open("rb") as file_:
bytes_data = file_.read()
self.from_bytes(bytes_data, **exclude)
self.from_bytes(bytes_data, **kwargs)
return self
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
"""Serialize the current state to a binary string.
**exclude: Named attributes to prevent from being serialized.
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#to_bytes
@ -402,13 +403,14 @@ cdef class Tokenizer:
("token_match", lambda: _get_regex_pattern(self.token_match)),
("exceptions", lambda: OrderedDict(sorted(self._rules.items())))
))
exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
return util.to_bytes(serializers, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
"""Load state from a binary string.
bytes_data (bytes): The data to load from.
**exclude: Named attributes to prevent from being loaded.
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#from_bytes
@ -422,6 +424,7 @@ cdef class Tokenizer:
("token_match", lambda b: data.setdefault("token_match", b)),
("exceptions", lambda b: data.setdefault("rules", b))
))
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
msg = util.from_bytes(bytes_data, deserializers, exclude)
if data.get("prefix_search"):
self.prefix_search = re.compile(data["prefix_search"]).search
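With the validation in place, the tokenizer accepts the same `exclude` list as the other objects. A sketch (assumes `"exceptions"` and `"vocab"` are among the tokenizer's serialization fields, as in the hunks above):

```python
import spacy

nlp = spacy.blank("en")
# Serialize the tokenizer without its exception rules
tok_bytes = nlp.tokenizer.to_bytes(exclude=["exceptions"])
nlp.tokenizer.from_bytes(tok_bytes, exclude=["vocab"])
```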

View File

@ -794,24 +794,26 @@ cdef class Doc:
"""
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
def to_disk(self, path, **exclude):
def to_disk(self, path, **kwargs):
"""Save the current state to a directory.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or Path-like objects.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/doc#to_disk
"""
path = util.ensure_path(path)
with path.open("wb") as file_:
file_.write(self.to_bytes(**exclude))
file_.write(self.to_bytes(**kwargs))
def from_disk(self, path, **exclude):
def from_disk(self, path, **kwargs):
"""Loads state from a directory. Modifies the object in place and
returns it.
path (unicode or Path): A path to a directory. Paths may be either
strings or `Path`-like objects.
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): The modified `Doc` object.
DOCS: https://spacy.io/api/doc#from_disk
@ -819,11 +821,12 @@ cdef class Doc:
path = util.ensure_path(path)
with path.open("rb") as file_:
bytes_data = file_.read()
return self.from_bytes(bytes_data, **exclude)
return self.from_bytes(bytes_data, **kwargs)
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
"""Serialize, i.e. export the document contents to a binary string.
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations.
@ -849,16 +852,22 @@ cdef class Doc:
"sentiment": lambda: self.sentiment,
"tensor": lambda: self.tensor,
}
for key in kwargs:
if key in serializers or key in ("user_data", "user_data_keys", "user_data_values"):
raise ValueError(Errors.E128.format(arg=key))
if "user_data" not in exclude and self.user_data:
user_data_keys, user_data_values = list(zip(*self.user_data.items()))
serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
if "user_data_keys" not in exclude:
serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
if "user_data_values" not in exclude:
serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
return util.to_bytes(serializers, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
"""Deserialize, i.e. import the document contents from a binary string.
data (bytes): The string to load from.
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_bytes
@ -874,6 +883,9 @@ cdef class Doc:
"user_data_keys": lambda b: None,
"user_data_values": lambda b: None,
}
for key in kwargs:
if key in deserializers or key in ("user_data",):
raise ValueError(Errors.E128.format(arg=key))
msg = util.from_bytes(bytes_data, deserializers, exclude)
# Msgpack doesn't distinguish between lists and tuples, which is
# vexing for user data. As a best guess, we *know* that within
@ -1170,7 +1182,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
def pickle_doc(doc):
bytes_data = doc.to_bytes(vocab=False, user_data=False)
bytes_data = doc.to_bytes(exclude=["vocab", "user_data"])
hooks_and_data = (doc.user_data, doc.user_hooks, doc.user_span_hooks,
doc.user_token_hooks)
return (unpickle_doc, (doc.vocab, srsly.pickle_dumps(hooks_and_data), bytes_data))
@ -1179,7 +1191,7 @@ def pickle_doc(doc):
def unpickle_doc(vocab, hooks_and_data, bytes_data):
user_data, doc_hooks, span_hooks, token_hooks = srsly.pickle_loads(hooks_and_data)
doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude="user_data")
doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude=["user_data"])
doc.user_hooks.update(doc_hooks)
doc.user_span_hooks.update(span_hooks)
doc.user_token_hooks.update(token_hooks)
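Because `user_data` travels in the pickled hooks tuple rather than in the byte payload, custom state survives pickling, mirroring the new test above (minimal sketch):

```python
import pickle

from spacy.tokens import Doc
from spacy.vocab import Vocab

doc = Doc(Vocab(), words=["hello", "world"])
doc.user_data["foo"] = "bar"
doc2 = pickle.loads(pickle.dumps(doc))
assert doc2.user_data["foo"] == "bar"
```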

View File

@ -25,7 +25,7 @@ except ImportError:
from .symbols import ORTH
from .compat import cupy, CudaStream, path2str, basestring_, unicode_
from .compat import import_file
from .errors import Errors
from .errors import Errors, Warnings, deprecation_warning
LANGUAGES = {}
@ -565,7 +565,8 @@ def itershuffle(iterable, bufsize=1000):
def to_bytes(getters, exclude):
serialized = OrderedDict()
for key, getter in getters.items():
if key not in exclude:
# Split to support file names like meta.json
if key.split(".")[0] not in exclude:
serialized[key] = getter()
return srsly.msgpack_dumps(serialized)
@ -573,7 +574,8 @@ def to_bytes(getters, exclude):
def from_bytes(bytes_data, setters, exclude):
msg = srsly.msgpack_loads(bytes_data)
for key, setter in setters.items():
if key not in exclude and key in msg:
# Split to support file names like meta.json
if key.split(".")[0] not in exclude and key in msg:
setter(msg[key])
return msg
@ -583,7 +585,8 @@ def to_disk(path, writers, exclude):
if not path.exists():
path.mkdir()
for key, writer in writers.items():
if key not in exclude:
# Split to support file names like meta.json
if key.split(".")[0] not in exclude:
writer(path / key)
return path
@ -591,7 +594,8 @@ def to_disk(path, writers, exclude):
def from_disk(path, readers, exclude):
path = ensure_path(path)
for key, reader in readers.items():
if key not in exclude:
# Split to support file names like meta.json
if key.split(".")[0] not in exclude:
reader(path / key)
return path
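The `key.split(".")[0]` check means an exclude name matches a key with or without a file extension. Roughly (illustrative only):

```python
exclude = ["meta"]
for key in ("meta.json", "vocab", "tokenizer"):
    skipped = key.split(".")[0] in exclude
    print(key, "->", "skipped" if skipped else "kept")
# meta.json -> skipped
# vocab -> kept
# tokenizer -> kept
```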
@ -677,6 +681,23 @@ def validate_json(data, validator):
return errors
def get_serialization_exclude(serializers, exclude, kwargs):
"""Helper function to validate serialization args and manage transition from
keyword arguments (pre v2.1) to exclude argument.
"""
exclude = list(exclude)
# Split to support file names like meta.json
options = [name.split(".")[0] for name in serializers]
for key, value in kwargs.items():
if key in ("vocab",) and value is False:
deprecation_warning(Warnings.W015.format(arg=key))
exclude.append(key)
elif key.split(".")[0] in options:
raise ValueError(Errors.E128.format(arg=key))
# TODO: user warning?
return exclude
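A sketch of what the helper does with the two kinds of legacy keyword arguments (internal API, shown for illustration):

```python
from spacy import util

serializers = {"vocab": None, "cfg": None, "model": None}

# `vocab=False` is special-cased: W015 warning, then excluded
exclude = util.get_serialization_exclude(serializers, tuple(), {"vocab": False})
assert exclude == ["vocab"]

# Any other keyword naming a serialization field raises E128
util.get_serialization_exclude(serializers, tuple(), {"cfg": False})  # ValueError
```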
class SimpleFrozenDict(dict):
"""Simplified implementation of a frozen dict, mainly used as default
function or method argument (for arguments that should default to empty
@ -696,14 +717,14 @@ class SimpleFrozenDict(dict):
class DummyTokenizer(object):
# add dummy methods for to_bytes, from_bytes, to_disk and from_disk to
# allow serialization (see #1557)
def to_bytes(self, **exclude):
def to_bytes(self, **kwargs):
return b""
def from_bytes(self, _bytes_data, **exclude):
def from_bytes(self, _bytes_data, **kwargs):
return self
def to_disk(self, _path, **exclude):
def to_disk(self, _path, **kwargs):
return None
def from_disk(self, _path, **exclude):
def from_disk(self, _path, **kwargs):
return self
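A hypothetical custom tokenizer built on this base class, so that `nlp.to_disk()` and friends don't choke on it:

```python
from spacy.tokens import Doc
from spacy.util import DummyTokenizer


class WhitespaceTokenizer(DummyTokenizer):
    """Hypothetical tokenizer: inherits the no-op serialization methods."""

    def __init__(self, vocab):
        self.vocab = vocab

    def __call__(self, text):
        return Doc(self.vocab, words=text.split())
```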

View File

@ -377,11 +377,11 @@ cdef class Vectors:
self.add(key, row=i)
return strings
def to_disk(self, path, **exclude):
def to_disk(self, path, **kwargs):
"""Save the current state to a directory.
path (unicode / Path): A path to a directory, which will be created if
it doesn't exist. Either a string or a Path-like object.
it doesn't exist.
DOCS: https://spacy.io/api/vectors#to_disk
"""
@ -394,9 +394,9 @@ cdef class Vectors:
("vectors", lambda p: save_array(self.data, p.open("wb"))),
("key2row", lambda p: srsly.write_msgpack(p, self.key2row))
))
return util.to_disk(path, serializers, exclude)
return util.to_disk(path, serializers, [])
def from_disk(self, path, **exclude):
def from_disk(self, path, **kwargs):
"""Loads state from a directory. Modifies the object in place and
returns it.
@ -428,13 +428,13 @@ cdef class Vectors:
("keys", load_keys),
("vectors", load_vectors),
))
util.from_disk(path, serializers, exclude)
util.from_disk(path, serializers, [])
return self
def to_bytes(self, **exclude):
def to_bytes(self, **kwargs):
"""Serialize the current state to a binary string.
**exclude: Named attributes to prevent from being serialized.
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vectors` object.
DOCS: https://spacy.io/api/vectors#to_bytes
@ -444,17 +444,18 @@ cdef class Vectors:
return self.data.to_bytes()
else:
return srsly.msgpack_dumps(self.data)
serializers = OrderedDict((
("key2row", lambda: srsly.msgpack_dumps(self.key2row)),
("vectors", serialize_weights)
))
return util.to_bytes(serializers, exclude)
return util.to_bytes(serializers, [])
def from_bytes(self, data, **exclude):
def from_bytes(self, data, **kwargs):
"""Load state from a binary string.
data (bytes): The data to load from.
**exclude: Named attributes to prevent from being loaded.
exclude (list): String names of serialization fields to exclude.
RETURNS (Vectors): The `Vectors` object.
DOCS: https://spacy.io/api/vectors#from_bytes
@ -469,5 +470,5 @@ cdef class Vectors:
("key2row", lambda b: self.key2row.update(srsly.msgpack_loads(b))),
("vectors", deserialize_weights)
))
util.from_bytes(data, deserializers, exclude)
util.from_bytes(data, deserializers, [])
return self
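`Vectors` no longer honours exclusion at all; both directions simply serialize everything (sketch):

```python
import spacy

nlp = spacy.blank("en")
vec_bytes = nlp.vocab.vectors.to_bytes()
nlp.vocab.vectors.from_bytes(vec_bytes)
```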

View File

@ -397,47 +397,57 @@ cdef class Vocab:
orth = self.strings.add(orth)
return orth in self.vectors
def to_disk(self, path, **exclude):
def to_disk(self, path, exclude=tuple(), **kwargs):
"""Save the current state to a directory.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or Path-like objects.
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/vocab#to_disk
"""
path = util.ensure_path(path)
if not path.exists():
path.mkdir()
self.strings.to_disk(path / "strings.json")
with (path / "lexemes.bin").open('wb') as file_:
file_.write(self.lexemes_to_bytes())
if self.vectors is not None:
setters = ["strings", "lexemes", "vectors"]
exclude = util.get_serialization_exclude(setters, exclude, kwargs)
if "strings" not in exclude:
self.strings.to_disk(path / "strings.json")
if "lexemes" not in exclude:
with (path / "lexemes.bin").open("wb") as file_:
file_.write(self.lexemes_to_bytes())
if "vectors" not in "exclude" and self.vectors is not None:
self.vectors.to_disk(path)
def from_disk(self, path, **exclude):
def from_disk(self, path, exclude=tuple(), **kwargs):
"""Loads state from a directory. Modifies the object in place and
returns it.
path (unicode or Path): A path to a directory. Paths may be either
strings or `Path`-like objects.
path (unicode or Path): A path to a directory.
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The modified `Vocab` object.
DOCS: https://spacy.io/api/vocab#from_disk
"""
path = util.ensure_path(path)
self.strings.from_disk(path / "strings.json")
with (path / "lexemes.bin").open("rb") as file_:
self.lexemes_from_bytes(file_.read())
if self.vectors is not None:
self.vectors.from_disk(path, exclude="strings.json")
if self.vectors.name is not None:
link_vectors_to_models(self)
getters = ["strings", "lexemes", "vectors"]
exclude = util.get_serialization_exclude(getters, exclude, kwargs)
if "strings" not in exclude:
self.strings.from_disk(path / "strings.json") # TODO: add exclude?
if "lexemes" not in exclude:
with (path / "lexemes.bin").open("rb") as file_:
self.lexemes_from_bytes(file_.read())
if "vectors" not in exclude:
if self.vectors is not None:
self.vectors.from_disk(path, exclude=["strings"])
if self.vectors.name is not None:
link_vectors_to_models(self)
return self
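The vocab's three fields (`strings`, `lexemes`, `vectors`) can now be excluded individually (sketch, hypothetical path):

```python
import spacy

nlp = spacy.blank("en")
# Save the vocab without its vectors, e.g. to keep the payload small
nlp.vocab.to_disk("/tmp/vocab", exclude=["vectors"])
nlp.vocab.from_disk("/tmp/vocab", exclude=["vectors"])
```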
def to_bytes(self, **exclude):
def to_bytes(self, exclude=tuple(), **kwargs):
"""Serialize the current state to a binary string.
**exclude: Named attributes to prevent from being serialized.
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vocab` object.
DOCS: https://spacy.io/api/vocab#to_bytes
@ -453,13 +463,14 @@ cdef class Vocab:
("lexemes", lambda: self.lexemes_to_bytes()),
("vectors", deserialize_vectors)
))
exclude = util.get_serialization_exclude(getters, exclude, kwargs)
return util.to_bytes(getters, exclude)
def from_bytes(self, bytes_data, **exclude):
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
"""Load state from a binary string.
bytes_data (bytes): The data to load from.
**exclude: Named attributes to prevent from being loaded.
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The `Vocab` object.
DOCS: https://spacy.io/api/vocab#from_bytes
@ -469,11 +480,13 @@ cdef class Vocab:
return None
else:
return self.vectors.from_bytes(b)
setters = OrderedDict((
("strings", lambda b: self.strings.from_bytes(b)),
("lexemes", lambda b: self.lexemes_from_bytes(b)),
("vectors", lambda b: serialize_vectors(b))
))
exclude = util.get_serialization_exclude(setters, exclude, kwargs)
util.from_bytes(bytes_data, setters, exclude)
if self.vectors.name is not None:
link_vectors_to_models(self)

View File

@ -244,9 +244,10 @@ Serialize the pipe to disk.
> parser.to_disk("/path/to/parser")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## DependencyParser.from_disk {#from_disk tag="method"}
@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ------------------ | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `DependencyParser` | The modified `DependencyParser` object. |
## DependencyParser.to_bytes {#to_bytes tag="method"}
@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.
Serialize the pipe to a bytestring.
| Name | Type | Description |
| ----------- | ----- | ----------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `DependencyParser` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `DependencyParser` object. |
## DependencyParser.from_bytes {#from_bytes tag="method"}
@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
> parser.from_bytes(parser_bytes)
> ```
| Name | Type | Description |
| ------------ | ------------------ | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `DependencyParser` | The `DependencyParser` object. |
| Name | Type | Description |
| ------------ | ------------------ | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `DependencyParser` | The `DependencyParser` object. |
## DependencyParser.labels {#labels tag="property"}
@ -312,3 +314,21 @@ The labels currently added to the component.
| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = parser.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |

View File

@ -349,11 +349,12 @@ array of attributes.
> assert doc[0].pos_ == doc2[0].pos_
> ```
| Name | Type | Description |
| ----------- | -------------------------------------- | ----------------------------- |
| `attrs` | list | A list of attribute ID ints. |
| `array` | `numpy.ndarray[ndim=2, dtype='int32']` | The attribute values to load. |
| **RETURNS** | `Doc` | Itself. |
| Name | Type | Description |
| ----------- | -------------------------------------- | ------------------------------------------------------------------------- |
| `attrs` | list | A list of attribute ID ints. |
| `array` | `numpy.ndarray[ndim=2, dtype='int32']` | The attribute values to load. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | Itself. |
## Doc.to_disk {#to_disk tag="method" new="2"}
@ -365,9 +366,10 @@ Save the current state to a directory.
> doc.to_disk("/path/to/doc")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Doc.from_disk {#from_disk tag="method" new="2"}
@ -384,6 +386,7 @@ Loads state from a directory. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | The modified `Doc` object. |
## Doc.to_bytes {#to_bytes tag="method"}
@ -397,9 +400,10 @@ Serialize, i.e. export the document contents to a binary string.
> doc_bytes = doc.to_bytes()
> ```
| Name | Type | Description |
| ----------- | ----- | --------------------------------------------------------------------- |
| **RETURNS** | bytes | A losslessly serialized copy of the `Doc`, including all annotations. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | A losslessly serialized copy of the `Doc`, including all annotations. |
## Doc.from_bytes {#from_bytes tag="method"}
@ -416,10 +420,11 @@ Deserialize, i.e. import the document contents from a binary string.
> assert doc.text == doc2.text
> ```
| Name | Type | Description |
| ----------- | ----- | ------------------------ |
| `data` | bytes | The string to load from. |
| **RETURNS** | `Doc` | The `Doc` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `data` | bytes | The string to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | The `Doc` object. |
## Doc.retokenize {#retokenize tag="contextmanager" new="2.1"}
@ -658,3 +663,25 @@ The L2 norm of the document's vector representation.
| `user_token_hooks` | dict | A dictionary that allows customization of properties of `Token` children. |
| `user_span_hooks` | dict | A dictionary that allows customization of properties of `Span` children. |
| `_` | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = doc.to_bytes(exclude=["text", "tensor"])
> doc.from_disk("./doc.bin", exclude=["user_data"])
> ```
| Name | Description |
| ------------------ | --------------------------------------------- |
| `text` | The value of the `Doc.text` attribute. |
| `sentiment` | The value of the `Doc.sentiment` attribute. |
| `tensor` | The value of the `Doc.tensor` attribute. |
| `user_data` | The value of the `Doc.user_data` dictionary. |
| `user_data_keys` | The keys of the `Doc.user_data` dictionary. |
| `user_data_values` | The values of the `Doc.user_data` dictionary. |

View File

@ -244,9 +244,10 @@ Serialize the pipe to disk.
> ner.to_disk("/path/to/ner")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## EntityRecognizer.from_disk {#from_disk tag="method"}
@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ------------------ | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `EntityRecognizer` | The modified `EntityRecognizer` object. |
## EntityRecognizer.to_bytes {#to_bytes tag="method"}
@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.
Serialize the pipe to a bytestring.
| Name | Type | Description |
| ----------- | ----- | ----------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `EntityRecognizer` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `EntityRecognizer` object. |
## EntityRecognizer.from_bytes {#from_bytes tag="method"}
@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
> ner.from_bytes(ner_bytes)
> ```
| Name | Type | Description |
| ------------ | ------------------ | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `EntityRecognizer` | The `EntityRecognizer` object. |
| Name | Type | Description |
| ------------ | ------------------ | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `EntityRecognizer` | The `EntityRecognizer` object. |
## EntityRecognizer.labels {#labels tag="property"}
@ -312,3 +314,21 @@ The labels currently added to the component.
| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = ner.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |

View File

@ -327,7 +327,7 @@ the model**.
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `disable` | list | Names of pipeline components to [disable](/usage/processing-pipelines#disabling) and prevent from being saved. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
## Language.from_disk {#from_disk tag="method" new="2"}
@ -349,22 +349,22 @@ loaded object.
> nlp = English().from_disk("/path/to/en_model")
> ```
| Name | Type | Description |
| ----------- | ---------------- | --------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `disable` | list | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). |
| **RETURNS** | `Language` | The modified `Language` object. |
| Name | Type | Description |
| ----------- | ---------------- | ----------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Language` | The modified `Language` object. |
<Infobox title="Changed in v2.0" variant="warning">
As of spaCy v2.0, the `save_to_directory` method has been renamed to `to_disk`,
to improve consistency across classes. Pipeline components to prevent from being
loaded can now be added as a list to `disable`, instead of specifying one
keyword argument per component.
loaded can now be added as a list to `disable` (v2.0) or `exclude` (v2.1),
instead of specifying one keyword argument per component.
```diff
- nlp = spacy.load("en", tagger=False, entity=False)
+ nlp = English().from_disk("/model", disable=["tagger", "ner"])
+ nlp = English().from_disk("/model", exclude=["tagger", "ner"])
```
</Infobox>
@ -379,10 +379,10 @@ Serialize the current state to a binary string.
> nlp_bytes = nlp.to_bytes()
> ```
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------------------------------------------------- |
| `disable` | list | Names of pipeline components to [disable](/usage/processing-pipelines#disabling) and prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `Language` object. |
| Name | Type | Description |
| ----------- | ----- | ----------------------------------------------------------------------------------------- |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Language` object. |
## Language.from_bytes {#from_bytes tag="method"}
@ -400,20 +400,21 @@ available to the loaded object.
> nlp2.from_bytes(nlp_bytes)
> ```
| Name | Type | Description |
| ------------ | ---------- | --------------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `disable` | list | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). |
| **RETURNS** | `Language` | The `Language` object. |
| Name | Type | Description |
| ------------ | ---------- | ----------------------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Language` | The `Language` object. |
<Infobox title="Changed in v2.0" variant="warning">
Pipeline components to prevent from being loaded can now be added as a list to
`disable`, instead of specifying one keyword argument per component.
`disable` (v2.0) or `exclude` (v2.1), instead of specifying one keyword argument
per component.
```diff
- nlp = English().from_bytes(bytes, tagger=False, entity=False)
+ nlp = English().from_bytes(bytes, disable=["tagger", "ner"])
+ nlp = English().from_bytes(bytes, exclude=["tagger", "ner"])
```
</Infobox>
@ -437,3 +438,23 @@ Pipeline components to prevent from being loaded can now be added as a list to
| `Defaults` | class | Settings, data and factory methods for creating the `nlp` object and processing pipeline. |
| `lang` | unicode | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). |
| `factories` <Tag variant="new">2</Tag> | dict | Factories that create pre-defined pipeline components, e.g. the tagger, parser or entity recognizer, keyed by their component name. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = nlp.to_bytes(exclude=["tokenizer", "vocab"])
> nlp.from_disk("./model-data", exclude=["ner"])
> ```
| Name | Description |
| ----------- | -------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `tokenizer` | Tokenization rules and exceptions. |
| `meta` | The meta data, available as `Language.meta`. |
| ... | String names of pipeline components, e.g. `"ner"`. |

View File

@ -151,10 +151,9 @@ Serialize the current state to a binary string.
> store_bytes = stringstore.to_bytes()
> ```
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `StringStore` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------ |
| **RETURNS** | bytes | The serialized form of the `StringStore` object. |
## StringStore.from_bytes {#from_bytes tag="method"}
@ -168,11 +167,10 @@ Load state from a binary string.
> new_store = StringStore().from_bytes(store_bytes)
> ```
| Name | Type | Description |
| ------------ | ------------- | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `StringStore` | The `StringStore` object. |
| Name | Type | Description |
| ------------ | ------------- | ------------------------- |
| `bytes_data` | bytes | The data to load from. |
| **RETURNS** | `StringStore` | The `StringStore` object. |
## Utilities {#util}
View File
@ -244,9 +244,10 @@ Serialize the pipe to disk.
> tagger.to_disk("/path/to/tagger")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Tagger.from_disk {#from_disk tag="method"}
@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tagger` | The modified `Tagger` object. |
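For instance, a minimal sketch of restoring a saved tagger while skipping the
shared vocab (the model name and path are illustrative):

```python
import spacy
from spacy.pipeline import Tagger

nlp = spacy.load("en_core_web_sm")  # any pipeline that provides a vocab
tagger = Tagger(nlp.vocab)
# Skip the shared vocab, since it's already provided by the nlp object
tagger = tagger.from_disk("/path/to/tagger", exclude=["vocab"])
```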
## Tagger.to_bytes {#to_bytes tag="method"}
@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.
Serialize the pipe to a bytestring.
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `Tagger` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Tagger` object. |
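A short sketch of serializing a tagger without the shared vocab, assuming a
loaded pipeline that includes a tagger:

```python
import spacy

nlp = spacy.load("en_core_web_sm")  # illustrative model name
tagger = nlp.get_pipe("tagger")
# The vocab is typically serialized once via nlp.vocab, so excluding it
# here avoids writing the same data twice
tagger_bytes = tagger.to_bytes(exclude=["vocab"])
```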
## Tagger.from_bytes {#from_bytes tag="method"}
@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
> tagger.from_bytes(tagger_bytes)
> ```
| Name | Type | Description |
| ------------ | -------- | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `Tagger` | The `Tagger` object. |
| Name | Type | Description |
| ------------ | -------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tagger` | The `Tagger` object. |
## Tagger.labels {#labels tag="property"}
@ -314,3 +316,22 @@ tags by default, e.g. `VERB`, `NOUN` and so on.
| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> tagger.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| --------- | ------------------------------------------------------------------------------------------ |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |
| `tag_map` | The [tag map](/usage/adding-languages#tag-map) mapping fine-grained to coarse-grained tags. |
View File
@ -260,9 +260,10 @@ Serialize the pipe to disk.
> textcat.to_disk("/path/to/textcat")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## TextCategorizer.from_disk {#from_disk tag="method"}
@ -278,6 +279,7 @@ Load the pipe from disk. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ----------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `TextCategorizer` | The modified `TextCategorizer` object. |
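A minimal sketch, assuming the directory was previously written by
`TextCategorizer.to_disk` (the path is illustrative):

```python
import spacy
from spacy.pipeline import TextCategorizer

nlp = spacy.blank("en")  # any pipeline that provides a vocab
textcat = TextCategorizer(nlp.vocab)
textcat = textcat.from_disk("/path/to/textcat", exclude=["vocab"])
```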
## TextCategorizer.to_bytes {#to_bytes tag="method"}
@ -291,10 +293,10 @@ Load the pipe from disk. Modifies the object in place and returns it.
Serialize the pipe to a bytestring.
| Name | Type | Description |
| ----------- | ----- | ---------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `TextCategorizer` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `TextCategorizer` object. |
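For example, a sketch that keeps the byte payload small by excluding the
shared vocab; the `create_pipe` call stands in for a trained component:

```python
import spacy

nlp = spacy.blank("en")
textcat = nlp.create_pipe("textcat")  # use nlp.get_pipe("textcat") on a trained pipeline
textcat_bytes = textcat.to_bytes(exclude=["vocab"])
```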
## TextCategorizer.from_bytes {#from_bytes tag="method"}
@ -308,11 +310,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
> textcat.from_bytes(textcat_bytes)
> ```
| Name | Type | Description |
| ------------ | ----------------- | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `TextCategorizer` | The `TextCategorizer` object. |
| Name | Type | Description |
| ------------ | ----------------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `TextCategorizer` | The `TextCategorizer` object. |
## TextCategorizer.labels {#labels tag="property"}
@ -328,3 +330,21 @@ The labels currently added to the component.
| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> textcat.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |
View File
@ -127,9 +127,10 @@ Serialize the tokenizer to disk.
> tokenizer.to_disk("/path/to/tokenizer")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Tokenizer.from_disk {#from_disk tag="method"}
@ -145,6 +146,7 @@ Load the tokenizer from disk. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tokenizer` | The modified `Tokenizer` object. |
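A minimal sketch of loading custom tokenization rules into a blank pipeline
(the path is illustrative):

```python
import spacy

nlp = spacy.blank("en")  # a blank pipeline has only a tokenizer and vocab
nlp.tokenizer.from_disk("/path/to/tokenizer", exclude=["token_match"])
```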
## Tokenizer.to_bytes {#to_bytes tag="method"}
@ -158,10 +160,10 @@ Load the tokenizer from disk. Modifies the object in place and returns it.
Serialize the tokenizer to a bytestring.
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `Tokenizer` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Tokenizer` object. |
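For instance, a sketch that exports the tokenizer rules while leaving out the
shared vocab and the exception table:

```python
import spacy

nlp = spacy.blank("en")  # stands in for any loaded pipeline
tokenizer_bytes = nlp.tokenizer.to_bytes(exclude=["vocab", "exceptions"])
```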
## Tokenizer.from_bytes {#from_bytes tag="method"}
@ -176,11 +178,11 @@ it.
> tokenizer.from_bytes(tokenizer_bytes)
> ```
| Name | Type | Description |
| ------------ | ----------- | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `Tokenizer` | The `Tokenizer` object. |
| Name | Type | Description |
| ------------ | ----------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tokenizer` | The `Tokenizer` object. |
## Attributes {#attributes}
@ -190,3 +192,25 @@ it.
| `prefix_search` | - | A function to find segment boundaries from the start of a string. Returns the length of the segment, or `None`. |
| `suffix_search` | - | A function to find segment boundaries from the end of a string. Returns the length of the segment, or `None`. |
| `infix_finditer` | - | A function to find internal segment separators, e.g. hyphens. Returns a (possibly empty) list of `re.MatchObject` objects. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = tokenizer.to_bytes(exclude=["vocab", "exceptions"])
> tokenizer.from_disk("./data", exclude=["token_match"])
> ```
| Name | Description |
| ---------------- | --------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `prefix_search` | The prefix rules. |
| `suffix_search` | The suffix rules. |
| `infix_finditer` | The infix rules. |
| `token_match` | The token match expression. |
| `exceptions` | The tokenizer exception rules. |
View File
@ -311,10 +311,9 @@ Save the current state to a directory.
>
> ```
| Name | Type | Description |
| ----------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `**exclude` | - | Named attributes to prevent from being saved. |
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
## Vectors.from_disk {#from_disk tag="method"}
@ -342,10 +341,9 @@ Serialize the current state to a binary string.
> vectors_bytes = vectors.to_bytes()
> ```
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `Vectors` object. |
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------- |
| **RETURNS** | bytes | The serialized form of the `Vectors` object. |
## Vectors.from_bytes {#from_bytes tag="method"}
@ -360,11 +358,10 @@ Load state from a binary string.
> new_vectors.from_bytes(vectors_bytes)
> ```
| Name | Type | Description |
| ----------- | --------- | ---------------------------------------------- |
| `data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `Vectors` | The `Vectors` object. |
| Name | Type | Description |
| ----------- | --------- | ---------------------- |
| `data` | bytes | The data to load from. |
| **RETURNS** | `Vectors` | The `Vectors` object. |
## Attributes {#attributes}
View File
@ -221,9 +221,10 @@ Save the current state to a directory.
> nlp.vocab.to_disk("/path/to/vocab")
> ```
| Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| Name | Type | Description |
| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Vocab.from_disk {#from_disk tag="method" new="2"}
@ -239,6 +240,7 @@ Loads state from a directory. Modifies the object in place and returns it.
| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Vocab` | The modified `Vocab` object. |
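A minimal sketch of loading a vocab without its word vectors, assuming the
directory was written by `Vocab.to_disk` (the path is illustrative):

```python
from spacy.vocab import Vocab

vocab = Vocab().from_disk("/path/to/vocab", exclude=["vectors"])
```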
## Vocab.to_bytes {#to_bytes tag="method"}
@ -251,10 +253,10 @@ Serialize the current state to a binary string.
> vocab_bytes = nlp.vocab.to_bytes()
> ```
| Name | Type | Description |
| ----------- | ----- | -------------------------------------------------- |
| `**exclude` | - | Named attributes to prevent from being serialized. |
| **RETURNS** | bytes | The serialized form of the `Vocab` object. |
| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Vocab` object. |
## Vocab.from_bytes {#from_bytes tag="method"}
@ -269,11 +271,11 @@ Load state from a binary string.
> vocab.from_bytes(vocab_bytes)
> ```
| Name | Type | Description |
| ------------ | ------- | ---------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `**exclude` | - | Named attributes to prevent from being loaded. |
| **RETURNS** | `Vocab` | The `Vocab` object. |
| Name | Type | Description |
| ------------ | ------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Vocab` | The `Vocab` object. |
## Attributes {#attributes}
@ -291,3 +293,22 @@ Load state from a binary string.
| `strings` | `StringStore` | A table managing the string-to-int mapping. |
| `vectors` <Tag variant="new">2</Tag> | `Vectors` | A table associating word IDs to word vectors. |
| `vectors_length` | int | Number of dimensions for each word vector. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = vocab.to_bytes(exclude=["strings", "vectors"])
> vocab.from_disk("./vocab", exclude=["strings"])
> ```
| Name | Description |
| --------- | ----------------------------------------------------- |
| `strings` | The strings in the [`StringStore`](/api/stringstore). |
| `lexemes` | The lexeme data. |
| `vectors` | The word vectors, if available. |