💫 Make serialization methods consistent (#3385)

* Make serialization methods consistent

Use an `exclude` keyword argument instead of ad-hoc named keyword arguments, with deprecation handling for the old style

* Update docs and add section on serialization fields
Ines Montani 2019-03-10 19:16:45 +01:00 committed by Matthew Honnibal
parent 9a8f169e5c
commit 7ba3a5d95c
25 changed files with 598 additions and 314 deletions
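
In practice, the change amounts to replacing per-field keyword arguments and the `disable` list with a single `exclude` list. A minimal sketch of the before/after usage (object and component names are illustrative):

```python
from spacy.language import Language
from spacy.tokens import Doc
from spacy.vocab import Vocab

nlp = Language()
doc = Doc(Vocab(), words=["hello", "world"])

# Pre-v2.1 style, now deprecated (W014/W015) or rejected (E128):
#     nlp.to_disk("/path/to/model", disable=["ner"])
#     data = doc.to_bytes(tensor=False)

# v2.1 style: one `exclude` list, consistent across all objects.
data = doc.to_bytes(exclude=["tensor"])
nlp_bytes = nlp.to_bytes(exclude=["ner"])
```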

View File

@@ -70,6 +70,12 @@ class Warnings(object):
     W013 = ("As of v2.1.0, {obj}.merge is deprecated. Please use the more "
             "efficient and less error-prone Doc.retokenize context manager "
             "instead.")
+    W014 = ("As of v2.1.0, the `disable` keyword argument on the serialization "
+            "methods is deprecated and should be replaced with `exclude`. This "
+            "makes it consistent with the other serializable objects.")
+    W015 = ("As of v2.1.0, the use of keyword arguments to exclude fields from "
+            "being serialized or deserialized is deprecated. Please use the "
+            "`exclude` argument instead. For example: exclude=['{arg}'].")
 
 
 @add_codes
@@ -348,7 +354,10 @@ class Errors(object):
             "This is likely a bug in spaCy, so feel free to open an issue.")
     E127 = ("Cannot create phrase pattern representation for length 0. This "
             "is likely a bug in spaCy.")
+    E128 = ("Unsupported serialization argument: '{arg}'. The use of keyword "
+            "arguments to exclude fields from being serialized or deserialized "
+            "is now deprecated. Please use the `exclude` argument instead. "
+            "For example: exclude=['{arg}'].")
 
 
 @add_codes

View File

@@ -28,7 +28,7 @@ from .lang.punctuation import TOKENIZER_INFIXES
 from .lang.tokenizer_exceptions import TOKEN_MATCH
 from .lang.tag_map import TAG_MAP
 from .lang.lex_attrs import LEX_ATTRS, is_stop
-from .errors import Errors
+from .errors import Errors, Warnings, deprecation_warning
 from . import util
 from . import about
@@ -699,124 +699,114 @@ class Language(object):
         self.tokenizer._reset_cache(keys)
         nr_seen = 0
 
-    def to_disk(self, path, disable=tuple()):
+    def to_disk(self, path, exclude=tuple(), disable=None):
         """Save the current state to a directory. If a model is loaded, this
         will include the model.
 
-        path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be strings or `Path`-like objects.
-        disable (list): Names of pipeline components to disable and prevent
-            from being saved.
-
-        EXAMPLE:
-            >>> nlp.to_disk('/path/to/models')
+        path (unicode or Path): Path to a directory, which will be created if
+            it doesn't exist.
+        exclude (list): Names of components or serialization fields to exclude.
+
+        DOCS: https://spacy.io/api/language#to_disk
         """
+        if disable is not None:
+            deprecation_warning(Warnings.W014)
+            exclude = disable
         path = util.ensure_path(path)
-        serializers = OrderedDict(
-            (
-                ("tokenizer", lambda p: self.tokenizer.to_disk(p, vocab=False)),
-                ("meta.json", lambda p: p.open("w").write(srsly.json_dumps(self.meta))),
-            )
-        )
+        serializers = OrderedDict()
+        serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(p, exclude=["vocab"])
+        serializers["meta.json"] = lambda p: p.open("w").write(srsly.json_dumps(self.meta))
         for name, proc in self.pipeline:
             if not hasattr(proc, "name"):
                 continue
-            if name in disable:
+            if name in exclude:
                 continue
             if not hasattr(proc, "to_disk"):
                 continue
-            serializers[name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
+            serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])
         serializers["vocab"] = lambda p: self.vocab.to_disk(p)
-        util.to_disk(path, serializers, {p: False for p in disable})
+        util.to_disk(path, serializers, exclude)
 
-    def from_disk(self, path, disable=tuple()):
+    def from_disk(self, path, exclude=tuple(), disable=None):
         """Loads state from a directory. Modifies the object in place and
         returns it. If the saved `Language` object contains a model, the
         model will be loaded.
 
-        path (unicode or Path): A path to a directory. Paths may be either
-            strings or `Path`-like objects.
-        disable (list): Names of the pipeline components to disable.
+        path (unicode or Path): A path to a directory.
+        exclude (list): Names of components or serialization fields to exclude.
         RETURNS (Language): The modified `Language` object.
 
-        EXAMPLE:
-            >>> from spacy.language import Language
-            >>> nlp = Language().from_disk('/path/to/models')
+        DOCS: https://spacy.io/api/language#from_disk
         """
+        if disable is not None:
+            deprecation_warning(Warnings.W014)
+            exclude = disable
         path = util.ensure_path(path)
-        deserializers = OrderedDict(
-            (
-                ("meta.json", lambda p: self.meta.update(srsly.read_json(p))),
-                (
-                    "vocab",
-                    lambda p: (
-                        self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self)
-                    ),
-                ),
-                ("tokenizer", lambda p: self.tokenizer.from_disk(p, vocab=False)),
-            )
-        )
+        deserializers = OrderedDict()
+        deserializers["meta.json"] = lambda p: self.meta.update(srsly.read_json(p))
+        deserializers["vocab"] = lambda p: self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self)
+        deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(p, exclude=["vocab"])
         for name, proc in self.pipeline:
-            if name in disable:
+            if name in exclude:
                 continue
             if not hasattr(proc, "from_disk"):
                 continue
-            deserializers[name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
-        exclude = {p: False for p in disable}
-        if not (path / "vocab").exists():
-            exclude["vocab"] = True
+            deserializers[name] = lambda p, proc=proc: proc.from_disk(p, exclude=["vocab"])
+        if not (path / "vocab").exists() and "vocab" not in exclude:
+            # Convert to list here in case exclude is (default) tuple
+            exclude = list(exclude) + ["vocab"]
         util.from_disk(path, deserializers, exclude)
         self._path = path
         return self
 
-    def to_bytes(self, disable=[], **exclude):
+    def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
         """Serialize the current state to a binary string.
 
-        disable (list): Nameds of pipeline components to disable and prevent
-            from being serialized.
+        exclude (list): Names of components or serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Language` object.
+
+        DOCS: https://spacy.io/api/language#to_bytes
         """
-        serializers = OrderedDict(
-            (
-                ("vocab", lambda: self.vocab.to_bytes()),
-                ("tokenizer", lambda: self.tokenizer.to_bytes(vocab=False)),
-                ("meta", lambda: srsly.json_dumps(self.meta)),
-            )
-        )
-        for i, (name, proc) in enumerate(self.pipeline):
-            if name in disable:
+        if disable is not None:
+            deprecation_warning(Warnings.W014)
+            exclude = disable
+        serializers = OrderedDict()
+        serializers["vocab"] = lambda: self.vocab.to_bytes()
+        serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])
+        serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
+        for name, proc in self.pipeline:
+            if name in exclude:
                 continue
             if not hasattr(proc, "to_bytes"):
                 continue
-            serializers[i] = lambda proc=proc: proc.to_bytes(vocab=False)
+            serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])
+        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
         return util.to_bytes(serializers, exclude)
 
-    def from_bytes(self, bytes_data, disable=[]):
+    def from_bytes(self, bytes_data, exclude=tuple(), disable=None, **kwargs):
         """Load state from a binary string.
 
         bytes_data (bytes): The data to load from.
-        disable (list): Names of the pipeline components to disable.
+        exclude (list): Names of components or serialization fields to exclude.
         RETURNS (Language): The `Language` object.
+
+        DOCS: https://spacy.io/api/language#from_bytes
         """
-        deserializers = OrderedDict(
-            (
-                ("meta", lambda b: self.meta.update(srsly.json_loads(b))),
-                (
-                    "vocab",
-                    lambda b: (
-                        self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self)
-                    ),
-                ),
-                ("tokenizer", lambda b: self.tokenizer.from_bytes(b, vocab=False)),
-            )
-        )
-        for i, (name, proc) in enumerate(self.pipeline):
-            if name in disable:
+        if disable is not None:
+            deprecation_warning(Warnings.W014)
+            exclude = disable
+        deserializers = OrderedDict()
+        deserializers["meta.json"] = lambda b: self.meta.update(srsly.json_loads(b))
+        deserializers["vocab"] = lambda b: self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self)
+        deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(b, exclude=["vocab"])
+        for name, proc in self.pipeline:
+            if name in exclude:
                 continue
             if not hasattr(proc, "from_bytes"):
                 continue
-            deserializers[i] = lambda b, proc=proc: proc.from_bytes(b, vocab=False)
-        util.from_bytes(bytes_data, deserializers, {})
+            deserializers[name] = lambda b, proc=proc: proc.from_bytes(b, exclude=["vocab"])
+        exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
+        util.from_bytes(bytes_data, deserializers, exclude)
         return self
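
For backwards compatibility, the deprecated `disable` argument is mapped onto `exclude` with a warning rather than removed outright. A quick sketch of the transition behaviour, assuming the `deprecation_warning` helper emits a standard `DeprecationWarning`:

```python
import warnings

from spacy.language import Language

nlp = Language()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old-style call: `disable` is treated as `exclude` and emits W014.
    data = nlp.to_bytes(disable=["parser"])
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```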

View File

@@ -141,16 +141,21 @@ class Pipe(object):
         with self.model.use_params(params):
             yield
 
-    def to_bytes(self, **exclude):
-        """Serialize the pipe to a bytestring."""
+    def to_bytes(self, exclude=tuple(), **kwargs):
+        """Serialize the pipe to a bytestring.
+
+        exclude (list): String names of serialization fields to exclude.
+        RETURNS (bytes): The serialized object.
+        """
         serialize = OrderedDict()
         serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
         if self.model not in (True, False, None):
             serialize["model"] = self.model.to_bytes
         serialize["vocab"] = self.vocab.to_bytes
+        exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
         return util.to_bytes(serialize, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         """Load the pipe from a bytestring."""
 
         def load_model(b):
@@ -161,26 +166,25 @@ class Pipe(object):
             self.model = self.Model(**self.cfg)
             self.model.from_bytes(b)
 
-        deserialize = OrderedDict(
-            (
-                ("cfg", lambda b: self.cfg.update(srsly.json_loads(b))),
-                ("vocab", lambda b: self.vocab.from_bytes(b)),
-                ("model", load_model),
-            )
-        )
+        deserialize = OrderedDict()
+        deserialize["cfg"] = lambda b: self.cfg.update(srsly.json_loads(b))
+        deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
+        deserialize["model"] = load_model
+        exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
         util.from_bytes(bytes_data, deserialize, exclude)
         return self
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, exclude=tuple(), **kwargs):
         """Serialize the pipe to disk."""
         serialize = OrderedDict()
         serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
         serialize["vocab"] = lambda p: self.vocab.to_disk(p)
         if self.model not in (None, True, False):
             serialize["model"] = lambda p: p.open("wb").write(self.model.to_bytes())
+        exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
         util.to_disk(path, serialize, exclude)
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, exclude=tuple(), **kwargs):
         """Load the pipe from disk."""
 
         def load_model(p):
@@ -191,13 +195,11 @@ class Pipe(object):
             self.model = self.Model(**self.cfg)
             self.model.from_bytes(p.open("rb").read())
 
-        deserialize = OrderedDict(
-            (
-                ("cfg", lambda p: self.cfg.update(_load_cfg(p))),
-                ("vocab", lambda p: self.vocab.from_disk(p)),
-                ("model", load_model),
-            )
-        )
+        deserialize = OrderedDict()
+        deserialize["cfg"] = lambda p: self.cfg.update(_load_cfg(p))
+        deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
+        deserialize["model"] = load_model
+        exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
         util.from_disk(path, deserialize, exclude)
         return self
 
@@ -537,7 +539,7 @@ class Tagger(Pipe):
         with self.model.use_params(params):
             yield
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         serialize = OrderedDict()
         if self.model not in (None, True, False):
             serialize["model"] = self.model.to_bytes
@@ -545,9 +547,10 @@ class Tagger(Pipe):
         serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
         tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
         serialize["tag_map"] = lambda: srsly.msgpack_dumps(tag_map)
+        exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
         return util.to_bytes(serialize, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         def load_model(b):
             # TODO: Remove this once we don't have to handle previous models
             if self.cfg.get("pretrained_dims") and "pretrained_vectors" not in self.cfg:
@@ -572,20 +575,22 @@ class Tagger(Pipe):
             ("cfg", lambda b: self.cfg.update(srsly.json_loads(b))),
             ("model", lambda b: load_model(b)),
         ))
+        exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
         util.from_bytes(bytes_data, deserialize, exclude)
         return self
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, exclude=tuple(), **kwargs):
         tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
         serialize = OrderedDict((
-            ('vocab', lambda p: self.vocab.to_disk(p)),
-            ('tag_map', lambda p: srsly.write_msgpack(p, tag_map)),
-            ('model', lambda p: p.open("wb").write(self.model.to_bytes())),
-            ('cfg', lambda p: srsly.write_json(p, self.cfg))
+            ("vocab", lambda p: self.vocab.to_disk(p)),
+            ("tag_map", lambda p: srsly.write_msgpack(p, tag_map)),
+            ("model", lambda p: p.open("wb").write(self.model.to_bytes())),
+            ("cfg", lambda p: srsly.write_json(p, self.cfg))
         ))
+        exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
         util.to_disk(path, serialize, exclude)
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, exclude=tuple(), **kwargs):
         def load_model(p):
             # TODO: Remove this once we don't have to handle previous models
             if self.cfg.get("pretrained_dims") and "pretrained_vectors" not in self.cfg:
@@ -608,6 +613,7 @@ class Tagger(Pipe):
             ("tag_map", load_tag_map),
             ("model", load_model),
         ))
+        exclude = util.get_serialization_exclude(deserialize, exclude, kwargs)
         util.from_disk(path, deserialize, exclude)
         return self

View File

@@ -236,19 +236,17 @@ cdef class StringStore:
             self.add(word)
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, **kwargs):
         """Serialize the current state to a binary string.
 
-        **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `StringStore` object.
         """
         return srsly.json_dumps(list(self))
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, **kwargs):
         """Load state from a binary string.
 
         bytes_data (bytes): The data to load from.
-        **exclude: Named attributes to prevent from being loaded.
         RETURNS (StringStore): The `StringStore` object.
         """
         strings = srsly.json_loads(bytes_data)

View File

@@ -603,22 +603,24 @@ cdef class Parser:
         self.cfg.update(cfg)
         return sgd
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, exclude=tuple(), **kwargs):
         serializers = {
             'model': lambda p: (self.model.to_disk(p) if self.model is not True else True),
             'vocab': lambda p: self.vocab.to_disk(p),
-            'moves': lambda p: self.moves.to_disk(p, strings=False),
+            'moves': lambda p: self.moves.to_disk(p, exclude=["strings"]),
             'cfg': lambda p: srsly.write_json(p, self.cfg)
         }
+        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
         util.to_disk(path, serializers, exclude)
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, exclude=tuple(), **kwargs):
         deserializers = {
             'vocab': lambda p: self.vocab.from_disk(p),
-            'moves': lambda p: self.moves.from_disk(p, strings=False),
+            'moves': lambda p: self.moves.from_disk(p, exclude=["strings"]),
             'cfg': lambda p: self.cfg.update(srsly.read_json(p)),
             'model': lambda p: None
         }
+        exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         util.from_disk(path, deserializers, exclude)
         if 'model' not in exclude:
             path = util.ensure_path(path)
@@ -632,22 +634,24 @@ cdef class Parser:
         self.cfg.update(cfg)
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         serializers = OrderedDict((
             ('model', lambda: (self.model.to_bytes() if self.model is not True else True)),
             ('vocab', lambda: self.vocab.to_bytes()),
-            ('moves', lambda: self.moves.to_bytes(strings=False)),
+            ('moves', lambda: self.moves.to_bytes(exclude=["strings"])),
             ('cfg', lambda: srsly.json_dumps(self.cfg, indent=2, sort_keys=True))
         ))
+        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
         return util.to_bytes(serializers, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         deserializers = OrderedDict((
             ('vocab', lambda b: self.vocab.from_bytes(b)),
-            ('moves', lambda b: self.moves.from_bytes(b, strings=False)),
+            ('moves', lambda b: self.moves.from_bytes(b, exclude=["strings"])),
             ('cfg', lambda b: self.cfg.update(srsly.json_loads(b))),
             ('model', lambda b: None)
         ))
+        exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if 'model' not in exclude:
             # TODO: Remove this once we don't have to handle previous models

View File

@@ -208,30 +208,32 @@ cdef class TransitionSystem:
             self.labels[action][label_name] = new_freq-1
         return 1
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, **kwargs):
         with path.open('wb') as file_:
-            file_.write(self.to_bytes(**exclude))
+            file_.write(self.to_bytes(**kwargs))
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, **kwargs):
         with path.open('rb') as file_:
             byte_data = file_.read()
-        self.from_bytes(byte_data, **exclude)
+        self.from_bytes(byte_data, **kwargs)
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         transitions = []
         serializers = {
             'moves': lambda: srsly.json_dumps(self.labels),
             'strings': lambda: self.strings.to_bytes()
         }
+        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
         return util.to_bytes(serializers, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         labels = {}
         deserializers = {
             'moves': lambda b: labels.update(srsly.json_loads(b)),
             'strings': lambda b: self.strings.from_bytes(b)
         }
+        exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         self.initialize_actions(labels)
         return self

View File

@@ -113,14 +113,14 @@ def test_doc_api_serialize(en_tokenizer, text):
     assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
 
     new_tokens = Doc(tokens.vocab).from_bytes(
-        tokens.to_bytes(tensor=False), tensor=False
+        tokens.to_bytes(exclude=["tensor"]), exclude=["tensor"]
     )
     assert tokens.text == new_tokens.text
     assert [t.text for t in tokens] == [t.text for t in new_tokens]
     assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
 
     new_tokens = Doc(tokens.vocab).from_bytes(
-        tokens.to_bytes(sentiment=False), sentiment=False
+        tokens.to_bytes(exclude=["sentiment"]), exclude=["sentiment"]
    )
     assert tokens.text == new_tokens.text
     assert [t.text for t in tokens] == [t.text for t in new_tokens]

View File

@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import pytest
 from spacy.tokens import Doc
 from spacy.compat import path2str
@@ -41,3 +42,18 @@ def test_serialize_doc_roundtrip_disk_str_path(en_vocab):
     doc.to_disk(file_path)
     doc_d = Doc(en_vocab).from_disk(file_path)
     assert doc.to_bytes() == doc_d.to_bytes()
+
+
+def test_serialize_doc_exclude(en_vocab):
+    doc = Doc(en_vocab, words=["hello", "world"])
+    doc.user_data["foo"] = "bar"
+    new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
+    assert new_doc.user_data["foo"] == "bar"
+    new_doc = Doc(en_vocab).from_bytes(doc.to_bytes(), exclude=["user_data"])
+    assert not new_doc.user_data
+    new_doc = Doc(en_vocab).from_bytes(doc.to_bytes(exclude=["user_data"]))
+    assert not new_doc.user_data
+    with pytest.raises(ValueError):
+        doc.to_bytes(user_data=False)
+    with pytest.raises(ValueError):
+        Doc(en_vocab).from_bytes(doc.to_bytes(), tensor=False)

View File

@@ -52,3 +52,19 @@ def test_serialize_with_custom_tokenizer():
     nlp.tokenizer = custom_tokenizer(nlp)
     with make_tempdir() as d:
         nlp.to_disk(d)
+
+
+def test_serialize_language_exclude(meta_data):
+    name = "name-in-fixture"
+    nlp = Language(meta=meta_data)
+    assert nlp.meta["name"] == name
+    new_nlp = Language().from_bytes(nlp.to_bytes())
+    assert new_nlp.meta["name"] == name
+    new_nlp = Language().from_bytes(nlp.to_bytes(), exclude=["meta"])
+    assert not new_nlp.meta["name"] == name
+    new_nlp = Language().from_bytes(nlp.to_bytes(exclude=["meta"]))
+    assert not new_nlp.meta["name"] == name
+    with pytest.raises(ValueError):
+        nlp.to_bytes(meta=False)
+    with pytest.raises(ValueError):
+        Language().from_bytes(nlp.to_bytes(), meta=False)

View File

@@ -55,7 +55,9 @@ def test_serialize_parser_roundtrip_disk(en_vocab, Parser):
     parser_d = Parser(en_vocab)
     parser_d.model, _ = parser_d.Model(0)
     parser_d = parser_d.from_disk(file_path)
-    assert parser.to_bytes(model=False) == parser_d.to_bytes(model=False)
+    parser_bytes = parser.to_bytes(exclude=["model"])
+    parser_d_bytes = parser_d.to_bytes(exclude=["model"])
+    assert parser_bytes == parser_d_bytes
 
 
 def test_to_from_bytes(parser, blank_parser):
@@ -114,3 +116,25 @@ def test_serialize_textcat_empty(en_vocab):
     # See issue #1105
     textcat = TextCategorizer(en_vocab, labels=["ENTITY", "ACTION", "MODIFIER"])
     textcat.to_bytes()
+
+
+@pytest.mark.parametrize("Parser", test_parsers)
+def test_serialize_pipe_exclude(en_vocab, Parser):
+    def get_new_parser():
+        new_parser = Parser(en_vocab)
+        new_parser.model, _ = new_parser.Model(0)
+        return new_parser
+
+    parser = Parser(en_vocab)
+    parser.model, _ = parser.Model(0)
+    parser.cfg["foo"] = "bar"
+    new_parser = get_new_parser().from_bytes(parser.to_bytes())
+    assert "foo" in new_parser.cfg
+    new_parser = get_new_parser().from_bytes(parser.to_bytes(), exclude=["cfg"])
+    assert "foo" not in new_parser.cfg
+    new_parser = get_new_parser().from_bytes(parser.to_bytes(exclude=["cfg"]))
+    assert "foo" not in new_parser.cfg
+    with pytest.raises(ValueError):
+        parser.to_bytes(cfg=False)
+    with pytest.raises(ValueError):
+        get_new_parser().from_bytes(parser.to_bytes(), cfg=False)

View File

@@ -360,36 +360,37 @@ cdef class Tokenizer:
             self._cache.set(key, cached)
         self._rules[string] = substrings
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, **kwargs):
         """Save the current state to a directory.
 
         path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be either strings or Path-like objects.
+            it doesn't exist.
+        exclude (list): String names of serialization fields to exclude.
 
         DOCS: https://spacy.io/api/tokenizer#to_disk
         """
         with path.open("wb") as file_:
-            file_.write(self.to_bytes(**exclude))
+            file_.write(self.to_bytes(**kwargs))
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, **kwargs):
         """Loads state from a directory. Modifies the object in place and
         returns it.
 
-        path (unicode or Path): A path to a directory. Paths may be either
-            strings or `Path`-like objects.
+        path (unicode or Path): A path to a directory.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Tokenizer): The modified `Tokenizer` object.
 
         DOCS: https://spacy.io/api/tokenizer#from_disk
         """
         with path.open("rb") as file_:
             bytes_data = file_.read()
-        self.from_bytes(bytes_data, **exclude)
+        self.from_bytes(bytes_data, **kwargs)
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         """Serialize the current state to a binary string.
 
-        **exclude: Named attributes to prevent from being serialized.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
 
         DOCS: https://spacy.io/api/tokenizer#to_bytes
@@ -402,13 +403,14 @@ cdef class Tokenizer:
             ("token_match", lambda: _get_regex_pattern(self.token_match)),
             ("exceptions", lambda: OrderedDict(sorted(self._rules.items())))
         ))
+        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
         return util.to_bytes(serializers, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         """Load state from a binary string.
 
         bytes_data (bytes): The data to load from.
-        **exclude: Named attributes to prevent from being loaded.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Tokenizer): The `Tokenizer` object.
 
         DOCS: https://spacy.io/api/tokenizer#from_bytes
@@ -422,6 +424,7 @@ cdef class Tokenizer:
             ("token_match", lambda b: data.setdefault("token_match", b)),
             ("exceptions", lambda b: data.setdefault("rules", b))
         ))
+        exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if data.get("prefix_search"):
             self.prefix_search = re.compile(data["prefix_search"]).search

View File

@@ -794,24 +794,26 @@ cdef class Doc:
         """
         return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, **kwargs):
         """Save the current state to a directory.
 
         path (unicode or Path): A path to a directory, which will be created if
             it doesn't exist. Paths may be either strings or Path-like objects.
+        exclude (list): String names of serialization fields to exclude.
 
         DOCS: https://spacy.io/api/doc#to_disk
         """
         path = util.ensure_path(path)
         with path.open("wb") as file_:
-            file_.write(self.to_bytes(**exclude))
+            file_.write(self.to_bytes(**kwargs))
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, **kwargs):
         """Loads state from a directory. Modifies the object in place and
         returns it.
 
         path (unicode or Path): A path to a directory. Paths may be either
             strings or `Path`-like objects.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Doc): The modified `Doc` object.
 
         DOCS: https://spacy.io/api/doc#from_disk
@@ -819,11 +821,12 @@ cdef class Doc:
         path = util.ensure_path(path)
         with path.open("rb") as file_:
             bytes_data = file_.read()
-        return self.from_bytes(bytes_data, **exclude)
+        return self.from_bytes(bytes_data, **kwargs)
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         """Serialize, i.e. export the document contents to a binary string.
 
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
             all annotations.
@@ -849,16 +852,22 @@ cdef class Doc:
             "sentiment": lambda: self.sentiment,
             "tensor": lambda: self.tensor,
         }
+        for key in kwargs:
+            if key in serializers or key in ("user_data", "user_data_keys", "user_data_values"):
+                raise ValueError(Errors.E128.format(arg=key))
         if "user_data" not in exclude and self.user_data:
             user_data_keys, user_data_values = list(zip(*self.user_data.items()))
-            serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
-            serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
+            if "user_data_keys" not in exclude:
+                serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
+            if "user_data_values" not in exclude:
+                serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
         return util.to_bytes(serializers, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         """Deserialize, i.e. import the document contents from a binary string.
 
         data (bytes): The string to load from.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Doc): Itself.
 
         DOCS: https://spacy.io/api/doc#from_bytes
@@ -874,6 +883,9 @@ cdef class Doc:
             "user_data_keys": lambda b: None,
             "user_data_values": lambda b: None,
         }
+        for key in kwargs:
+            if key in deserializers or key in ("user_data",):
+                raise ValueError(Errors.E128.format(arg=key))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         # Msgpack doesn't distinguish between lists and tuples, which is
         # vexing for user data. As a best guess, we *know* that within
@@ -1170,7 +1182,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
 
 def pickle_doc(doc):
-    bytes_data = doc.to_bytes(vocab=False, user_data=False)
+    bytes_data = doc.to_bytes(exclude=["vocab", "user_data"])
     hooks_and_data = (doc.user_data, doc.user_hooks, doc.user_span_hooks,
                       doc.user_token_hooks)
     return (unpickle_doc, (doc.vocab, srsly.pickle_dumps(hooks_and_data), bytes_data))
@@ -1179,7 +1191,7 @@ def pickle_doc(doc):
 def unpickle_doc(vocab, hooks_and_data, bytes_data):
     user_data, doc_hooks, span_hooks, token_hooks = srsly.pickle_loads(hooks_and_data)
 
-    doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude="user_data")
+    doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude=["user_data"])
     doc.user_hooks.update(doc_hooks)
     doc.user_span_hooks.update(span_hooks)
     doc.user_token_hooks.update(token_hooks)
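
Note that `pickle_doc`/`unpickle_doc` exclude `user_data` from the byte payload and carry it in the separately pickled hooks tuple instead, so user data still survives a round-trip; a small sketch:

```python
import pickle

from spacy.tokens import Doc
from spacy.vocab import Vocab

doc = Doc(Vocab(), words=["hello", "world"])
doc.user_data["foo"] = "bar"
# to_bytes(exclude=["vocab", "user_data"]) is used under the hood;
# user_data comes back via the pickled hooks tuple.
doc2 = pickle.loads(pickle.dumps(doc))
assert doc2.user_data["foo"] == "bar"
```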

View File

@@ -25,7 +25,7 @@ except ImportError:
 from .symbols import ORTH
 from .compat import cupy, CudaStream, path2str, basestring_, unicode_
 from .compat import import_file
-from .errors import Errors
+from .errors import Errors, Warnings, deprecation_warning
 
 
 LANGUAGES = {}
@@ -565,7 +565,8 @@ def itershuffle(iterable, bufsize=1000):
 def to_bytes(getters, exclude):
     serialized = OrderedDict()
     for key, getter in getters.items():
-        if key not in exclude:
+        # Split to support file names like meta.json
+        if key.split(".")[0] not in exclude:
             serialized[key] = getter()
     return srsly.msgpack_dumps(serialized)
@@ -573,7 +574,8 @@ def to_bytes(getters, exclude):
 def from_bytes(bytes_data, setters, exclude):
     msg = srsly.msgpack_loads(bytes_data)
     for key, setter in setters.items():
-        if key not in exclude and key in msg:
+        # Split to support file names like meta.json
+        if key.split(".")[0] not in exclude and key in msg:
             setter(msg[key])
     return msg
@@ -583,7 +585,8 @@ def from_bytes(bytes_data, setters, exclude):
 def to_disk(path, writers, exclude):
     if not path.exists():
         path.mkdir()
     for key, writer in writers.items():
-        if key not in exclude:
+        # Split to support file names like meta.json
+        if key.split(".")[0] not in exclude:
             writer(path / key)
     return path
@@ -591,7 +594,8 @@ def to_disk(path, writers, exclude):
 def from_disk(path, readers, exclude):
     path = ensure_path(path)
     for key, reader in readers.items():
-        if key not in exclude:
+        # Split to support file names like meta.json
+        if key.split(".")[0] not in exclude:
             reader(path / key)
     return path
@@ -677,6 +681,23 @@ def validate_json(data, validator):
     return errors
 
 
+def get_serialization_exclude(serializers, exclude, kwargs):
+    """Helper function to validate serialization args and manage transition from
+    keyword arguments (pre v2.1) to exclude argument.
+    """
+    exclude = list(exclude)
+    # Split to support file names like meta.json
+    options = [name.split(".")[0] for name in serializers]
+    for key, value in kwargs.items():
+        if key in ("vocab",) and value is False:
+            deprecation_warning(Warnings.W015.format(arg=key))
+            exclude.append(key)
+        elif key.split(".")[0] in options:
+            raise ValueError(Errors.E128.format(arg=key))
+        # TODO: user warning?
+    return exclude
+
+
 class SimpleFrozenDict(dict):
     """Simplified implementation of a frozen dict, mainly used as default
     function or method argument (for arguments that should default to empty
@@ -696,14 +717,14 @@ class SimpleFrozenDict(dict):
 class DummyTokenizer(object):
     # add dummy methods for to_bytes, from_bytes, to_disk and from_disk to
     # allow serialization (see #1557)
-    def to_bytes(self, **exclude):
+    def to_bytes(self, **kwargs):
         return b""
 
-    def from_bytes(self, _bytes_data, **exclude):
+    def from_bytes(self, _bytes_data, **kwargs):
         return self
 
-    def to_disk(self, _path, **exclude):
+    def to_disk(self, _path, **kwargs):
         return None
 
-    def from_disk(self, _path, **exclude):
+    def from_disk(self, _path, **kwargs):
         return self
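
Because each key is reduced to the part before the first `.` when matching, an `exclude` entry like `"meta"` also covers the `"meta.json"` serialization key. A standalone sketch of the matching rule (not spaCy's actual module):

```python
from collections import OrderedDict

def kept_keys(getters, exclude):
    # Mirrors the filtering in util.to_bytes/to_disk: only the part of
    # each key before the first "." is compared against `exclude`.
    return [key for key in getters if key.split(".")[0] not in exclude]

getters = OrderedDict([("meta.json", None), ("vocab", None), ("tokenizer", None)])
assert kept_keys(getters, exclude=["meta"]) == ["vocab", "tokenizer"]
```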

View File

@@ -377,11 +377,11 @@ cdef class Vectors:
             self.add(key, row=i)
         return strings
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, **kwargs):
         """Save the current state to a directory.
 
         path (unicode / Path): A path to a directory, which will be created if
-            it doesn't exists. Either a string or a Path-like object.
+            it doesn't exist.
 
         DOCS: https://spacy.io/api/vectors#to_disk
         """
@@ -394,9 +394,9 @@ cdef class Vectors:
             ("vectors", lambda p: save_array(self.data, p.open("wb"))),
             ("key2row", lambda p: srsly.write_msgpack(p, self.key2row))
         ))
-        return util.to_disk(path, serializers, exclude)
+        return util.to_disk(path, serializers, [])
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, **kwargs):
         """Loads state from a directory. Modifies the object in place and
         returns it.
@@ -428,13 +428,13 @@ cdef class Vectors:
             ("keys", load_keys),
             ("vectors", load_vectors),
         ))
-        util.from_disk(path, serializers, exclude)
+        util.from_disk(path, serializers, [])
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, **kwargs):
         """Serialize the current state to a binary string.
 
-        **exclude: Named attributes to prevent from being serialized.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Vectors` object.
 
         DOCS: https://spacy.io/api/vectors#to_bytes
@@ -444,17 +444,18 @@ cdef class Vectors:
                 return self.data.to_bytes()
             else:
                 return srsly.msgpack_dumps(self.data)
+
         serializers = OrderedDict((
             ("key2row", lambda: srsly.msgpack_dumps(self.key2row)),
             ("vectors", serialize_weights)
         ))
-        return util.to_bytes(serializers, exclude)
+        return util.to_bytes(serializers, [])
 
-    def from_bytes(self, data, **exclude):
+    def from_bytes(self, data, **kwargs):
         """Load state from a binary string.
 
         data (bytes): The data to load from.
-        **exclude: Named attributes to prevent from being loaded.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Vectors): The `Vectors` object.
 
         DOCS: https://spacy.io/api/vectors#from_bytes
@@ -469,5 +470,5 @@ cdef class Vectors:
             ("key2row", lambda b: self.key2row.update(srsly.msgpack_loads(b))),
             ("vectors", deserialize_weights)
         ))
-        util.from_bytes(data, deserializers, exclude)
+        util.from_bytes(data, deserializers, [])
         return self

View File

@@ -397,47 +397,57 @@ cdef class Vocab:
             orth = self.strings.add(orth)
         return orth in self.vectors
 
-    def to_disk(self, path, **exclude):
+    def to_disk(self, path, exclude=tuple(), **kwargs):
         """Save the current state to a directory.
 
         path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be either strings or Path-like objects.
+            it doesn't exist.
+        exclude (list): String names of serialization fields to exclude.
 
         DOCS: https://spacy.io/api/vocab#to_disk
         """
         path = util.ensure_path(path)
         if not path.exists():
             path.mkdir()
-        self.strings.to_disk(path / "strings.json")
-        with (path / "lexemes.bin").open('wb') as file_:
-            file_.write(self.lexemes_to_bytes())
-        if self.vectors is not None:
+        setters = ["strings", "lexemes", "vectors"]
+        exclude = util.get_serialization_exclude(setters, exclude, kwargs)
+        if "strings" not in exclude:
+            self.strings.to_disk(path / "strings.json")
+        if "lexemes" not in exclude:
+            with (path / "lexemes.bin").open("wb") as file_:
+                file_.write(self.lexemes_to_bytes())
+        if "vectors" not in exclude and self.vectors is not None:
             self.vectors.to_disk(path)
 
-    def from_disk(self, path, **exclude):
+    def from_disk(self, path, exclude=tuple(), **kwargs):
         """Loads state from a directory. Modifies the object in place and
         returns it.
 
-        path (unicode or Path): A path to a directory. Paths may be either
-            strings or `Path`-like objects.
+        path (unicode or Path): A path to a directory.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The modified `Vocab` object.
 
         DOCS: https://spacy.io/api/vocab#to_disk
         """
         path = util.ensure_path(path)
-        self.strings.from_disk(path / "strings.json")
-        with (path / "lexemes.bin").open("rb") as file_:
-            self.lexemes_from_bytes(file_.read())
-        if self.vectors is not None:
-            self.vectors.from_disk(path, exclude="strings.json")
-            if self.vectors.name is not None:
-                link_vectors_to_models(self)
+        getters = ["strings", "lexemes", "vectors"]
+        exclude = util.get_serialization_exclude(getters, exclude, kwargs)
+        if "strings" not in exclude:
+            self.strings.from_disk(path / "strings.json")  # TODO: add exclude?
+        if "lexemes" not in exclude:
+            with (path / "lexemes.bin").open("rb") as file_:
+                self.lexemes_from_bytes(file_.read())
+        if "vectors" not in exclude:
+            if self.vectors is not None:
+                self.vectors.from_disk(path, exclude=["strings"])
+                if self.vectors.name is not None:
+                    link_vectors_to_models(self)
         return self
 
-    def to_bytes(self, **exclude):
+    def to_bytes(self, exclude=tuple(), **kwargs):
         """Serialize the current state to a binary string.
 
-        **exclude: Named attributes to prevent from being serialized.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): The serialized form of the `Vocab` object.
 
        DOCS: https://spacy.io/api/vocab#to_bytes
@@ -453,13 +463,14 @@ cdef class Vocab:
             ("lexemes", lambda: self.lexemes_to_bytes()),
             ("vectors", deserialize_vectors)
         ))
+        exclude = util.get_serialization_exclude(getters, exclude, kwargs)
         return util.to_bytes(getters, exclude)
 
-    def from_bytes(self, bytes_data, **exclude):
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
         """Load state from a binary string.
 
         bytes_data (bytes): The data to load from.
-        **exclude: Named attributes to prevent from being loaded.
+        exclude (list): String names of serialization fields to exclude.
         RETURNS (Vocab): The `Vocab` object.
 
         DOCS: https://spacy.io/api/vocab#from_bytes
@@ -469,11 +480,13 @@ cdef class Vocab:
                 return None
             else:
                 return self.vectors.from_bytes(b)
+
         setters = OrderedDict((
             ("strings", lambda b: self.strings.from_bytes(b)),
             ("lexemes", lambda b: self.lexemes_from_bytes(b)),
             ("vectors", lambda b: serialize_vectors(b))
         ))
+        exclude = util.get_serialization_exclude(setters, exclude, kwargs)
         util.from_bytes(bytes_data, setters, exclude)
         if self.vectors.name is not None:
             link_vectors_to_models(self)

View File

@@ -244,9 +244,10 @@ Serialize the pipe to disk.
 > parser.to_disk("/path/to/parser")
 > ```
 
 | Name      | Type             | Description |
 | --------- | ---------------- | ----------- |
 | `path`    | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| `exclude` | list             | String names of [serialization fields](#serialization-fields) to exclude. |
 
 ## DependencyParser.from_disk {#from_disk tag="method"}
@@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.
 | Name        | Type               | Description |
 | ----------- | ------------------ | ----------- |
 | `path`      | unicode / `Path`   | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| `exclude`   | list               | String names of [serialization fields](#serialization-fields) to exclude. |
 | **RETURNS** | `DependencyParser` | The modified `DependencyParser` object. |
 
 ## DependencyParser.to_bytes {#to_bytes tag="method"}
@@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.
 Serialize the pipe to a bytestring.
 
 | Name        | Type  | Description |
 | ----------- | ----- | ----------- |
-| `**exclude` | -     | Named attributes to prevent from being serialized. |
+| `exclude`   | list  | String names of [serialization fields](#serialization-fields) to exclude. |
 | **RETURNS** | bytes | The serialized form of the `DependencyParser` object. |
 
 ## DependencyParser.from_bytes {#from_bytes tag="method"}
@@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > parser.from_bytes(parser_bytes)
 > ```
 
 | Name         | Type               | Description |
 | ------------ | ------------------ | ----------- |
 | `bytes_data` | bytes              | The data to load from. |
-| `**exclude`  | -                  | Named attributes to prevent from being loaded. |
+| `exclude`    | list               | String names of [serialization fields](#serialization-fields) to exclude. |
 | **RETURNS**  | `DependencyParser` | The `DependencyParser` object. |
 
 ## DependencyParser.labels {#labels tag="property"}
@@ -312,3 +314,21 @@ The labels currently added to the component.
 | Name        | Type  | Description |
 | ----------- | ----- | ----------- |
 | **RETURNS** | tuple | The labels added to the component. |
+
+## Serialization fields {#serialization-fields}
+
+During serialization, spaCy will export several data fields used to restore
+different aspects of the object. If needed, you can exclude them from
+serialization by passing in the string names via the `exclude` argument.
+
+> #### Example
+>
+> ```python
+> parser.to_disk("/path", exclude=["vocab"])
+> ```
+
+| Name    | Description |
+| ------- | ----------- |
+| `vocab` | The shared [`Vocab`](/api/vocab). |
+| `cfg`   | The config file. You usually don't want to exclude this. |
+| `model` | The binary model data. You usually don't want to exclude this. |
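
Following the test pattern from this commit, a round-trip that leaves out the shared vocab might look like the sketch below (the `Model(0)` setup mirrors the tests above and is only there to give the parser a model):

```python
import spacy
from spacy.pipeline import DependencyParser

nlp = spacy.blank("en")
parser = DependencyParser(nlp.vocab)
parser.model, _ = parser.Model(0)
# Serialize without the shared vocab; the loading pipeline supplies it.
parser_bytes = parser.to_bytes(exclude=["vocab"])
new_parser = DependencyParser(nlp.vocab)
new_parser.model, _ = new_parser.Model(0)
new_parser = new_parser.from_bytes(parser_bytes, exclude=["vocab"])
```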

View File

@ -349,11 +349,12 @@ array of attributes.
> assert doc[0].pos_ == doc2[0].pos_ > assert doc[0].pos_ == doc2[0].pos_
> ``` > ```
| Name | Type | Description | | Name | Type | Description |
| ----------- | -------------------------------------- | ----------------------------- | | ----------- | -------------------------------------- | ------------------------------------------------------------------------- |
| `attrs` | list | A list of attribute ID ints. | | `attrs` | list | A list of attribute ID ints. |
| `array` | `numpy.ndarray[ndim=2, dtype='int32']` | The attribute values to load. | | `array` | `numpy.ndarray[ndim=2, dtype='int32']` | The attribute values to load. |
| **RETURNS** | `Doc` | Itself. | | `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | Itself. |
## Doc.to_disk {#to_disk tag="method" new="2"} ## Doc.to_disk {#to_disk tag="method" new="2"}
@ -365,9 +366,10 @@ Save the current state to a directory.
> doc.to_disk("/path/to/doc") > doc.to_disk("/path/to/doc")
> ``` > ```
| Name | Type | Description | | Name | Type | Description |
| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------- | | --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. | | `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Doc.from_disk {#from_disk tag="method" new="2"}

@@ -384,6 +386,7 @@ Loads state from a directory. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | The modified `Doc` object. |
## Doc.to_bytes {#to_bytes tag="method"}

@@ -397,9 +400,10 @@ Serialize, i.e. export the document contents to a binary string.

> doc_bytes = doc.to_bytes()
> ```

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | A losslessly serialized copy of the `Doc`, including all annotations. |
## Doc.from_bytes {#from_bytes tag="method"}

@@ -416,10 +420,11 @@ Deserialize, i.e. import the document contents from a binary string.

> assert doc.text == doc2.text
> ```

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `data` | bytes | The string to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Doc` | The `Doc` object. |
## Doc.retokenize {#retokenize tag="contextmanager" new="2.1"}

@@ -658,3 +663,25 @@ The L2 norm of the document's vector representation.

| `user_token_hooks` | dict | A dictionary that allows customization of properties of `Token` children. |
| `user_span_hooks` | dict | A dictionary that allows customization of properties of `Span` children. |
| `_` | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = doc.to_bytes(exclude=["text", "tensor"])
> doc.from_disk("./doc.bin", exclude=["user_data"])
> ```
| Name | Description |
| ------------------ | --------------------------------------------- |
| `text` | The value of the `Doc.text` attribute. |
| `sentiment` | The value of the `Doc.sentiment` attribute. |
| `tensor` | The value of the `Doc.tensor` attribute. |
| `user_data` | The value of the `Doc.user_data` dictionary. |
| `user_data_keys` | The keys of the `Doc.user_data` dictionary. |
| `user_data_values` | The values of the `Doc.user_data` dictionary. |
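For example, to ship documents without their custom user data while keeping all linguistic annotations, a round trip along these lines should work (a sketch; `nlp` is assumed to be any loaded pipeline):

```python
from spacy.tokens import Doc

doc = nlp("This is a sentence.")
# Drop the user data dictionary on export.
doc_bytes = doc.to_bytes(exclude=["user_data"])
new_doc = Doc(nlp.vocab).from_bytes(doc_bytes)
assert new_doc.text == doc.text
```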


@@ -244,9 +244,10 @@ Serialize the pipe to disk.

> ner.to_disk("/path/to/ner")
> ```

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## EntityRecognizer.from_disk {#from_disk tag="method"}

@@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ------------------ | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `EntityRecognizer` | The modified `EntityRecognizer` object. |
## EntityRecognizer.to_bytes {#to_bytes tag="method"}

@@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.

Serialize the pipe to a bytestring.

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `EntityRecognizer` object. |
## EntityRecognizer.from_bytes {#from_bytes tag="method"}

@@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.

> ner.from_bytes(ner_bytes)
> ```

| Name | Type | Description |
| ------------ | ------------------ | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `EntityRecognizer` | The `EntityRecognizer` object. |
## EntityRecognizer.labels {#labels tag="property"}

@@ -312,3 +314,21 @@ The labels currently added to the component.

| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = ner.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |
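The same field names work for byte and disk serialization alike. A hypothetical disk round trip, assuming `nlp` has an `"ner"` component and the target path (here `/tmp/ner`) is writable:

```python
from spacy.pipeline import EntityRecognizer

ner = nlp.get_pipe("ner")
ner.to_disk("/tmp/ner", exclude=["vocab"])  # vocab is provided by the pipeline
new_ner = EntityRecognizer(nlp.vocab)
new_ner.from_disk("/tmp/ner", exclude=["vocab"])
```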


@@ -327,7 +327,7 @@ the model**.

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
## Language.from_disk {#from_disk tag="method" new="2"}

@@ -349,22 +349,22 @@ loaded object.

> nlp = English().from_disk("/path/to/en_model")
> ```

| Name | Type | Description |
| ----------- | ---------------- | ----------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Language` | The modified `Language` object. |
<Infobox title="Changed in v2.0" variant="warning">

As of spaCy v2.0, the `save_to_directory` method has been renamed to `to_disk`
to improve consistency across classes. Pipeline components that should not be
loaded can now be passed as a list to `disable` (v2.0) or `exclude` (v2.1),
instead of as one keyword argument per component.
```diff
- nlp = spacy.load("en", tagger=False, entity=False)
+ nlp = English().from_disk("/model", exclude=["tagger", "ner"])
```
</Infobox>

@@ -379,10 +379,10 @@ Serialize the current state to a binary string.

> nlp_bytes = nlp.to_bytes()
> ```

| Name | Type | Description |
| ----------- | ----- | ----------------------------------------------------------------------------------------- |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Language` object. |
## Language.from_bytes {#from_bytes tag="method"}

@@ -400,20 +400,21 @@ available to the loaded object.

> nlp2.from_bytes(nlp_bytes)
> ```

| Name | Type | Description |
| ------------ | ---------- | ----------------------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Language` | The `Language` object. |
<Infobox title="Changed in v2.0" variant="warning">

Pipeline components that should not be loaded can now be passed as a list to
`disable` (v2.0) or `exclude` (v2.1), instead of as one keyword argument per
component.
```diff
- nlp = English().from_bytes(bytes, tagger=False, entity=False)
+ nlp = English().from_bytes(bytes, exclude=["tagger", "ner"])
```
</Infobox>

@@ -437,3 +438,23 @@ Pipeline components to prevent from being loaded can now be added as a list to

| `Defaults` | class | Settings, data and factory methods for creating the `nlp` object and processing pipeline. |
| `lang` | unicode | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). |
| `factories` <Tag variant="new">2</Tag> | dict | Factories that create pre-defined pipeline components, e.g. the tagger, parser or entity recognizer, keyed by their component name. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = nlp.to_bytes(exclude=["tokenizer", "vocab"])
> nlp.from_disk("./model-data", exclude=["ner"])
> ```
| Name | Description |
| ----------- | -------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `tokenizer` | Tokenization rules and exceptions. |
| `meta` | The meta data, available as `Language.meta`. |
| ... | String names of pipeline components, e.g. `"ner"`. |
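Because components are addressed by name, `exclude` can mix component names with the fields above. A sketch of a byte-level round trip that leaves the entity recognizer untouched (the model name is illustrative):

```python
import spacy

nlp = spacy.load("en_core_web_sm")
nlp_bytes = nlp.to_bytes(exclude=["ner"])
# Restore into a pipeline with the same components; excluded parts simply
# keep whatever state the receiving object already has.
nlp2 = spacy.load("en_core_web_sm")
nlp2.from_bytes(nlp_bytes, exclude=["ner"])
```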


@@ -151,10 +151,9 @@ Serialize the current state to a binary string.

> store_bytes = stringstore.to_bytes()
> ```

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------ |
| **RETURNS** | bytes | The serialized form of the `StringStore` object. |
## StringStore.from_bytes {#from_bytes tag="method"}

@@ -168,11 +167,10 @@ Load state from a binary string.

> new_store = StringStore().from_bytes(store_bytes)
> ```

| Name | Type | Description |
| ------------ | ------------- | ------------------------- |
| `bytes_data` | bytes | The data to load from. |
| **RETURNS** | `StringStore` | The `StringStore` object. |
## Utilities {#util}


@@ -244,9 +244,10 @@ Serialize the pipe to disk.

> tagger.to_disk("/path/to/tagger")
> ```

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Tagger.from_disk {#from_disk tag="method"}

@@ -262,6 +263,7 @@ Load the pipe from disk. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tagger` | The modified `Tagger` object. |
## Tagger.to_bytes {#to_bytes tag="method"}

@@ -275,10 +277,10 @@ Load the pipe from disk. Modifies the object in place and returns it.

Serialize the pipe to a bytestring.

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Tagger` object. |
## Tagger.from_bytes {#from_bytes tag="method"}

@@ -292,11 +294,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.

> tagger.from_bytes(tagger_bytes)
> ```

| Name | Type | Description |
| ------------ | -------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tagger` | The `Tagger` object. |
## Tagger.labels {#labels tag="property"}

@@ -314,3 +316,22 @@ tags by default, e.g. `VERB`, `NOUN` and so on.

| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = tagger.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| --------- | ------------------------------------------------------------------------------------------ |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |
| `tag_map` | The [tag map](/usage/adding-languages#tag-map) mapping fine-grained to coarse-grained tags. |
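For instance, to reuse a tagger's weights but keep the receiving vocab's existing tag map, a sketch like this should work (assuming an `nlp` pipeline with a `"tagger"` component; names are illustrative):

```python
from spacy.pipeline import Tagger

tagger_bytes = nlp.get_pipe("tagger").to_bytes(exclude=["vocab"])
new_tagger = Tagger(nlp.vocab)
# Skip the serialized tag map and keep the one already on `nlp.vocab`.
new_tagger.from_bytes(tagger_bytes, exclude=["vocab", "tag_map"])
```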


@@ -260,9 +260,10 @@ Serialize the pipe to disk.

> textcat.to_disk("/path/to/textcat")
> ```

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## TextCategorizer.from_disk {#from_disk tag="method"}

@@ -278,6 +279,7 @@ Load the pipe from disk. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ----------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `TextCategorizer` | The modified `TextCategorizer` object. |
## TextCategorizer.to_bytes {#to_bytes tag="method"}

@@ -291,10 +293,10 @@ Load the pipe from disk. Modifies the object in place and returns it.

Serialize the pipe to a bytestring.

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `TextCategorizer` object. |
## TextCategorizer.from_bytes {#from_bytes tag="method"}

@@ -308,11 +310,11 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.

> textcat.from_bytes(textcat_bytes)
> ```

| Name | Type | Description |
| ------------ | ----------------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `TextCategorizer` | The `TextCategorizer` object. |
## TextCategorizer.labels {#labels tag="property"}

@@ -328,3 +330,21 @@ The labels currently added to the component.

| Name | Type | Description |
| ----------- | ----- | ---------------------------------- |
| **RETURNS** | tuple | The labels added to the component. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = textcat.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `cfg` | The config file. You usually don't want to exclude this. |
| `model` | The binary model data. You usually don't want to exclude this. |
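Since `cfg` stores the component's configuration, including its labels, excluding it is rarely what you want. A plain disk round trip might look like this sketch (assuming `nlp` has a `"textcat"` component; the path is illustrative):

```python
from spacy.pipeline import TextCategorizer

textcat = nlp.get_pipe("textcat")
textcat.to_disk("/tmp/textcat")  # writes cfg, model and vocab data
new_textcat = TextCategorizer(nlp.vocab)
new_textcat.from_disk("/tmp/textcat", exclude=["vocab"])
```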


@@ -127,9 +127,10 @@ Serialize the tokenizer to disk.

> tokenizer.to_disk("/path/to/tokenizer")
> ```

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Tokenizer.from_disk {#from_disk tag="method"}

@@ -145,6 +146,7 @@ Load the tokenizer from disk. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tokenizer` | The modified `Tokenizer` object. |
## Tokenizer.to_bytes {#to_bytes tag="method"}

@@ -158,10 +160,10 @@ Load the tokenizer from disk. Modifies the object in place and returns it.

Serialize the tokenizer to a bytestring.

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Tokenizer` object. |
## Tokenizer.from_bytes {#from_bytes tag="method"}

@@ -176,11 +178,11 @@ it.

> tokenizer.from_bytes(tokenizer_bytes)
> ```

| Name | Type | Description |
| ------------ | ----------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Tokenizer` | The `Tokenizer` object. |
## Attributes {#attributes}

@@ -190,3 +192,25 @@ it.

| `prefix_search` | - | A function to find segment boundaries from the start of a string. Returns the length of the segment, or `None`. |
| `suffix_search` | - | A function to find segment boundaries from the end of a string. Returns the length of the segment, or `None`. |
| `infix_finditer` | - | A function to find internal segment separators, e.g. hyphens. Returns a (possibly empty) list of `re.MatchObject` objects. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = tokenizer.to_bytes(exclude=["vocab", "exceptions"])
> tokenizer.from_disk("./data", exclude=["token_match"])
> ```
| Name | Description |
| ---------------- | --------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `prefix_search` | The prefix rules. |
| `suffix_search` | The suffix rules. |
| `infix_finditer` | The infix rules. |
| `token_match` | The token match expression. |
| `exceptions` | The tokenizer exception rules. |
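Fields excluded on load simply keep the defaults of the receiving tokenizer. A sketch with two blank English pipelines (an assumed setup):

```python
from spacy.lang.en import English

nlp = English()
# Export the tokenizer without its exception table.
tok_bytes = nlp.tokenizer.to_bytes(exclude=["vocab", "exceptions"])
nlp2 = English()
nlp2.tokenizer.from_bytes(tok_bytes, exclude=["vocab"])
```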


@@ -311,10 +311,9 @@ Save the current state to a directory.

>
> ```

| Name | Type | Description |
| ------ | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
## Vectors.from_disk {#from_disk tag="method"}

@@ -342,10 +341,9 @@ Serialize the current state to a binary string.

> vectors_bytes = vectors.to_bytes()
> ```

| Name | Type | Description |
| ----------- | ----- | -------------------------------------------- |
| **RETURNS** | bytes | The serialized form of the `Vectors` object. |
## Vectors.from_bytes {#from_bytes tag="method"}

@@ -360,11 +358,10 @@ Load state from a binary string.

> new_vectors.from_bytes(vectors_bytes)
> ```

| Name | Type | Description |
| ----------- | --------- | ---------------------- |
| `data` | bytes | The data to load from. |
| **RETURNS** | `Vectors` | The `Vectors` object. |
## Attributes {#attributes}


@@ -221,9 +221,10 @@ Save the current state to a directory.

> nlp.vocab.to_disk("/path/to/vocab")
> ```

| Name | Type | Description |
| --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
## Vocab.from_disk {#from_disk tag="method" new="2"}

@@ -239,6 +240,7 @@ Loads state from a directory. Modifies the object in place and returns it.

| Name | Type | Description |
| ----------- | ---------------- | -------------------------------------------------------------------------- |
| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Vocab` | The modified `Vocab` object. |
## Vocab.to_bytes {#to_bytes tag="method"}

@@ -251,10 +253,10 @@ Serialize the current state to a binary string.

> vocab_bytes = nlp.vocab.to_bytes()
> ```

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | bytes | The serialized form of the `Vocab` object. |
## Vocab.from_bytes {#from_bytes tag="method"}

@@ -269,11 +271,11 @@ Load state from a binary string.

> vocab.from_bytes(vocab_bytes)
> ```

| Name | Type | Description |
| ------------ | ------- | ------------------------------------------------------------------------- |
| `bytes_data` | bytes | The data to load from. |
| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. |
| **RETURNS** | `Vocab` | The `Vocab` object. |
## Attributes {#attributes}

@@ -291,3 +293,22 @@ Load state from a binary string.

| `strings` | `StringStore` | A table managing the string-to-int mapping. |
| `vectors` <Tag variant="new">2</Tag> | `Vectors` | A table associating word IDs to word vectors. |
| `vectors_length` | int | Number of dimensions for each word vector. |
## Serialization fields {#serialization-fields}
During serialization, spaCy will export several data fields used to restore
different aspects of the object. If needed, you can exclude them from
serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = vocab.to_bytes(exclude=["strings", "vectors"])
> vocab.from_disk("./vocab", exclude=["strings"])
> ```
| Name | Description |
| --------- | ----------------------------------------------------- |
| `strings` | The strings in the [`StringStore`](/api/stringstore). |
| `lexemes` | The lexeme data. |
| `vectors` | The word vectors, if available. |
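Dropping the vectors table, for example, keeps serialized vocabs small when vectors are distributed separately. A minimal sketch, assuming a loaded `nlp` object:

```python
from spacy.vocab import Vocab

vocab_bytes = nlp.vocab.to_bytes(exclude=["vectors"])
new_vocab = Vocab().from_bytes(vocab_bytes)
```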