From f37863093a8c329d9e6e318f36fe7d0ca1cefdf6 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Mon, 3 Dec 2018 01:28:22 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=92=AB=20Replace=20ujson,=20msgpack=20and?=
 =?UTF-8?q?=20dill/pickle/cloudpickle=20with=20srsly=20(#3003)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove hacks and wrappers, keep code in sync across our libraries and move spaCy a few steps closer to only depending on packages with binary wheels 🎉

See here: https://github.com/explosion/srsly

    Serialization is hard, especially across Python versions and multiple platforms. After dealing with many subtle bugs over the years (encodings, locales, large files) our libraries like spaCy and Prodigy have steadily grown a number of utility functions to wrap the multiple serialization formats we need to support (especially json, msgpack and pickle). These wrapping functions ended up duplicated across our codebases, so we wanted to put them in one place.

    At the same time, we noticed that having a lot of small dependencies was making maintenance harder, and making installation slower. To solve this, we've made srsly standalone, by including the component packages directly within it. This way we can provide all the serialization utilities we need in a single binary wheel.

    srsly currently includes forks of the following packages:

        ujson
        msgpack
        msgpack-numpy
        cloudpickle


* WIP: replace json/ujson with srsly

* Replace ujson in examples

Use regular json instead of srsly to make code easier to read and follow

* Update requirements

* Fix imports

* Fix typos

* Replace msgpack with srsly

* Fix warning
---
 CONTRIBUTING.md                               |  3 +-
 bin/load_reddit.py                            |  6 +-
 .../information_extraction/phrase_matcher.py  |  6 +-
 examples/keras_parikh_entailment/__main__.py  |  2 +-
 .../notebooks/Decompositional Attention.ipynb |  2 +-
 requirements.txt                              |  2 +-
 setup.py                                      |  2 +-
 spacy/cli/convert.py                          | 13 ++--
 spacy/cli/converters/jsonl2json.py            |  4 +-
 spacy/cli/debug_data.py                       |  7 +-
 spacy/cli/info.py                             |  3 +-
 spacy/cli/init_model.py                       |  5 +-
 spacy/cli/package.py                          |  7 +-
 spacy/cli/pretrain.py                         | 17 ++---
 spacy/cli/profile.py                          |  4 +-
 spacy/cli/schemas/__init__.py                 |  4 +-
 spacy/cli/train.py                            | 13 ++--
 spacy/cli/ud/ud_run_test.py                   |  7 +-
 spacy/cli/validate.py                         |  5 +-
 spacy/compat.py                               |  7 --
 spacy/gold.pyx                                | 16 ++--
 spacy/language.py                             | 12 +--
 spacy/pipeline.pyx                            | 43 +++++------
 spacy/strings.pyx                             | 13 ++--
 spacy/syntax/_parser_model.pyx                | 15 ++--
 spacy/syntax/nn_parser.pyx                    | 13 ++--
 spacy/syntax/transition_system.pyx            | 11 ++-
 spacy/tests/util.py                           |  6 +-
 spacy/tokens/_serialize.py                    |  6 +-
 spacy/tokens/doc.pyx                          | 15 ++--
 spacy/util.py                                 | 74 ++-----------------
 spacy/vectors.pyx                             | 17 ++---
 website/api/_top-level/_compat.jade           |  8 +-
 33 files changed, 130 insertions(+), 238 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cb10a1718..22cad91d6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -292,10 +292,9 @@ for example to show more specific error messages, you can use the `is_config()`
 helper function.
 
 ```python
-from .compat import unicode_, json_dumps, is_config
+from .compat import unicode_, is_config
 
 compatible_unicode = unicode_('hello world')
-compatible_json = json_dumps({'key': 'value'})
 if is_config(windows=True, python2=True):
     print("You are using Python 2 on Windows.")
 ```
diff --git a/bin/load_reddit.py b/bin/load_reddit.py
index 5affa0fb5..507ce58c2 100644
--- a/bin/load_reddit.py
+++ b/bin/load_reddit.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import bz2
 import regex as re
-import ujson
+import srsly
 import sys
 import random
 import datetime
@@ -44,7 +44,7 @@ class Reddit(object):
                     line = line.strip()
                     if not line:
                         continue
-                    comment = ujson.loads(line)
+                    comment = srsly.json_loads(line)
                     if self.is_valid(comment):
                         text = self.strip_tags(comment["body"])
                         yield {"text": text}
@@ -75,7 +75,7 @@ class Reddit(object):
 def main(path):
     reddit = Reddit(path)
     for comment in reddit:
-        print(ujson.dumps(comment))
+        print(srsly.json_dumps(comment))
 
 
 if __name__ == "__main__":
diff --git a/examples/information_extraction/phrase_matcher.py b/examples/information_extraction/phrase_matcher.py
index b49cb88e8..3cdc9cc86 100644
--- a/examples/information_extraction/phrase_matcher.py
+++ b/examples/information_extraction/phrase_matcher.py
@@ -45,7 +45,7 @@ from __future__ import print_function, unicode_literals, division
 from bz2 import BZ2File
 import time
 import plac
-import ujson
+import json
 
 from spacy.matcher import PhraseMatcher
 import spacy
@@ -71,7 +71,7 @@ def main(patterns_loc, text_loc, n=10000, lang="en"):
 
 def read_gazetteer(tokenizer, loc, n=-1):
     for i, line in enumerate(open(loc)):
-        data = ujson.loads(line.strip())
+        data = json.loads(line.strip())
         phrase = tokenizer(data["text"])
         for w in phrase:
             _ = tokenizer.vocab[w.text]
@@ -82,7 +82,7 @@ def read_gazetteer(tokenizer, loc, n=-1):
 def read_text(bz2_loc, n=10000):
     with BZ2File(bz2_loc) as file_:
         for i, line in enumerate(file_):
-            data = ujson.loads(line)
+            data = json.loads(line)
             yield data["body"]
             if i >= n:
                 break
diff --git a/examples/keras_parikh_entailment/__main__.py b/examples/keras_parikh_entailment/__main__.py
index 7cd66a20c..14df8e3d4 100644
--- a/examples/keras_parikh_entailment/__main__.py
+++ b/examples/keras_parikh_entailment/__main__.py
@@ -1,5 +1,5 @@
 import numpy as np
-import ujson as json
+import json
 from keras.utils import to_categorical
 import plac
 import sys
diff --git a/examples/notebooks/Decompositional Attention.ipynb b/examples/notebooks/Decompositional Attention.ipynb
index b61dc9df7..8baaf7d33 100644
--- a/examples/notebooks/Decompositional Attention.ipynb	
+++ b/examples/notebooks/Decompositional Attention.ipynb	
@@ -77,7 +77,7 @@
     }
    ],
    "source": [
-    "import ujson as json\n",
+    "import json\n",
     "from keras.utils import to_categorical\n",
     "\n",
     "LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
diff --git a/requirements.txt b/requirements.txt
index 3d495277e..d68ac7a31 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,12 +6,12 @@ blis>=0.2.2,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 cytoolz>=0.9.0,<0.10.0
 plac<1.0.0,>=0.9.6
-ujson>=1.35
 dill>=0.2,<0.3
 regex==2018.01.10
 requests>=2.13.0,<3.0.0
 jsonschema>=2.6.0,<3.0.0
 wasabi>=0.0.8,<1.1.0
+srsly>=0.0.4,<1.1.0
 pathlib==1.0.1; python_version < "3.4"
 # Development dependencies
 cython>=0.25
diff --git a/setup.py b/setup.py
index 05d074f28..99ae655bb 100755
--- a/setup.py
+++ b/setup.py
@@ -203,12 +203,12 @@ def setup_package():
                 "thinc==7.0.0.dev4",
                 "blis>=0.2.2,<0.3.0",
                 "plac<1.0.0,>=0.9.6",
-                "ujson>=1.35",
                 "regex==2018.01.10",
                 "dill>=0.2,<0.3",
                 "requests>=2.13.0,<3.0.0",
                 "jsonschema>=2.6.0,<3.0.0",
                 "wasabi>=0.0.8,<1.1.0",
+                "srsly>=0.0.4,<1.1.0",
                 'pathlib==1.0.1; python_version < "3.4"',
             ],
             setup_requires=["wheel"],
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index b41b22036..a2c1d20e0 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
 import plac
 from pathlib import Path
 from wasabi import Printer
+import srsly
 
-from ..util import write_jsonl, write_json
-from ..compat import json_dumps, path2str
+from ..compat import path2str
 from .converters import conllu2json, conllubio2json, iob2json, conll_ner2json
 from .converters import ner_jsonl2json
 from ._messages import Messages
@@ -77,9 +77,9 @@ def convert(
         suffix = ".{}".format(file_type)
         output_file = Path(output_dir) / Path(input_path.parts[-1]).with_suffix(suffix)
         if file_type == "json":
-            write_json(output_file, data)
+            srsly.write_json(output_file, data)
         elif file_type == "jsonl":
-            write_jsonl(output_file, data)
+            srsly.write_jsonl(output_file, data)
         msg.good(
             Messages.M032.format(name=path2str(output_file)),
             Messages.M033.format(n_docs=len(data)),
@@ -87,7 +87,6 @@ def convert(
     else:
         # Print to stdout
         if file_type == "json":
-            print(json_dumps(data))
+            srsly.write_json("-", data)
         elif file_type == "jsonl":
-            for line in data:
-                print(json_dumps(line))
+            srsly.write_jsonl("-", data)
diff --git a/spacy/cli/converters/jsonl2json.py b/spacy/cli/converters/jsonl2json.py
index 26fdca302..a281db86d 100644
--- a/spacy/cli/converters/jsonl2json.py
+++ b/spacy/cli/converters/jsonl2json.py
@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-import ujson
+import srsly
 
 from ...util import get_lang_class
 from .._messages import Messages
@@ -11,7 +11,7 @@ def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False):
     if lang is None:
         raise ValueError(Messages.M054)
     json_docs = []
-    input_tuples = [ujson.loads(line) for line in input_data]
+    input_tuples = [srsly.json_loads(line) for line in input_data]
     nlp = get_lang_class(lang)()
     for i, (raw_text, ents) in enumerate(input_tuples):
         doc = nlp.make_doc(raw_text)
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 5bf602828..06f648124 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -5,10 +5,11 @@ from pathlib import Path
 from collections import Counter
 import plac
 import sys
+import srsly
 from wasabi import Printer, MESSAGES
 
 from ..gold import GoldCorpus, read_json_object
-from ..util import load_model, get_lang_class, read_json, read_jsonl
+from ..util import load_model, get_lang_class
 
 # from .schemas import get_schema, validate_json
 from ._messages import Messages
@@ -320,11 +321,11 @@ def debug_data(
 def _load_file(file_path, msg):
     file_name = file_path.parts[-1]
     if file_path.suffix == ".json":
-        data = read_json(file_path)
+        data = srsly.read_json(file_path)
         msg.good("Loaded {}".format(file_name))
         return data
     elif file_path.suffix == ".jsonl":
-        data = read_jsonl(file_path)
+        data = srsly.read_jsonl(file_path)
         msg.good("Loaded {}".format(file_name))
         return data
     msg.fail(
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 5df9ddadb..7339faaab 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -5,6 +5,7 @@ import plac
 import platform
 from pathlib import Path
 from wasabi import Printer
+import srsly
 
 from ._messages import Messages
 from ..compat import path2str, basestring_, unicode_
@@ -32,7 +33,7 @@ def info(model=None, markdown=False, silent=False):
         meta_path = model_path / "meta.json"
         if not meta_path.is_file():
             msg.fail(Messages.M020, meta_path, exits=1)
-        meta = util.read_json(meta_path)
+        meta = srsly.read_json(meta_path)
         if model_path.resolve() != model_path:
             meta["link"] = path2str(model_path)
             meta["source"] = path2str(model_path.resolve())
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 4b3406ab0..8dc2a8cf2 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -11,12 +11,13 @@ from preshed.counter import PreshCounter
 import tarfile
 import gzip
 import zipfile
+import srsly
 from wasabi import Printer
 
 from ._messages import Messages
 from ..vectors import Vectors
 from ..errors import Errors, Warnings, user_warning
-from ..util import ensure_path, get_lang_class, read_jsonl
+from ..util import ensure_path, get_lang_class
 
 try:
     import ftfy
@@ -59,7 +60,7 @@ def init_model(
                 settings.append("-c")
             msg.warn(Messages.M063, Messages.M064)
         jsonl_loc = ensure_path(jsonl_loc)
-        lex_attrs = read_jsonl(jsonl_loc)
+        lex_attrs = srsly.read_jsonl(jsonl_loc)
     else:
         clusters_loc = ensure_path(clusters_loc)
         freqs_loc = ensure_path(freqs_loc)
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 84288ac72..916dbc1f2 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -5,9 +5,10 @@ import plac
 import shutil
 from pathlib import Path
 from wasabi import Printer, get_raw_input
+import srsly
 
 from ._messages import Messages
-from ..compat import path2str, json_dumps
+from ..compat import path2str
 from .. import util
 from .. import about
 
@@ -40,7 +41,7 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=Fals
 
     meta_path = meta_path or input_path / "meta.json"
     if meta_path.is_file():
-        meta = util.read_json(meta_path)
+        meta = srsly.read_json(meta_path)
         if not create_meta:  # only print if user doesn't want to overwrite
             msg.good(Messages.M041, meta_path)
         else:
@@ -64,7 +65,7 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=Fals
             )
     Path.mkdir(package_path, parents=True)
     shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
-    create_file(main_path / "meta.json", json_dumps(meta))
+    create_file(main_path / "meta.json", srsly.json_dumps(meta))
     create_file(main_path / "setup.py", TEMPLATE_SETUP)
     create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
     create_file(package_path / "__init__.py", TEMPLATE_INIT)
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 20d097047..70cab05c2 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -5,8 +5,6 @@ import plac
 import random
 import numpy
 import time
-import ujson
-import sys
 from collections import Counter
 from pathlib import Path
 from thinc.v2v import Affine, Maxout
@@ -14,10 +12,10 @@ from thinc.api import wrap
 from thinc.misc import LayerNorm as LN
 from thinc.neural.util import prefer_gpu
 from wasabi import Printer
+import srsly
 
 from ..tokens import Doc
 from ..attrs import ID, HEAD
-from ..compat import json_dumps
 from .._ml import Tok2Vec, flatten, chain, zero_init, create_default_optimizer
 from .. import util
 
@@ -72,7 +70,7 @@ def pretrain(
     if not output_dir.exists():
         output_dir.mkdir()
         msg.good("Created output directory")
-    util.write_json(output_dir / "config.json", config)
+    srsly.write_json(output_dir / "config.json", config)
     msg.good("Saved settings to config.json")
 
     # Load texts from file or stdin
@@ -81,12 +79,12 @@ def pretrain(
         if not texts_loc.exists():
             msg.fail("Input text file doesn't exist", texts_loc, exits=1)
         with msg.loading("Loading input texts..."):
-            texts = list(util.read_jsonl(texts_loc))
+            texts = list(srsly.read_jsonl(texts_loc))
         msg.good("Loaded input texts")
         random.shuffle(texts)
     else:  # reading from stdin
         msg.text("Reading input text from stdin...")
-        texts = stream_texts()
+        texts = srsly.read_jsonl("-")
 
     with msg.loading("Loading model '{}'...".format(vectors_model)):
         nlp = util.load_model(vectors_model)
@@ -130,18 +128,13 @@ def pretrain(
                 "epoch": epoch,
             }
             with (output_dir / "log.jsonl").open("a") as file_:
-                file_.write(json_dumps(log) + "\n")
+                file_.write(srsly.json_dumps(log) + "\n")
         tracker.epoch_loss = 0.0
         if texts_loc != "-":
             # Reshuffle the texts if texts were loaded from a file
             random.shuffle(texts)
 
 
-def stream_texts():
-    for line in sys.stdin:
-        yield ujson.loads(line)
-
-
 def make_update(model, docs, optimizer, drop=0.0):
     """Perform an update over a single batch of documents.
 
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index 506e55871..439ef79a1 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals, division, print_function
 
 import plac
 from pathlib import Path
-import ujson
+import srsly
 import cProfile
 import pstats
 import sys
@@ -64,6 +64,6 @@ def _read_inputs(loc, msg):
         msg.info("Using data from {}".format(input_path.parts[-1]))
         file_ = input_path.open()
     for line in file_:
-        data = ujson.loads(line)
+        data = srsly.json_loads(line)
         text = data["text"]
         yield text
diff --git a/spacy/cli/schemas/__init__.py b/spacy/cli/schemas/__init__.py
index f478c7a9a..c502c6493 100644
--- a/spacy/cli/schemas/__init__.py
+++ b/spacy/cli/schemas/__init__.py
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
 
 from pathlib import Path
 from jsonschema import Draft4Validator
+import srsly
 
 from ...errors import Errors
-from ...util import read_json
 
 
 SCHEMAS = {}
@@ -25,7 +25,7 @@ def get_schema(name):
         schema_path = Path(__file__).parent / "{}.json".format(name)
         if not schema_path.exists():
             raise ValueError(Errors.E104.format(name=name))
-        schema = read_json(schema_path)
+        schema = srsly.read_json(schema_path)
         # TODO: replace with (stable) Draft6Validator, if available
         validator = Draft4Validator(schema)
         validator.check_schema(schema)
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 9dec5d4bd..8d322e32d 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -7,6 +7,7 @@ import tqdm
 from thinc.neural._classes.model import Model
 from timeit import default_timer as timer
 import shutil
+import srsly
 from wasabi import Printer
 
 from ._messages import Messages
@@ -111,7 +112,7 @@ def train(
         msg.fail(Messages.M051, dev_path, exits=1)
     if meta_path is not None and not meta_path.exists():
         msg.fail(Messages.M020, meta_path, exits=1)
-    meta = util.read_json(meta_path) if meta_path else {}
+    meta = srsly.read_json(meta_path) if meta_path else {}
     if not isinstance(meta, dict):
         msg.fail(Messages.M052, Messages.M053.format(meta_type=type(meta)), exits=1)
     if output_path.exists() and [p for p in output_path.iterdir() if p.is_dir()]:
@@ -226,7 +227,7 @@ def train(
                         end_time = timer()
                         cpu_wps = nwords / (end_time - start_time)
                 acc_loc = output_path / ("model%d" % i) / "accuracy.json"
-                util.write_json(acc_loc, scorer.scores)
+                srsly.write_json(acc_loc, scorer.scores)
 
                 # Update model meta.json
                 meta["lang"] = nlp.lang
@@ -242,7 +243,7 @@ def train(
                 meta.setdefault("name", "model%d" % i)
                 meta.setdefault("version", version)
                 meta_loc = output_path / ("model%d" % i) / "meta.json"
-                util.write_json(meta_loc, meta)
+                srsly.write_json(meta_loc, meta)
 
                 util.set_env_log(verbose)
 
@@ -293,17 +294,17 @@ def _collate_best_model(meta, output_path, components):
     for component, best_component_src in bests.items():
         shutil.rmtree(best_dest / component)
         shutil.copytree(best_component_src / component, best_dest / component)
-        accs = util.read_json(best_component_src / "accuracy.json")
+        accs = srsly.read_json(best_component_src / "accuracy.json")
         for metric in _get_metrics(component):
             meta["accuracy"][metric] = accs[metric]
-    util.write_json(best_dest / "meta.json", meta)
+    srsly.write_json(best_dest / "meta.json", meta)
 
 
 def _find_best(experiment_dir, component):
     accuracies = []
     for epoch_model in experiment_dir.iterdir():
         if epoch_model.is_dir() and epoch_model.parts[-1] != "model-final":
-            accs = util.read_json(epoch_model / "accuracy.json")
+            accs = srsly.read_json(epoch_model / "accuracy.json")
             scores = [accs.get(metric, 0.0) for metric in _get_metrics(component)]
             accuracies.append((scores, epoch_model))
     if accuracies:
diff --git a/spacy/cli/ud/ud_run_test.py b/spacy/cli/ud/ud_run_test.py
index f36df2f80..e3771fa92 100644
--- a/spacy/cli/ud/ud_run_test.py
+++ b/spacy/cli/ud/ud_run_test.py
@@ -9,7 +9,7 @@ import tqdm
 from pathlib import Path
 import re
 import sys
-import json
+import srsly
 
 import spacy
 import spacy.util
@@ -44,7 +44,7 @@ from ...lang import ru
 # Data reading #
 ################
 
-space_re = re.compile("\s+")
+space_re = re.compile(r"\s+")
 
 
 def split_text(text):
@@ -332,8 +332,7 @@ def main(test_data_dir, experiment_dir, corpus):
                 / corpus
                 / "{section}-accuracy.json".format(section=section)
             )
-            with open(acc_path, "w") as file_:
-                file_.write(json.dumps(accuracy, indent=2))
+            srsly.write_json(acc_path, accuracy)
 
 
 if __name__ == "__main__":
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index caeaf5ca9..4b5581972 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -5,11 +5,12 @@ import pkg_resources
 from pathlib import Path
 import sys
 import requests
+import srsly
 from wasabi import Printer
 
 from ._messages import Messages
 from ..compat import path2str
-from ..util import get_data_path, read_json
+from ..util import get_data_path
 from .. import about
 
 
@@ -84,7 +85,7 @@ def get_model_links(compat):
             meta_path = Path(model) / "meta.json"
             if not meta_path.exists():
                 continue
-            meta = read_json(meta_path)
+            meta = srsly.read_json(meta_path)
             link = model.parts[-1]
             name = meta["lang"] + "_" + meta["name"]
             links[link] = {
diff --git a/spacy/compat.py b/spacy/compat.py
index f00e2c417..c1869b85f 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import os
 import sys
-import ujson
 import itertools
 
 from thinc.neural.util import copy_array
@@ -54,9 +53,6 @@ if is_python2:
     unicode_ = unicode  # noqa: F821
     basestring_ = basestring  # noqa: F821
     input_ = raw_input  # noqa: F821
-    json_dumps = lambda data, indent=2: ujson.dumps(
-        data, indent=indent, escape_forward_slashes=False
-    ).decode("utf8")
     path2str = lambda path: str(path).decode("utf8")
 
 elif is_python3:
@@ -64,9 +60,6 @@ elif is_python3:
     unicode_ = str
     basestring_ = str
     input_ = input
-    json_dumps = lambda data, indent=2: ujson.dumps(
-        data, indent=indent, escape_forward_slashes=False
-    )
     path2str = lambda path: str(path)
 
 
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 26ff9753a..9c0c00652 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -10,10 +10,7 @@ import numpy
 import tempfile
 import shutil
 from pathlib import Path
-import msgpack
-import json
-
-import ujson
+import srsly
 
 from . import _align
 from .syntax import nonproj
@@ -21,7 +18,6 @@ from .tokens import Doc
 from .errors import Errors
 from . import util
 from .util import minibatch, itershuffle
-from .compat import json_dumps
 
 from libc.stdio cimport FILE, fopen, fclose, fread, fwrite, feof, fseek
 
@@ -123,12 +119,11 @@ class GoldCorpus(object):
             directory.mkdir()
         n = 0
         for i, doc_tuple in enumerate(doc_tuples):
-            with open(directory / '{}.msg'.format(i), 'wb') as file_:
-                msgpack.dump([doc_tuple], file_, use_bin_type=True)
+            srsly.write_msgpack(directory / '{}.msg'.format(i), [doc_tuple])
             n += len(doc_tuple[1])
             if limit and n >= limit:
                 break
-    
+
     @staticmethod
     def walk_corpus(path):
         path = util.ensure_path(path)
@@ -157,8 +152,7 @@ class GoldCorpus(object):
             if loc.parts[-1].endswith('json'):
                 gold_tuples = read_json_file(loc)
             elif loc.parts[-1].endswith('msg'):
-                with loc.open('rb') as file_:
-                    gold_tuples = msgpack.load(file_, raw=False)
+                gold_tuples = srsly.read_msgpack(loc)
             else:
                 msg = "Cannot read from file: %s. Supported formats: .json, .msg"
                 raise ValueError(msg % loc)
@@ -378,7 +372,7 @@ def _json_iterate(loc):
             if square_depth == 1 and curly_depth == 0:
                 py_str = py_raw[start : i+1].decode('utf8')
                 try:
-                    yield json.loads(py_str)
+                    yield srsly.json_loads(py_str)
                 except Exception:
                     print(py_str)
                     raise
diff --git a/spacy/language.py b/spacy/language.py
index f8afe84f7..4c3bfd5c8 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -2,7 +2,6 @@
 from __future__ import absolute_import, unicode_literals
 
 import random
-import ujson
 import itertools
 import weakref
 import functools
@@ -10,6 +9,7 @@ from collections import OrderedDict
 from contextlib import contextmanager
 from copy import copy
 from thinc.neural import Model
+import srsly
 
 from .tokenizer import Tokenizer
 from .vocab import Vocab
@@ -18,7 +18,7 @@ from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer
 from .pipeline import SimilarityHook, TextCategorizer, SentenceSegmenter
 from .pipeline import merge_noun_chunks, merge_entities, merge_subtokens
 from .pipeline import EntityRuler
-from .compat import json_dumps, izip, basestring_
+from .compat import izip, basestring_
 from .gold import GoldParse
 from .scorer import Scorer
 from ._ml import link_vectors_to_models, create_default_optimizer
@@ -640,7 +640,7 @@ class Language(object):
         serializers = OrderedDict(
             (
                 ("tokenizer", lambda p: self.tokenizer.to_disk(p, vocab=False)),
-                ("meta.json", lambda p: p.open("w").write(json_dumps(self.meta))),
+                ("meta.json", lambda p: p.open("w").write(srsly.json_dumps(self.meta))),
             )
         )
         for name, proc in self.pipeline:
@@ -671,7 +671,7 @@ class Language(object):
         path = util.ensure_path(path)
         deserializers = OrderedDict(
             (
-                ("meta.json", lambda p: self.meta.update(util.read_json(p))),
+                ("meta.json", lambda p: self.meta.update(srsly.read_json(p))),
                 (
                     "vocab",
                     lambda p: (
@@ -705,7 +705,7 @@ class Language(object):
             (
                 ("vocab", lambda: self.vocab.to_bytes()),
                 ("tokenizer", lambda: self.tokenizer.to_bytes(vocab=False)),
-                ("meta", lambda: json_dumps(self.meta)),
+                ("meta", lambda: srsly.json_dumps(self.meta)),
             )
         )
         for i, (name, proc) in enumerate(self.pipeline):
@@ -725,7 +725,7 @@ class Language(object):
         """
         deserializers = OrderedDict(
             (
-                ("meta", lambda b: self.meta.update(ujson.loads(b))),
+                ("meta", lambda b: self.meta.update(srsly.json_loads(b))),
                 (
                     "vocab",
                     lambda b: (
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 3a09af644..c3b8f5fae 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -7,10 +7,7 @@ import numpy
 cimport numpy as np
 import cytoolz
 from collections import OrderedDict, defaultdict
-import ujson
-
-from .util import msgpack
-from .util import msgpack_numpy
+import srsly
 
 from thinc.api import chain
 from thinc.v2v import Affine, Maxout, Softmax
@@ -27,7 +24,6 @@ from .syntax.arc_eager cimport ArcEager
 from .morphology cimport Morphology
 from .vocab cimport Vocab
 from .syntax import nonproj
-from .compat import json_dumps
 from .matcher import Matcher
 
 from .matcher import Matcher, PhraseMatcher
@@ -38,7 +34,7 @@ from ._ml import Tok2Vec, build_text_classifier, build_tagger_model
 from ._ml import link_vectors_to_models, zero_init, flatten
 from ._ml import create_default_optimizer
 from .errors import Errors, TempErrors
-from .compat import json_dumps, basestring_
+from .compat import basestring_
 from . import util
 
 
@@ -235,7 +231,7 @@ class EntityRuler(object):
         **kwargs: Other config paramters, mostly for consistency.
         RETURNS (EntityRuler): The loaded entity ruler.
         """
-        patterns = msgpack.loads(patterns_bytes, raw=False)
+        patterns = srsly.msgpack_loads(patterns_bytes)
         self.add_patterns(patterns)
         return self
 
@@ -244,7 +240,7 @@ class EntityRuler(object):
 
         RETURNS (bytes): The serialized patterns.
         """
-        return msgpack.dumps(self.patterns, use_bin_type=True)
+        return srsly.msgpack_dumps(self.patterns)
 
     def from_disk(self, path, **kwargs):
         """Load the entity ruler from a file. Expects a file containing
@@ -256,7 +252,7 @@ class EntityRuler(object):
         """
         path = util.ensure_path(path)
         path = path.with_suffix('.jsonl')
-        patterns = util.read_jsonl(path)
+        patterns = srsly.read_jsonl(path)
         self.add_patterns(patterns)
         return self
 
@@ -270,8 +266,7 @@ class EntityRuler(object):
         """
         path = util.ensure_path(path)
         path = path.with_suffix('.jsonl')
-        data = [json_dumps(line, indent=0) for line in self.patterns]
-        path.open('w').write('\n'.join(data))
+        srsly.write_jsonl(path, self.patterns)
 
 
 class Pipe(object):
@@ -368,7 +363,7 @@ class Pipe(object):
     def to_bytes(self, **exclude):
         """Serialize the pipe to a bytestring."""
         serialize = OrderedDict()
-        serialize['cfg'] = lambda: json_dumps(self.cfg)
+        serialize['cfg'] = lambda: srsly.json_dumps(self.cfg)
         if self.model in (True, False, None):
             serialize['model'] = lambda: self.model
         else:
@@ -387,7 +382,7 @@ class Pipe(object):
             self.model.from_bytes(b)
 
         deserialize = OrderedDict((
-            ('cfg', lambda b: self.cfg.update(ujson.loads(b))),
+            ('cfg', lambda b: self.cfg.update(srsly.json_loads(b))),
             ('vocab', lambda b: self.vocab.from_bytes(b)),
             ('model', load_model),
         ))
@@ -397,7 +392,7 @@ class Pipe(object):
     def to_disk(self, path, **exclude):
         """Serialize the pipe to disk."""
         serialize = OrderedDict()
-        serialize['cfg'] = lambda p: p.open('w').write(json_dumps(self.cfg))
+        serialize['cfg'] = lambda p: srsly.write_json(p, self.cfg)
         serialize['vocab'] = lambda p: self.vocab.to_disk(p)
         if self.model not in (None, True, False):
             serialize['model'] = lambda p: p.open('wb').write(self.model.to_bytes())
@@ -424,8 +419,7 @@ class Pipe(object):
 
 def _load_cfg(path):
     if path.exists():
-        with path.open() as file_:
-            return ujson.load(file_)
+        return srsly.read_json(path)
     else:
         return {}
 
@@ -745,10 +739,9 @@ class Tagger(Pipe):
         else:
             serialize['model'] = self.model.to_bytes
         serialize['vocab'] = self.vocab.to_bytes
-        serialize['cfg'] = lambda: ujson.dumps(self.cfg)
+        serialize['cfg'] = lambda: srsly.json_dumps(self.cfg)
         tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
-        serialize['tag_map'] = lambda: msgpack.dumps(
-            tag_map, use_bin_type=True)
+        serialize['tag_map'] = lambda: srsly.msgpack_dumps(tag_map)
         return util.to_bytes(serialize, exclude)
 
     def from_bytes(self, bytes_data, **exclude):
@@ -766,7 +759,7 @@ class Tagger(Pipe):
             self.model.from_bytes(b)
 
         def load_tag_map(b):
-            tag_map = msgpack.loads(b, raw=False)
+            tag_map = srsly.msgpack_loads(b)
             self.vocab.morphology = Morphology(
                 self.vocab.strings, tag_map=tag_map,
                 lemmatizer=self.vocab.morphology.lemmatizer,
@@ -775,7 +768,7 @@ class Tagger(Pipe):
         deserialize = OrderedDict((
             ('vocab', lambda b: self.vocab.from_bytes(b)),
             ('tag_map', load_tag_map),
-            ('cfg', lambda b: self.cfg.update(ujson.loads(b))),
+            ('cfg', lambda b: self.cfg.update(srsly.json_loads(b))),
             ('model', lambda b: load_model(b)),
         ))
         util.from_bytes(bytes_data, deserialize, exclude)
@@ -785,10 +778,9 @@ class Tagger(Pipe):
         tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
         serialize = OrderedDict((
             ('vocab', lambda p: self.vocab.to_disk(p)),
-            ('tag_map', lambda p: p.open('wb').write(msgpack.dumps(
-                tag_map, use_bin_type=True))),
+            ('tag_map', lambda p: srsly.write_msgpack(p, tag_map)),
             ('model', lambda p: p.open('wb').write(self.model.to_bytes())),
-            ('cfg', lambda p: p.open('w').write(json_dumps(self.cfg)))
+            ('cfg', lambda p: srsly.write_json(p, self.cfg))
         ))
         util.to_disk(path, serialize, exclude)
 
@@ -803,8 +795,7 @@ class Tagger(Pipe):
                 self.model.from_bytes(file_.read())
 
         def load_tag_map(p):
-            with p.open('rb') as file_:
-                tag_map = msgpack.loads(file_.read(), raw=False)
+            tag_map = srsly.read_msgpack(p)
             self.vocab.morphology = Morphology(
                 self.vocab.strings, tag_map=tag_map,
                 lemmatizer=self.vocab.morphology.lemmatizer,
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index b54e3f155..2c8d5fcb4 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -7,12 +7,11 @@ from libc.string cimport memcpy
 from libcpp.set cimport set
 from libc.stdint cimport uint32_t
 from murmurhash.mrmr cimport hash64, hash32
-import ujson
+import srsly
 
 from .symbols import IDS as SYMBOLS_BY_STR
 from .symbols import NAMES as SYMBOLS_BY_INT
 from .typedefs cimport hash_t
-from .compat import json_dumps
 from .errors import Errors
 from . import util
 
@@ -197,8 +196,7 @@ cdef class StringStore:
         """
         path = util.ensure_path(path)
         strings = list(self)
-        with path.open('w') as file_:
-            file_.write(json_dumps(strings))
+        srsly.write_json(path, strings)
 
     def from_disk(self, path):
         """Loads state from a directory. Modifies the object in place and
@@ -209,8 +207,7 @@ cdef class StringStore:
         RETURNS (StringStore): The modified `StringStore` object.
         """
         path = util.ensure_path(path)
-        with path.open('r') as file_:
-            strings = ujson.load(file_)
+        strings = srsly.read_json(path)
         prev = list(self)
         self._reset_and_load(strings)
         for word in prev:
@@ -223,7 +220,7 @@ cdef class StringStore:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `StringStore` object.
         """
-        return json_dumps(list(self))
+        return srsly.json_dumps(list(self))
 
     def from_bytes(self, bytes_data, **exclude):
         """Load state from a binary string.
@@ -232,7 +229,7 @@ cdef class StringStore:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (StringStore): The `StringStore` object.
         """
-        strings = ujson.loads(bytes_data)
+        strings = srsly.json_loads(bytes_data)
         prev = list(self)
         self._reset_and_load(strings)
         for word in prev:
diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx
index cfaa8ddf0..9796193f6 100644
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/syntax/_parser_model.pyx
@@ -5,8 +5,6 @@
 from __future__ import unicode_literals, print_function
 
 from collections import OrderedDict
-import ujson
-import json
 import numpy
 cimport cython.parallel
 import cytoolz
@@ -29,7 +27,7 @@ cimport blis.cy
 
 from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
 from .._ml import link_vectors_to_models, create_default_optimizer
-from ..compat import json_dumps, copy_array
+from ..compat import copy_array
 from ..tokens.doc cimport Doc
 from ..gold cimport GoldParse
 from ..errors import Errors, TempErrors
@@ -119,7 +117,7 @@ cdef void predict_states(ActivationsC* A, StateC** states,
         VecVec.add_i(&A.scores[i*n.classes],
             W.hidden_bias, 1., n.classes)
 
-            
+
 cdef void sum_state_features(float* output,
         const float* cached, const int* token_ids, int B, int F, int O) nogil:
     cdef int idx, b, f, i
@@ -165,7 +163,7 @@ cdef void cpu_log_loss(float* d_scores,
         else:
             d_scores[i] = exp(scores[i]-max_) / Z
 
- 
+
 cdef int arg_max_if_gold(const weight_t* scores, const weight_t* costs,
         const int* is_valid, int n) nogil:
     # Find minimum cost
@@ -218,15 +216,15 @@ class ParserModel(Model):
 
     def begin_training(self, X, y=None):
         self.lower.begin_training(X, y=y)
-   
+
     @property
     def tok2vec(self):
         return self._layers[0]
-    
+
     @property
     def lower(self):
         return self._layers[1]
-    
+
     @property
     def upper(self):
         return self._layers[2]
@@ -405,4 +403,3 @@ cdef class precompute_hiddens:
             else:
                 return self.ops.backprop_maxout(d_best, mask, self.nP)
         return state_vector, backprop_nonlinearity
-
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 0663c1289..186c5c16c 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -5,8 +5,6 @@
 from __future__ import unicode_literals, print_function
 
 from collections import OrderedDict
-import ujson
-import json
 import numpy
 cimport cython.parallel
 import cytoolz
@@ -27,6 +25,7 @@ from thinc.misc import LayerNorm
 from thinc.neural.ops import CupyOps
 from thinc.neural.util import get_array_module
 from thinc.linalg cimport Vec, VecVec
+import srsly
 
 from ._parser_model cimport resize_activations, predict_states, arg_max_if_valid
 from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
@@ -34,7 +33,7 @@ from ._parser_model cimport get_c_weights, get_c_sizes
 from ._parser_model import ParserModel
 from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
 from .._ml import link_vectors_to_models, create_default_optimizer
-from ..compat import json_dumps, copy_array
+from ..compat import copy_array
 from ..tokens.doc cimport Doc
 from ..gold cimport GoldParse
 from ..errors import Errors, TempErrors
@@ -539,7 +538,7 @@ cdef class Parser:
             'model': lambda p: (self.model.to_disk(p) if self.model is not True else True),
             'vocab': lambda p: self.vocab.to_disk(p),
             'moves': lambda p: self.moves.to_disk(p, strings=False),
-            'cfg': lambda p: p.open('w').write(json_dumps(self.cfg))
+            'cfg': lambda p: srsly.write_json(p, self.cfg)
         }
         util.to_disk(path, serializers, exclude)
 
@@ -547,7 +546,7 @@ cdef class Parser:
         deserializers = {
             'vocab': lambda p: self.vocab.from_disk(p),
             'moves': lambda p: self.moves.from_disk(p, strings=False),
-            'cfg': lambda p: self.cfg.update(util.read_json(p)),
+            'cfg': lambda p: self.cfg.update(srsly.read_json(p)),
             'model': lambda p: None
         }
         util.from_disk(path, deserializers, exclude)
@@ -568,7 +567,7 @@ cdef class Parser:
             ('model', lambda: (self.model.to_bytes() if self.model is not True else True)),
             ('vocab', lambda: self.vocab.to_bytes()),
             ('moves', lambda: self.moves.to_bytes(strings=False)),
-            ('cfg', lambda: json.dumps(self.cfg, indent=2, sort_keys=True))
+            ('cfg', lambda: srsly.json_dumps(self.cfg, indent=2, sort_keys=True))
         ))
         return util.to_bytes(serializers, exclude)
 
@@ -576,7 +575,7 @@ cdef class Parser:
         deserializers = OrderedDict((
             ('vocab', lambda b: self.vocab.from_bytes(b)),
             ('moves', lambda b: self.moves.from_bytes(b, strings=False)),
-            ('cfg', lambda b: self.cfg.update(json.loads(b))),
+            ('cfg', lambda b: self.cfg.update(srsly.json_loads(b))),
             ('model', lambda b: None)
         ))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx
index fc84fc23a..6d64a4fb4 100644
--- a/spacy/syntax/transition_system.pyx
+++ b/spacy/syntax/transition_system.pyx
@@ -7,14 +7,13 @@ from cymem.cymem cimport Pool
 from thinc.typedefs cimport weight_t
 from thinc.extra.search cimport Beam
 from collections import OrderedDict, Counter
-import ujson
+import srsly
 
 from . cimport _beam_utils
 from ..tokens.doc cimport Doc
 from ..structs cimport TokenC
 from .stateclass cimport StateClass
 from ..typedefs cimport attr_t
-from ..compat import json_dumps
 from ..errors import Errors
 from .. import util
 
@@ -153,13 +152,13 @@ cdef class TransitionSystem:
             # Make sure we take a copy here, and that we get a Counter
             self.labels[action] = Counter()
             # Have to be careful here: Sorting must be stable, or our model
-            # won't be read back in correctly. 
+            # won't be read back in correctly.
             sorted_labels = [(f, L) for L, f in label_freqs.items()]
             sorted_labels.sort()
             sorted_labels.reverse()
             for freq, label_str in sorted_labels:
                 self.add_action(int(action), label_str)
-                self.labels[action][label_str] = freq 
+                self.labels[action][label_str] = freq
 
     def add_action(self, int action, label_name):
         cdef attr_t label_id
@@ -204,7 +203,7 @@ cdef class TransitionSystem:
     def to_bytes(self, **exclude):
         transitions = []
         serializers = {
-            'moves': lambda: json_dumps(self.labels),
+            'moves': lambda: srsly.json_dumps(self.labels),
             'strings': lambda: self.strings.to_bytes()
         }
         return util.to_bytes(serializers, exclude)
@@ -212,7 +211,7 @@ cdef class TransitionSystem:
     def from_bytes(self, bytes_data, **exclude):
         labels = {}
         deserializers = {
-            'moves': lambda b: labels.update(ujson.loads(b)),
+            'moves': lambda b: labels.update(srsly.json_loads(b)),
             'strings': lambda b: self.strings.from_bytes(b)
         }
         msg = util.from_bytes(bytes_data, deserializers, exclude)
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index 80fbb5b1c..175480fe7 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -5,7 +5,7 @@ import numpy
 import tempfile
 import shutil
 import contextlib
-import msgpack
+import srsly
 from pathlib import Path
 from spacy.tokens import Doc, Span
 from spacy.attrs import POS, HEAD, DEP
@@ -100,8 +100,8 @@ def assert_docs_equal(doc1, doc2):
 
 def assert_packed_msg_equal(b1, b2):
     """Assert that two packed msgpack messages are equal."""
-    msg1 = msgpack.loads(b1, encoding="utf8")
-    msg2 = msgpack.loads(b2, encoding="utf8")
+    msg1 = srsly.msgpack_loads(b1)
+    msg2 = srsly.msgpack_loads(b2)
     assert sorted(msg1.keys()) == sorted(msg2.keys())
     for (k1, v1), (k2, v2) in zip(sorted(msg1.items()), sorted(msg2.items())):
         assert k1 == k2
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 683a3974f..5c3bf9c70 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -1,8 +1,8 @@
 from __future__ import unicode_literals
 
 import numpy
-import msgpack
 import gzip
+import srsly
 from thinc.neural.ops import NumpyOps
 
 from ..compat import copy_reg
@@ -74,11 +74,11 @@ class Binder(object):
             "lengths": numpy.asarray(lengths, dtype="int32").tobytes("C"),
             "strings": list(self.strings),
         }
-        return gzip.compress(msgpack.dumps(msg))
+        return gzip.compress(srsly.msgpack_dumps(msg))
 
     def from_bytes(self, string):
         """Deserialize the binder's annotations from a byte string."""
-        msg = msgpack.loads(gzip.decompress(string))
+        msg = srsly.msgpack_loads(gzip.decompress(string))
         self.attrs = msg["attrs"]
         self.strings = set(msg["strings"])
         lengths = numpy.fromstring(msg["lengths"], dtype="int32")
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index b845b4eb7..cd2428d79 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -10,8 +10,8 @@ import numpy
 import numpy.linalg
 import struct
 import dill
-import msgpack
 from thinc.neural.util import get_array_module, copy_array
+import srsly
 
 from libc.string cimport memcpy, memset
 from libc.math cimport sqrt
@@ -28,7 +28,7 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, CLUSTER
 from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB
 from ..attrs cimport ENT_TYPE, SENT_START
 from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
-from ..util import normalize_slice, is_json_serializable
+from ..util import normalize_slice
 from ..compat import is_config, copy_reg, pickle, basestring_
 from ..errors import deprecation_warning, models_warning, user_warning
 from ..errors import Errors, Warnings
@@ -807,8 +807,8 @@ cdef class Doc:
         }
         if 'user_data' not in exclude and self.user_data:
             user_data_keys, user_data_values = list(zip(*self.user_data.items()))
-            serializers['user_data_keys'] = lambda: msgpack.dumps(user_data_keys)
-            serializers['user_data_values'] = lambda: msgpack.dumps(user_data_values)
+            serializers['user_data_keys'] = lambda: srsly.msgpack_dumps(user_data_keys)
+            serializers['user_data_values'] = lambda: srsly.msgpack_dumps(user_data_values)
 
         return util.to_bytes(serializers, exclude)
 
@@ -836,9 +836,8 @@ cdef class Doc:
         # keys, we must have tuples. In values we just have to hope
         # users don't mind getting a list instead of a tuple.
         if 'user_data' not in exclude and 'user_data_keys' in msg:
-            user_data_keys = msgpack.loads(msg['user_data_keys'],
-                                           use_list=False, raw=False)
-            user_data_values = msgpack.loads(msg['user_data_values'], raw=False)
+            user_data_keys = srsly.msgpack_loads(msg['user_data_keys'], use_list=False)
+            user_data_values = srsly.msgpack_loads(msg['user_data_values'])
             for key, value in zip(user_data_keys, user_data_values):
                 self.user_data[key] = value
 
@@ -996,7 +995,7 @@ cdef class Doc:
                 if not self.has_extension(attr):
                     raise ValueError(Errors.E106.format(attr=attr, opts=underscore))
                 value = self._.get(attr)
-                if not is_json_serializable(value):
+                if not srsly.is_json_serializable(value):
                     raise ValueError(Errors.E107.format(attr=attr, value=repr(value)))
                 data['_'][attr] = value
         return data
diff --git a/spacy/util.py b/spacy/util.py
index d8c82da89..7e700be03 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals, print_function
 
 import os
-import ujson
 import pkg_resources
 import importlib
 import regex as re
@@ -15,18 +14,13 @@ import functools
 import cytoolz
 import itertools
 import numpy.random
-
+import srsly
 
 from .symbols import ORTH
 from .compat import cupy, CudaStream, path2str, basestring_, unicode_
-from .compat import import_file, json_dumps
+from .compat import import_file
 from .errors import Errors
 
-# Import these directly from Thinc, so that we're sure we always have the
-# same version.
-from thinc.neural._classes.model import msgpack  # noqa: F401
-from thinc.neural._classes.model import msgpack_numpy  # noqa: F401
-
 
 LANGUAGES = {}
 _data_path = Path(__file__).parent / "data"
@@ -185,7 +179,7 @@ def get_model_meta(path):
     meta_path = model_path / "meta.json"
     if not meta_path.is_file():
         raise IOError(Errors.E053.format(path=meta_path))
-    meta = read_json(meta_path)
+    meta = srsly.read_json(meta_path)
     for setting in ["lang", "name", "version"]:
         if setting not in meta or not meta[setting]:
             raise ValueError(Errors.E054.format(setting=setting))
@@ -529,74 +523,16 @@ def itershuffle(iterable, bufsize=1000):
         raise StopIteration
 
 
-def read_json(location):
-    """Open and load JSON from file.
-
-    location (Path): Path to JSON file.
-    RETURNS (dict): Loaded JSON content.
-    """
-    location = ensure_path(location)
-    with location.open("r", encoding="utf8") as f:
-        return ujson.load(f)
-
-
-def write_json(file_path, contents):
-    """Create a .json file and dump contents.
-
-    file_path (unicode / Path): The path to the output file.
-    contents: The JSON-serializable contents to output.
-    """
-    with Path(file_path).open("w", encoding="utf8") as f:
-        f.write(json_dumps(contents))
-
-
-def read_jsonl(file_path):
-    """Read a .jsonl file and yield its contents line by line.
-
-    file_path (unicode / Path): The file path.
-    YIELDS: The loaded JSON contents of each line.
-    """
-    with Path(file_path).open("r", encoding="utf8") as f:
-        for line in f:
-            try:  # hack to handle broken jsonl
-                yield ujson.loads(line.strip())
-            except ValueError:
-                continue
-
-
-def write_jsonl(file_path, lines):
-    """Create a .jsonl file and dump contents.
-
-    file_path (unicode / Path): The path to the output file.
-    lines (list): The JSON-serializable contents of each line.
-    """
-    data = [json_dumps(line) for line in lines]
-    with Path(file_path).open("w", encoding="utf-8") as f:
-        f.write("\n".join(data))
-
-
-def is_json_serializable(obj):
-    """Check if a Python object is JSON-serializable."""
-    if hasattr(obj, "__call__"):
-        # Check this separately here to prevent infinite recursions
-        return False
-    try:
-        ujson.dumps(obj)
-        return True
-    except TypeError:
-        return False
-
-
 def to_bytes(getters, exclude):
     serialized = OrderedDict()
     for key, getter in getters.items():
         if key not in exclude:
             serialized[key] = getter()
-    return msgpack.dumps(serialized, use_bin_type=True)
+    return srsly.msgpack_dumps(serialized)
 
 
 def from_bytes(bytes_data, setters, exclude):
-    msg = msgpack.loads(bytes_data, raw=False)
+    msg = srsly.msgpack_loads(bytes_data)
     for key, setter in setters.items():
         if key not in exclude and key in msg:
             setter(msg[key])
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index 3e3268bfa..911eff08e 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
 import functools
 import numpy
 from collections import OrderedDict
-
-from .util import msgpack
-from .util import msgpack_numpy
+import srsly
 
 cimport numpy as np
 from thinc.neural.util import get_array_module
@@ -353,7 +351,7 @@ cdef class Vectors:
             save_array = lambda arr, file_: xp.save(file_, arr)
         serializers = OrderedDict((
             ('vectors', lambda p: save_array(self.data, p.open('wb'))),
-            ('key2row', lambda p: msgpack.dump(self.key2row, p.open('wb')))
+            ('key2row', lambda p: srsly.write_msgpack(p, self.key2row))
         ))
         return util.to_disk(path, serializers, exclude)
 
@@ -366,8 +364,7 @@ cdef class Vectors:
         """
         def load_key2row(path):
             if path.exists():
-                with path.open('rb') as file_:
-                    self.key2row = msgpack.load(file_)
+                self.key2row = srsly.read_msgpack(path)
             for key, row in self.key2row.items():
                 if self._unset.count(row):
                     self._unset.erase(self._unset.find(row))
@@ -401,9 +398,9 @@ cdef class Vectors:
             if hasattr(self.data, 'to_bytes'):
                 return self.data.to_bytes()
             else:
-                return msgpack.dumps(self.data)
+                return srsly.msgpack_dumps(self.data)
         serializers = OrderedDict((
-            ('key2row', lambda: msgpack.dumps(self.key2row)),
+            ('key2row', lambda: srsly.msgpack_dumps(self.key2row)),
             ('vectors', serialize_weights)
         ))
         return util.to_bytes(serializers, exclude)
@@ -419,10 +416,10 @@ cdef class Vectors:
             if hasattr(self.data, 'from_bytes'):
                 self.data.from_bytes()
             else:
-                self.data = msgpack.loads(b)
+                self.data = srsly.msgpack_loads(b)
 
         deserializers = OrderedDict((
-            ('key2row', lambda b: self.key2row.update(msgpack.loads(b))),
+            ('key2row', lambda b: self.key2row.update(srsly.msgpack_loads(b))),
             ('vectors', deserialize_weights)
         ))
         util.from_bytes(data, deserializers, exclude)
diff --git a/website/api/_top-level/_compat.jade b/website/api/_top-level/_compat.jade
index c9b023647..7de2f4102 100644
--- a/website/api/_top-level/_compat.jade
+++ b/website/api/_top-level/_compat.jade
@@ -9,10 +9,9 @@ p
     |  underscore, e.e #[code unicode_].
 
 +aside-code("Example").
-    from spacy.compat import unicode_, json_dumps
+    from spacy.compat import unicode_
 
     compatible_unicode = unicode_('hello world')
-    compatible_json = json_dumps({'key': 'value'})
 
 +table(["Name", "Python 2", "Python 3"])
     +row
@@ -35,11 +34,6 @@ p
         +cell #[code raw_input]
         +cell #[code input]
 
-    +row
-        +cell #[code compat.json_dumps]
-        +cell #[code ujson.dumps] with #[code .decode('utf8')]
-        +cell #[code ujson.dumps]
-
     +row
         +cell #[code compat.path2str]
         +cell #[code str(path)] with #[code .decode('utf8')]