diff --git a/pyproject.toml b/pyproject.toml
index fed528d4a..8a6ababf3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,11 @@
 [build-system]
-requires = ["setuptools"]
+requires = [
+    "setuptools",
+    "wheel",
+    "cython>=0.25",
+    "cymem>=2.0.2,<2.1.0",
+    "preshed>=3.0.2,<3.1.0",
+    "murmurhash>=0.28.0,<1.1.0",
+    "thinc==7.4.0.dev0",
+]
 build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
index ac19f7bac..1429c77ac 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -59,7 +59,7 @@ install_requires =
 
 [options.extras_require]
 lookups =
-    spacy_lookups_data>=0.0.5<0.2.0
+    spacy_lookups_data>=0.0.5,<0.2.0
 cuda =
     cupy>=5.0.0b4
 cuda80 =
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 4b12052c3..0e12a594c 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -26,6 +26,7 @@ BLANK_MODEL_THRESHOLD = 2000
     lang=("model language", "positional", None, str),
     train_path=("location of JSON-formatted training data", "positional", None, Path),
     dev_path=("location of JSON-formatted development data", "positional", None, Path),
+    tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
     base_model=("name of model to update (optional)", "option", "b", str),
     pipeline=(
         "Comma-separated names of pipeline components to train",
@@ -41,6 +42,7 @@ def debug_data(
     lang,
     train_path,
     dev_path,
+    tag_map_path=None,
     base_model=None,
     pipeline="tagger,parser,ner",
     ignore_warnings=False,
@@ -60,6 +62,10 @@ def debug_data(
     if not dev_path.exists():
         msg.fail("Development data not found", dev_path, exits=1)
 
+    tag_map = {}
+    if tag_map_path is not None:
+        tag_map = srsly.read_json(tag_map_path)
+
     # Initialize the model and pipeline
     pipeline = [p.strip() for p in pipeline.split(",")]
     if base_model:
@@ -67,6 +73,8 @@ def debug_data(
     else:
         lang_cls = get_lang_class(lang)
         nlp = lang_cls()
+    # Update tag map with provided mapping
+    nlp.vocab.morphology.tag_map.update(tag_map)
 
     msg.divider("Data format validation")
 
@@ -344,7 +352,7 @@ def debug_data(
     if "tagger" in pipeline:
         msg.divider("Part-of-speech Tagging")
         labels = [label for label in gold_train_data["tags"]]
-        tag_map = nlp.Defaults.tag_map
+        tag_map = nlp.vocab.morphology.tag_map
         msg.info(
             "{} {} in data ({} {} in tag map)".format(
                 len(labels),
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 5af93a8f3..968a009f6 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -57,6 +57,7 @@ from .. import about
     textcat_multilabel=("Textcat classes aren't mutually exclusive (multilabel)", "flag", "TML", bool),
     textcat_arch=("Textcat model architecture", "option", "ta", str),
     textcat_positive_label=("Textcat positive label for binary classes with two labels", "option", "tpl", str),
+    tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
     verbose=("Display more information for debug", "flag", "VV", bool),
     debug=("Run data diagnostics before training", "flag", "D", bool),
     # fmt: on
@@ -95,6 +96,7 @@ def train(
     textcat_multilabel=False,
     textcat_arch="bow",
     textcat_positive_label=None,
+    tag_map_path=None,
     verbose=False,
     debug=False,
 ):
@@ -132,6 +134,9 @@ def train(
         output_path.mkdir()
         msg.good("Created output directory: {}".format(output_path))
 
+    tag_map = {}
+    if tag_map_path is not None:
+        tag_map = srsly.read_json(tag_map_path)
     # Take dropout and batch size as generators of values -- dropout
     # starts high and decays sharply, to force the optimizer to explore.
     # Batch size starts at 1 and grows, so that we make updates quickly
@@ -238,6 +243,9 @@ def train(
                 pipe_cfg = {}
             nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg))
 
+    # Update tag map with provided mapping
+    nlp.vocab.morphology.tag_map.update(tag_map)
+
     if vectors:
         msg.text("Loading vector from model '{}'".format(vectors))
         _load_vectors(nlp, vectors)
diff --git a/spacy/lang/lb/punctuation.py b/spacy/lang/lb/punctuation.py
index 1571e13d7..2a4587856 100644
--- a/spacy/lang/lb/punctuation.py
+++ b/spacy/lang/lb/punctuation.py
@@ -5,11 +5,13 @@ from ..char_classes import LIST_ELLIPSES, LIST_ICONS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
 
 ELISION = " ' ’ ".strip().replace(" ", "")
 
+abbrev = ("d", "D")
+
 _infixes = (
     LIST_ELLIPSES
     + LIST_ICONS
     + [
-        r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION),
+        r"(?<=^[{ab}][{el}])(?=[{a}])".format(ab=abbrev, a=ALPHA, el=ELISION),
         r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}])[:<>=](?=[{a}])".format(a=ALPHA),
diff --git a/spacy/lang/lb/tokenizer_exceptions.py b/spacy/lang/lb/tokenizer_exceptions.py
index b32daa58c..1c9b2dde3 100644
--- a/spacy/lang/lb/tokenizer_exceptions.py
+++ b/spacy/lang/lb/tokenizer_exceptions.py
@@ -10,6 +10,8 @@ _exc = {}
 
 # translate / delete what is not necessary
 for exc_data in [
+    {ORTH: "’t", LEMMA: "et", NORM: "et"},
+    {ORTH: "’T", LEMMA: "et", NORM: "et"},
     {ORTH: "'t", LEMMA: "et", NORM: "et"},
     {ORTH: "'T", LEMMA: "et", NORM: "et"},
     {ORTH: "wgl.", LEMMA: "wannechgelift", NORM: "wannechgelift"},
diff --git a/spacy/language.py b/spacy/language.py
index 869fa09a7..16aa4967e 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -15,6 +15,7 @@ import multiprocessing as mp
 from itertools import chain, cycle
 
 from .tokenizer import Tokenizer
+from .tokens.underscore import Underscore
 from .vocab import Vocab
 from .lemmatizer import Lemmatizer
 from .lookups import Lookups
@@ -853,7 +854,10 @@ class Language(object):
             sender.send()
 
         procs = [
-            mp.Process(target=_apply_pipes, args=(self.make_doc, pipes, rch, sch))
+            mp.Process(
+                target=_apply_pipes,
+                args=(self.make_doc, pipes, rch, sch, Underscore.get_state()),
+            )
             for rch, sch in zip(texts_q, bytedocs_send_ch)
         ]
         for proc in procs:
@@ -1108,16 +1112,18 @@ def _pipe(docs, proc, kwargs):
         yield doc
 
 
-def _apply_pipes(make_doc, pipes, reciever, sender):
+def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state):
     """Worker for Language.pipe
 
     receiver (multiprocessing.Connection): Pipe to receive text. Usually
         created by `multiprocessing.Pipe()`
     sender (multiprocessing.Connection): Pipe to send doc. Usually created by
        `multiprocessing.Pipe()`
+    underscore_state (tuple): The data in the Underscore class of the parent
     """
+    Underscore.load_state(underscore_state)
     while True:
-        texts = reciever.get()
+        texts = receiver.get()
         docs = (make_doc(text) for text in texts)
         for pipe in pipes:
             docs = pipe(docs)
diff --git a/spacy/tests/doc/test_underscore.py b/spacy/tests/doc/test_underscore.py
index 2877bfeea..c1eff2c20 100644
--- a/spacy/tests/doc/test_underscore.py
+++ b/spacy/tests/doc/test_underscore.py
@@ -7,6 +7,15 @@ from spacy.tokens import Doc, Span, Token
 from spacy.tokens.underscore import Underscore
 
 
+@pytest.fixture(scope="function", autouse=True)
+def clean_underscore():
+    # reset the Underscore object after the test, to avoid having state copied across tests
+    yield
+    Underscore.doc_extensions = {}
+    Underscore.span_extensions = {}
+    Underscore.token_extensions = {}
+
+
 def test_create_doc_underscore():
     doc = Mock()
     doc.doc = doc
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index e4584d03a..a826a0a0e 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -6,6 +6,7 @@ import re
 from mock import Mock
 from spacy.matcher import Matcher, DependencyMatcher
 from spacy.tokens import Doc, Token
+from ..doc.test_underscore import clean_underscore
 
 
 @pytest.fixture
@@ -200,6 +201,7 @@ def test_matcher_any_token_operator(en_vocab):
     assert matches[2] == "test hello world"
 
 
+@pytest.mark.usefixtures("clean_underscore")
 def test_matcher_extension_attribute(en_vocab):
     matcher = Matcher(en_vocab)
     get_is_fruit = lambda token: token.text in ("apple", "banana")
diff --git a/spacy/tests/regression/test_issue4849.py b/spacy/tests/regression/test_issue4849.py
index 834219773..85d03fe9a 100644
--- a/spacy/tests/regression/test_issue4849.py
+++ b/spacy/tests/regression/test_issue4849.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 from spacy.lang.en import English
 from spacy.pipeline import EntityRuler
+from spacy.tokens.underscore import Underscore
 
 
 def test_issue4849():
diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
new file mode 100644
index 000000000..9a3c10d61
--- /dev/null
+++ b/spacy/tests/regression/test_issue4903.py
@@ -0,0 +1,45 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import spacy
+from spacy.lang.en import English
+from spacy.tokens import Span, Doc
+from spacy.tokens.underscore import Underscore
+
+
+class CustomPipe:
+    name = "my_pipe"
+
+    def __init__(self):
+        Span.set_extension("my_ext", getter=self._get_my_ext)
+        Doc.set_extension("my_ext", default=None)
+
+    def __call__(self, doc):
+        gathered_ext = []
+        for sent in doc.sents:
+            sent_ext = self._get_my_ext(sent)
+            sent._.set("my_ext", sent_ext)
+            gathered_ext.append(sent_ext)
+
+        doc._.set("my_ext", "\n".join(gathered_ext))
+
+        return doc
+
+    @staticmethod
+    def _get_my_ext(span):
+        return str(span.end)
+
+
+def test_issue4903():
+    # ensures that this runs correctly and doesn't hang or crash on Windows / macOS
+
+    nlp = English()
+    custom_component = CustomPipe()
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
+    nlp.add_pipe(custom_component, after="sentencizer")
+
+    text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
+    docs = list(nlp.pipe(text, n_process=2))
+    assert docs[0].text == "I like bananas."
+    assert docs[1].text == "Do you like them?"
+    assert docs[2].text == "No, I prefer wasabi."
diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py
index 8aea2c3d5..0e45291a9 100644
--- a/spacy/tests/regression/test_issue4924.py
+++ b/spacy/tests/regression/test_issue4924.py
@@ -11,6 +11,6 @@ def nlp():
     return spacy.blank("en")
 
 
-def test_evaluate(nlp):
+def test_issue4924(nlp):
     docs_golds = [("", {})]
     nlp.evaluate(docs_golds)
diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py
index b36fe9294..8dac8526e 100644
--- a/spacy/tokens/underscore.py
+++ b/spacy/tokens/underscore.py
@@ -79,6 +79,14 @@ class Underscore(object):
     def _get_key(self, name):
         return ("._.", name, self._start, self._end)
 
+    @classmethod
+    def get_state(cls):
+        return cls.token_extensions, cls.span_extensions, cls.doc_extensions
+
+    @classmethod
+    def load_state(cls, state):
+        cls.token_extensions, cls.span_extensions, cls.doc_extensions = state
+
 
 def get_ext_args(**kwargs):
     """Validate and convert arguments. Reused in Doc, Token and Span."""
diff --git a/website/docs/api/token.md b/website/docs/api/token.md
index 68402d1b4..c30c01c20 100644
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@@ -437,8 +437,8 @@ The L2 norm of the token's vector representation.
 | `norm_` | unicode | The token's norm, i.e. a normalized form of the token text. Usually set in the language's [tokenizer exceptions](/usage/adding-languages#tokenizer-exceptions) or [norm exceptions](/usage/adding-languages#norm-exceptions). |
 | `lower` | int | Lowercase form of the token. |
 | `lower_` | unicode | Lowercase form of the token text. Equivalent to `Token.text.lower()`. |
-| `shape` | int | Transform of the tokens's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. |
-| `shape_` | unicode | Transform of the tokens's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. |
+| `shape` | int | Transform of the tokens's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. |
+| `shape_` | unicode | Transform of the tokens's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. |
 | `prefix` | int | Hash value of a length-N substring from the start of the token. Defaults to `N=1`. |
 | `prefix_` | unicode | A length-N substring from the start of the token. Defaults to `N=1`. |
 | `suffix` | int | Hash value of a length-N substring from the end of the token. Defaults to `N=3`. |
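
Usage note (not part of the patch): the new `-tm` / `tag_map_path` option added to `debug-data` and `train` above simply loads a JSON tag map and merges it into the vocab's morphology. A minimal sketch of the equivalent Python calls follows; the file name `tag_map.json` and the tags in it are illustrative, the two calls mirror what the patched commands do.

# Sketch of what tag_map_path does under the hood (assumed example file/tags).
import srsly
from spacy.lang.en import English

# tag_map.json might contain: {"NNS": {"pos": "NOUN"}, "VBZ": {"pos": "VERB"}}
tag_map = srsly.read_json("tag_map.json")

nlp = English()
# Same call the patched debug-data and train commands make:
nlp.vocab.morphology.tag_map.update(tag_map)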
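
Likewise, `Underscore.get_state()` / `load_state()` exist so that custom extensions registered in the parent process are re-registered in each worker spawned by `nlp.pipe(..., n_process=...)`. A rough single-process illustration of the round trip (in a real run the load happens inside the worker, as in `_apply_pipes` above; the extension name `my_flag` is illustrative):

from spacy.tokens import Doc
from spacy.tokens.underscore import Underscore

Doc.set_extension("my_flag", default=False)

# The parent captures the registered extensions...
state = Underscore.get_state()  # (token_extensions, span_extensions, doc_extensions)

# ...and each worker restores them before deserializing Doc objects.
Underscore.load_state(state)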