From 1abeca90a62f164a2492b92fe7e15c9da51b7a88 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Thu, 7 Jan 2021 18:58:13 +0100
Subject: [PATCH 1/7] refer to _parser_internals.nonproj.DELIMITER

---
 spacy/cli/debug_data.py       | 3 ++-
 spacy/pipeline/dep_parser.pyx | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index d23cd3717..8eabf1f8f 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -12,6 +12,7 @@ from ..training import Example
 from ..training.initialize import get_sourced_components
 from ..schemas import ConfigSchemaTraining
 from ..pipeline._parser_internals import nonproj
+from ..pipeline._parser_internals.nonproj import DELIMITER
 from ..language import Language
 from ..util import registry, resolve_dot_names
 from .. import util
@@ -383,7 +384,7 @@ def debug_data(
     # rare labels in projectivized train
     rare_projectivized_labels = []
     for label in gold_train_data["deps"]:
-        if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and "||" in label:
+        if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and DELIMITER in label:
             rare_projectivized_labels.append(
                 f"{label}: {gold_train_data['deps'][label]}"
             )
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 1fe29eb9b..18c9fd25a 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -9,6 +9,7 @@ from ._parser_internals.arc_eager cimport ArcEager
 from .functions import merge_subtokens
 from ..language import Language
 from ._parser_internals import nonproj
+from ._parser_internals.nonproj import DELIMITER
 from ..scorer import Scorer
 from ..training import validate_examples

@@ -230,8 +231,8 @@ cdef class DependencyParser(Parser):
         for move in self.move_names:
             if "-" in move:
                 label = move.split("-")[1]
-                if "||" in label:
-                    label = label.split("||")[1]
+                if DELIMITER in label:
+                    label = label.split(DELIMITER)[1]
                 labels.add(label)
         return tuple(sorted(labels))

From dd12c6c8fda95c5346f8e8c74c30664bc37af6f0 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Thu, 7 Jan 2021 19:10:32 +0100
Subject: [PATCH 2/7] allow missing information in deps and heads annotations

---
 .../pipeline/_parser_internals/arc_eager.pyx |  5 +--
 spacy/symbols.pxd                            |  1 +
 spacy/symbols.pyx                            |  1 +
 spacy/tests/doc/test_array.py                |  8 +++-
 spacy/tests/parser/test_parse.py             | 38 ++++++++++++++++++-
 spacy/tokens/doc.pyx                         |  8 ++--
 spacy/tokens/token.pyx                       |  9 +++--
 spacy/training/example.pyx                   | 16 ++++++--
 8 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 90a70b17b..463980051 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -195,8 +195,7 @@ cdef class ArcEagerGold:
     def __init__(self, ArcEager moves, StateClass stcls, Example example):
         self.mem = Pool()
         heads, labels = example.get_aligned_parse(projectivize=True)
-        labels = [label if label is not None else "" for label in labels]
-        labels = [example.x.vocab.strings.add(label) for label in labels]
+        labels = [example.x.vocab.strings.add(label) if label is not None else 0 for label in labels]
         sent_starts = example.get_aligned_sent_starts()
         assert len(heads) == len(labels) == len(sent_starts), (len(heads), len(labels), len(sent_starts))
         self.c = create_gold_state(self.mem, stcls.c, heads, labels, sent_starts)
@@ -783,7 +782,7 @@ cdef class ArcEager(TransitionSystem):
             for i in range(self.n_moves):
                 print(self.get_class_name(i), is_valid[i], costs[i])
             print("Gold sent starts?", is_sent_start(&gold_state, state.B(0)), is_sent_start(&gold_state, state.B(1)))
-            raise ValueError
+            raise ValueError("Could not find gold transition - see logs above.")

     def get_oracle_sequence_from_state(self, StateClass state, ArcEagerGold gold, _debug=None):
         cdef int i
diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd
index bc15d9b80..e28322cb5 100644
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -467,3 +467,4 @@ cdef enum symbol_t:
     IDX
     _
+    MISSING_LABEL
diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx
index b0345c710..83c693c98 100644
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -466,6 +466,7 @@ IDS = {
     "LAW": LAW,
     "MORPH": MORPH,
     "_": _,
+    "MISSING_LABEL": MISSING_LABEL,
 }
diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index ef54c581c..92b9620ff 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -98,10 +98,16 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
     doc_from_array = Doc(en_vocab, words=words)
     doc_from_array.from_array(["HEAD"], arr)

-    # head before start
+    # head before start is used to denote a missing value
     arr = doc.to_array(["HEAD"])
     arr[0] = -1
     doc_from_array = Doc(en_vocab, words=words)
+    doc_from_array.from_array(["HEAD"], arr)
+
+    # other negative values are invalid
+    arr = doc.to_array(["HEAD"])
+    arr[0] = -2
+    doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index e7728baaf..437cc760c 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -45,7 +45,17 @@ CONFLICTING_DATA = [
     ),
 ]

-eps = 0.01
+PARTIAL_DATA = [
+    (
+        "I like London.",
+        {
+            "heads": [1, 1, 1, None],
+            "deps": ["nsubj", "ROOT", "dobj", None],
+        },
+    ),
+]
+
+eps = 0.1


 def test_parser_root(en_vocab):
@@ -205,6 +215,32 @@ def test_parser_set_sent_starts(en_vocab):
             assert token.head in sent


+@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
+def test_incomplete_data(pipe_name):
+    # Test that the parser works with incomplete information
+    nlp = English()
+    parser = nlp.add_pipe(pipe_name)
+    train_examples = []
+    for text, annotations in PARTIAL_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for dep in annotations.get("deps", []):
+            if dep is not None:
+                parser.add_label(dep)
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+    for i in range(150):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses[pipe_name] < 0.0001
+
+    # test the trained model
+    test_text = "I like securities."
+    doc = nlp(test_text)
+    assert doc[0].dep_ == "nsubj"
+    assert doc[2].dep_ == "dobj"
+    assert doc[0].head.i == 1
+    assert doc[2].head.i == 1
+
+
 @pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
 def test_overfitting_IO(pipe_name):
     # Simple test to try and quickly overfit the dependency parser (normal or beam)
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 9eedf214b..92344b6c8 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -266,7 +266,7 @@ cdef class Doc:
                 self.push_back(lexeme, has_space)

         if heads is not None:
-            heads = [head - i for i, head in enumerate(heads)]
+            heads = [head - i if head is not None else None for i, head in enumerate(heads)]
         if deps and not heads:
             heads = [0] * len(deps)
         if sent_starts is not None:
@@ -328,7 +328,8 @@ cdef class Doc:
             if annot is not heads and annot is not sent_starts and annot is not ent_iobs:
                 values.extend(annot)
         for value in values:
-            self.vocab.strings.add(value)
+            if value is not None:
+                self.vocab.strings.add(value)

         # if there are any other annotations, set them
         if headings:
@@ -1039,7 +1040,8 @@ cdef class Doc:
                     # cast index to signed int
                     abs_head_index = values[col * stride + i]
                     abs_head_index += i
-                    if abs_head_index < 0 or abs_head_index >= length:
+                    # abs_head_index -1 refers to missing value
+                    if abs_head_index < -1 or abs_head_index >= length:
                         raise ValueError(
                             Errors.E190.format(
                                 index=i,
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 2075c3cc8..c52f7da1b 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -639,13 +639,16 @@ cdef class Token:
         return any(ancestor.i == self.i for ancestor in descendant.ancestors)

     property head:
         """The syntactic parent, or "governor", of this token.

         RETURNS (Token): The token predicted by the parser to be the head of
-            the current token.
+            the current token. Returns None if unknown.
""" def __get__(self): - return self.doc[self.i + self.c.head] + head_i = self.i + self.c.head + if head_i == -1: + return None + return self.doc[head_i] def __set__(self, Token new_head): # This function sets the head of self to new_head and updates the diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index 21907e7dd..fc5cd8e26 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -11,6 +11,7 @@ from .alignment import Alignment from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags from .iob_utils import biluo_tags_to_spans from ..errors import Errors, Warnings +from ..symbols import MISSING_LABEL from ..pipeline._parser_internals import nonproj from ..util import logger @@ -179,14 +180,18 @@ cdef class Example: gold_to_cand = self.alignment.y2x aligned_heads = [None] * self.x.length aligned_deps = [None] * self.x.length - heads = [token.head.i for token in self.y] + heads = [token.head.i if token.head is not None else -1 for token in self.y] deps = [token.dep_ for token in self.y] if projectivize: - heads, deps = nonproj.projectivize(heads, deps) + proj_heads, proj_deps = nonproj.projectivize(heads, deps) + # don't touch the missing data + heads = [h if heads[i] != -1 else -1 for i, h in enumerate(proj_heads)] + MISSING = self.x.vocab.strings[MISSING_LABEL] + deps = [d if deps[i] != MISSING else MISSING for i, d in enumerate(proj_deps)] for cand_i in range(self.x.length): if cand_to_gold.lengths[cand_i] == 1: gold_i = cand_to_gold[cand_i].dataXd[0, 0] - if gold_to_cand.lengths[heads[gold_i]] == 1: + if heads[gold_i] != -1 and gold_to_cand.lengths[heads[gold_i]] == 1: aligned_heads[cand_i] = int(gold_to_cand[heads[gold_i]].dataXd[0, 0]) aligned_deps[cand_i] = deps[gold_i] return aligned_heads, aligned_deps @@ -329,7 +334,10 @@ def _annot2array(vocab, tok_annot, doc_annot): pass elif key == "HEAD": attrs.append(key) - values.append([h-i for i, h in enumerate(value)]) + values.append([h-i if h is not None else -(i+1) for i, h in enumerate(value)]) + elif key == "DEP": + attrs.append(key) + values.append([vocab.strings.add(h) if h is not None else MISSING_LABEL for h in value]) elif key == "SENT_START": attrs.append(key) values.append(value) From a581d82f33742fbff264d1ded3b55c7e65d00abb Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 12 Jan 2021 17:17:06 +0100 Subject: [PATCH 3/7] introduce token.has_head and refer to MISSING_DEP_ (WIP) --- spacy/symbols.pxd | 1 - spacy/symbols.pyx | 1 - spacy/tests/doc/test_array.py | 8 +------- spacy/tests/doc/test_retokenize_merge.py | 9 ++++++--- spacy/tests/doc/test_retokenize_split.py | 3 ++- spacy/tests/doc/test_token_api.py | 22 ++++++++++++++++++++++ spacy/tests/parser/test_nonproj.py | 2 +- spacy/tests/training/test_new_example.py | 19 +++++++++++++++++++ spacy/tests/training/test_training.py | 3 ++- spacy/tokens/doc.pyx | 8 +++++--- spacy/tokens/token.pyx | 22 +++++++++++++++++----- spacy/training/example.pyx | 18 ++++++++---------- 12 files changed, 83 insertions(+), 33 deletions(-) diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index e28322cb5..bc15d9b80 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -467,4 +467,3 @@ cdef enum symbol_t: IDX _ - MISSING_LABEL diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 83c693c98..b0345c710 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -466,7 +466,6 @@ IDS = { "LAW": LAW, "MORPH": MORPH, "_": _, - "MISSING_LABEL": MISSING_LABEL, } diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py 
index 92b9620ff..ef54c581c 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -98,16 +98,10 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
     doc_from_array = Doc(en_vocab, words=words)
     doc_from_array.from_array(["HEAD"], arr)

-    # head before start is used to denote a missing value
+    # head before start
     arr = doc.to_array(["HEAD"])
     arr[0] = -1
     doc_from_array = Doc(en_vocab, words=words)
-    doc_from_array.from_array(["HEAD"], arr)
-
-    # other negative values are invalid
-    arr = doc.to_array(["HEAD"])
-    arr[0] = -2
-    doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py
index 60cc66d66..48cd33890 100644
--- a/spacy/tests/doc/test_retokenize_merge.py
+++ b/spacy/tests/doc/test_retokenize_merge.py
@@ -89,8 +89,9 @@ def test_doc_retokenize_lex_attrs(en_tokenizer):
 def test_doc_retokenize_spans_merge_tokens(en_tokenizer):
     text = "Los Angeles start."
     heads = [1, 2, 2, 2]
+    deps = ["dep"] * len(heads)
     tokens = en_tokenizer(text)
-    doc = Doc(tokens.vocab, words=[t.text for t in tokens], heads=heads)
+    doc = Doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps)
     assert len(doc) == 4
     assert doc[0].head.text == "Angeles"
     assert doc[1].head.text == "start"
@@ -145,7 +146,8 @@ def test_doc_retokenize_spans_merge_tokens_default_attrs(en_vocab):
 def test_doc_retokenize_spans_merge_heads(en_vocab):
     words = ["I", "found", "a", "pilates", "class", "near", "work", "."]
     heads = [1, 1, 4, 6, 1, 4, 5, 1]
-    doc = Doc(en_vocab, words=words, heads=heads)
+    deps = ["dep"] * len(heads)
+    doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
     assert len(doc) == 8
     with doc.retokenize() as retokenizer:
         attrs = {"tag": doc[4].tag_, "lemma": "pilates class", "ent_type": "O"}
@@ -177,8 +179,9 @@ def test_doc_retokenize_spans_merge_non_disjoint(en_tokenizer):
 def test_doc_retokenize_span_np_merges(en_tokenizer):
     text = "displaCy is a parse tool built with Javascript"
     heads = [1, 1, 4, 4, 1, 4, 5, 6]
+    deps = ["dep"] * len(heads)
     tokens = en_tokenizer(text)
-    doc = Doc(tokens.vocab, words=[t.text for t in tokens], heads=heads)
+    doc = Doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps)
     assert doc[4].head.i == 1
     with doc.retokenize() as retokenizer:
         attrs = {"tag": "NP", "lemma": "tool", "ent_type": "O"}
diff --git a/spacy/tests/doc/test_retokenize_split.py b/spacy/tests/doc/test_retokenize_split.py
index 21c3ffd4b..6bfd508bc 100644
--- a/spacy/tests/doc/test_retokenize_split.py
+++ b/spacy/tests/doc/test_retokenize_split.py
@@ -6,7 +6,8 @@ from spacy.tokens import Doc, Token
 def test_doc_retokenize_split(en_vocab):
     words = ["LosAngeles", "start", "."]
     heads = [1, 2, 2]
-    doc = Doc(en_vocab, words=words, heads=heads)
+    deps = ["dep"] * len(heads)
+    doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
     assert len(doc) == 3
     assert len(str(doc)) == 19
     assert doc[0].head.text == "start"
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index 3c5c063bd..4587f5601 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -4,6 +4,8 @@ from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STO
 from spacy.symbols import VERB
 from spacy.vocab import Vocab
 from spacy.tokens import Doc
+from spacy.tokens.token import MISSING_DEP_
+from spacy.training import Example


 @pytest.fixture
@@ -250,3 +252,23 @@ def test_token_api_non_conjuncts(en_vocab):
     doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
     assert [w.text for w in doc[0].conjuncts] == []
     assert [w.text for w in doc[1].conjuncts] == []
+
+
+def test_missing_head_dep(en_vocab):
+    heads = [1, 1, 1, 1, 2, None]
+    deps = ["nsubj", "ROOT", "dobj", "cc", "conj", None]
+    words = ["I", "like", "London", "and", "Berlin", "."]
+    doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
+    pred_has_heads = [t.has_head() for t in doc]
+    pred_deps = [t.dep_ for t in doc]
+    assert pred_has_heads == [True, True, True, True, True, False]
+    assert pred_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
+    example = Example.from_dict(doc, {"heads": heads, "deps": deps})
+    ref_heads = [t.head.i for t in example.reference]
+    ref_deps = [t.dep_ for t in example.reference]
+    ref_has_heads = [t.has_head() for t in example.reference]
+    assert ref_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
+    assert ref_has_heads == [True, True, True, True, True, False]
+    aligned_heads, aligned_deps = example.get_aligned_parse(projectivize=True)
+    assert aligned_heads[5] == ref_heads[5]
+    assert aligned_deps[5] == MISSING_DEP_
\ No newline at end of file
diff --git a/spacy/tests/parser/test_nonproj.py b/spacy/tests/parser/test_nonproj.py
index 544701a4c..3957e4d77 100644
--- a/spacy/tests/parser/test_nonproj.py
+++ b/spacy/tests/parser/test_nonproj.py
@@ -121,7 +121,7 @@ def test_parser_pseudoprojectivity(en_vocab):
     assert undeco_labels == ["det", "nsubj", "root", "det", "dobj", "aux",
                              "nsubj", "acl", "punct"]
     # if there are two potential new heads, the first one is chosen even if
-    # it"s wrong
+    # it's wrong
     proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
     deco_labels = ["advmod||aux", "root", "det", "aux", "advmod", "det",
                    "dobj", "det", "nmod", "aux", "nmod||dobj", "advmod",
diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py
index 06db86a12..01ed5b5b6 100644
--- a/spacy/tests/training/test_new_example.py
+++ b/spacy/tests/training/test_new_example.py
@@ -263,3 +263,22 @@ def test_Example_from_dict_sentences():
     annots = {"sent_starts": [1, -1, 0, 0, 0]}
     ex = Example.from_dict(predicted, annots)
     assert len(list(ex.reference.sents)) == 1
+
+
+def test_Example_from_dict_with_parse():
+    vocab = Vocab()
+    words = ["I", "like", "London", "and", "Berlin", "."]
+    deps = ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"]
+    heads = [1, 1, 1, 2, 2, 1]
+    annots_head_only = {"words": words, "heads": heads}
+    annots_head_dep = {"words": words, "heads": heads, "deps": deps}
+    predicted = Doc(vocab, words=words)
+
+    # when not providing deps, the head information is considered to be missing
+    # in this case, the token's heads refer to themselves
+    example_1 = Example.from_dict(predicted, annots_head_only)
+    assert [t.head.i for t in example_1.reference] == [0, 1, 2, 3, 4, 5]
+
+    # when providing deps, the head information is actually used
+    example_2 = Example.from_dict(predicted, annots_head_dep)
+    assert [t.head.i for t in example_2.reference] == heads
diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py
index 2e83580b5..c7a85bf87 100644
--- a/spacy/tests/training/test_training.py
+++ b/spacy/tests/training/test_training.py
@@ -436,7 +436,8 @@ def test_gold_ner_missing_tags(en_tokenizer):
 def test_projectivize(en_tokenizer):
     doc = en_tokenizer("He pretty quickly walks away")
     heads = [3, 2, 3, 0, 2]
-    example = Example.from_dict(doc, {"heads": heads})
+    deps = ["dep"] * len(heads)
+    example = Example.from_dict(doc, {"heads": heads, "deps": deps})
     proj_heads, proj_labels = example.get_aligned_parse(projectivize=True)
     nonproj_heads, nonproj_labels = example.get_aligned_parse(projectivize=False)
     assert proj_heads == [3, 2, 3, 0, 3]
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 92344b6c8..fc14fb506 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -16,6 +16,7 @@ from thinc.util import copy_array
 import warnings

 from .span cimport Span
+from .token import MISSING_DEP_
 from .token cimport Token
 from ..lexeme cimport Lexeme, EMPTY_LEXEME
 from ..typedefs cimport attr_t, flags_t
@@ -266,7 +267,9 @@ cdef class Doc:
                 self.push_back(lexeme, has_space)

         if heads is not None:
-            heads = [head - i if head is not None else None for i, head in enumerate(heads)]
+            heads = [head - i if head is not None else 0 for i, head in enumerate(heads)]
+        if deps is not None:
+            deps = [dep if dep is not None else MISSING_DEP_ for dep in deps]
         if deps and not heads:
             heads = [0] * len(deps)
         if sent_starts is not None:
@@ -1040,8 +1043,7 @@ cdef class Doc:
                     # cast index to signed int
                     abs_head_index = values[col * stride + i]
                     abs_head_index += i
-                    # abs_head_index -1 refers to missing value
-                    if abs_head_index < -1 or abs_head_index >= length:
+                    if abs_head_index < 0 or abs_head_index >= length:
                         raise ValueError(
                             Errors.E190.format(
                                 index=i,
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index c52f7da1b..a6f9a2a0c 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -22,6 +22,8 @@ from .. import parts_of_speech
 from ..errors import Errors, Warnings
 from .underscore import Underscore, get_ext_args

+MISSING_DEP_ = ""
+

 cdef class Token:
     """An individual token – i.e. a word, punctuation symbol, whitespace,
@@ -638,17 +640,27 @@ cdef class Token:
             return False
         return any(ancestor.i == self.i for ancestor in descendant.ancestors)

+
+    def has_head(self):
+        """Check whether the token has annotated head information.
+
+        RETURNS (bool): Whether the head annotation is valid or not.
+        """
+        return self.dep_ != MISSING_DEP_
+
+
     property head:
         """The syntactic parent, or "governor", of this token.
+        If token.has_head() is `False`, this method will return itself.

         RETURNS (Token): The token predicted by the parser to be the head of
-            the current token. Returns None if unknown.
+            the current token.
""" def __get__(self): - head_i = self.i + self.c.head - if head_i == -1: - return None - return self.doc[head_i] + if not self.has_head(): + return self + else: + return self.doc[self.i + self.c.head] def __set__(self, Token new_head): # This function sets the head of self to new_head and updates the diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index fc5cd8e26..856719893 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -11,8 +11,8 @@ from .alignment import Alignment from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags from .iob_utils import biluo_tags_to_spans from ..errors import Errors, Warnings -from ..symbols import MISSING_LABEL from ..pipeline._parser_internals import nonproj +from ..tokens.token import MISSING_DEP_ from ..util import logger @@ -180,18 +180,15 @@ cdef class Example: gold_to_cand = self.alignment.y2x aligned_heads = [None] * self.x.length aligned_deps = [None] * self.x.length - heads = [token.head.i if token.head is not None else -1 for token in self.y] + has_heads = [token.has_head() for token in self.y] + heads = [token.head.i for token in self.y] deps = [token.dep_ for token in self.y] if projectivize: - proj_heads, proj_deps = nonproj.projectivize(heads, deps) - # don't touch the missing data - heads = [h if heads[i] != -1 else -1 for i, h in enumerate(proj_heads)] - MISSING = self.x.vocab.strings[MISSING_LABEL] - deps = [d if deps[i] != MISSING else MISSING for i, d in enumerate(proj_deps)] + heads, deps = nonproj.projectivize(heads, deps) for cand_i in range(self.x.length): if cand_to_gold.lengths[cand_i] == 1: gold_i = cand_to_gold[cand_i].dataXd[0, 0] - if heads[gold_i] != -1 and gold_to_cand.lengths[heads[gold_i]] == 1: + if gold_to_cand.lengths[heads[gold_i]] == 1: aligned_heads[cand_i] = int(gold_to_cand[heads[gold_i]].dataXd[0, 0]) aligned_deps[cand_i] = deps[gold_i] return aligned_heads, aligned_deps @@ -334,10 +331,11 @@ def _annot2array(vocab, tok_annot, doc_annot): pass elif key == "HEAD": attrs.append(key) - values.append([h-i if h is not None else -(i+1) for i, h in enumerate(value)]) + values.append([h-i if h is not None else 0 for i, h in enumerate(value)]) elif key == "DEP": attrs.append(key) - values.append([vocab.strings.add(h) if h is not None else MISSING_LABEL for h in value]) + value = [v if v is not None else MISSING_DEP_ for v in value] + values.append([vocab.strings.add(h) for h in value]) elif key == "SENT_START": attrs.append(key) values.append(value) From 5b598bd1d56c9a40748754174fbc63f66559ca2f Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 12 Jan 2021 17:28:41 +0100 Subject: [PATCH 4/7] formatting --- spacy/pipeline/_parser_internals/arc_eager.pyx | 4 +++- spacy/tests/doc/test_token_api.py | 6 +++--- spacy/tests/training/test_new_example.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx index 463980051..50b620b7a 100644 --- a/spacy/pipeline/_parser_internals/arc_eager.pyx +++ b/spacy/pipeline/_parser_internals/arc_eager.pyx @@ -9,6 +9,7 @@ from ...typedefs cimport hash_t, attr_t from ...strings cimport hash_string from ...structs cimport TokenC from ...tokens.doc cimport Doc, set_children_from_heads +from ...tokens.token import MISSING_DEP_ from ...training.example cimport Example from .stateclass cimport StateClass from ._state cimport StateC, ArcC @@ -195,7 +196,8 @@ cdef class ArcEagerGold: def __init__(self, ArcEager moves, StateClass 
         self.mem = Pool()
         heads, labels = example.get_aligned_parse(projectivize=True)
-        labels = [example.x.vocab.strings.add(label) if label is not None else 0 for label in labels]
+        labels = [label if label is not None else MISSING_DEP_ for label in labels]
+        labels = [example.x.vocab.strings.add(label) for label in labels]
         sent_starts = example.get_aligned_sent_starts()
         assert len(heads) == len(labels) == len(sent_starts), (len(heads), len(labels), len(sent_starts))
         self.c = create_gold_state(self.mem, stcls.c, heads, labels, sent_starts)
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index 4587f5601..d3fb044ee 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -259,8 +259,8 @@ def test_missing_head_dep(en_vocab):
     deps = ["nsubj", "ROOT", "dobj", "cc", "conj", None]
     words = ["I", "like", "London", "and", "Berlin", "."]
     doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
-    pred_has_heads = [t.has_head() for t in doc] 
-    pred_deps = [t.dep_ for t in doc] 
+    pred_has_heads = [t.has_head() for t in doc]
+    pred_deps = [t.dep_ for t in doc]
     assert pred_has_heads == [True, True, True, True, True, False]
     assert pred_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
@@ -271,4 +271,4 @@ def test_missing_head_dep(en_vocab):
     assert ref_has_heads == [True, True, True, True, True, False]
     aligned_heads, aligned_deps = example.get_aligned_parse(projectivize=True)
     assert aligned_heads[5] == ref_heads[5]
-    assert aligned_deps[5] == MISSING_DEP_
\ No newline at end of file
+    assert aligned_deps[5] == MISSING_DEP_
diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py
index 01ed5b5b6..6b6486b2b 100644
--- a/spacy/tests/training/test_new_example.py
+++ b/spacy/tests/training/test_new_example.py
@@ -265,7 +265,7 @@ def test_Example_from_dict_sentences():
     assert len(list(ex.reference.sents)) == 1


-def test_Example_from_dict_with_parse():
+def test_Example_missing_deps():
     vocab = Vocab()
     words = ["I", "like", "London", "and", "Berlin", "."]
     deps = ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"]

From 232e953b148e1dd9259a34bcbc64aec1d7786e08 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Tue, 12 Jan 2021 20:32:57 +0100
Subject: [PATCH 5/7] pytest.approx with absolute eps

---
 spacy/tests/parser/test_parse.py | 72 ++++++++++++++++----------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 437cc760c..5b68bbc37 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -360,25 +360,25 @@ def test_beam_overfitting_IO():
     head_scores = head_scores[0]
     label_scores = label_scores[0]
     # test label annotations: 0=nsubj, 2=dobj, 3=punct
-    assert label_scores[(0, "nsubj")] == pytest.approx(1.0, eps)
-    assert label_scores[(0, "dobj")] == pytest.approx(0.0, eps)
-    assert label_scores[(0, "punct")] == pytest.approx(0.0, eps)
-    assert label_scores[(2, "nsubj")] == pytest.approx(0.0, eps)
-    assert label_scores[(2, "dobj")] == pytest.approx(1.0, eps)
-    assert label_scores[(2, "punct")] == pytest.approx(0.0, eps)
-    assert label_scores[(3, "nsubj")] == pytest.approx(0.0, eps)
-    assert label_scores[(3, "dobj")] == pytest.approx(0.0, eps)
-    assert label_scores[(3, "punct")] == pytest.approx(1.0, eps)
+    assert label_scores[(0, "nsubj")] == pytest.approx(1.0, abs=eps)
+    assert label_scores[(0, "dobj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(0, "punct")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(2, "nsubj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(2, "dobj")] == pytest.approx(1.0, abs=eps)
+    assert label_scores[(2, "punct")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(3, "nsubj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(3, "dobj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores[(3, "punct")] == pytest.approx(1.0, abs=eps)
     # test head annotations: the root is token at index 1
-    assert head_scores[(0, 0)] == pytest.approx(0.0, eps)
-    assert head_scores[(0, 1)] == pytest.approx(1.0, eps)
-    assert head_scores[(0, 2)] == pytest.approx(0.0, eps)
-    assert head_scores[(2, 0)] == pytest.approx(0.0, eps)
-    assert head_scores[(2, 1)] == pytest.approx(1.0, eps)
-    assert head_scores[(2, 2)] == pytest.approx(0.0, eps)
-    assert head_scores[(3, 0)] == pytest.approx(0.0, eps)
-    assert head_scores[(3, 1)] == pytest.approx(1.0, eps)
-    assert head_scores[(3, 2)] == pytest.approx(0.0, eps)
+    assert head_scores[(0, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores[(0, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores[(0, 2)] == pytest.approx(0.0, abs=eps)
+    assert head_scores[(2, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores[(2, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores[(2, 2)] == pytest.approx(0.0, abs=eps)
+    assert head_scores[(3, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores[(3, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores[(3, 2)] == pytest.approx(0.0, abs=eps)

     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
@@ -392,21 +392,21 @@ def test_beam_overfitting_IO():
     head_scores2 = head_scores2[0]
     label_scores2 = label_scores2[0]
     # check the results again
-    assert label_scores2[(0, "nsubj")] == pytest.approx(1.0, eps)
-    assert label_scores2[(0, "dobj")] == pytest.approx(0.0, eps)
-    assert label_scores2[(0, "punct")] == pytest.approx(0.0, eps)
-    assert label_scores2[(2, "nsubj")] == pytest.approx(0.0, eps)
-    assert label_scores2[(2, "dobj")] == pytest.approx(1.0, eps)
-    assert label_scores2[(2, "punct")] == pytest.approx(0.0, eps)
-    assert label_scores2[(3, "nsubj")] == pytest.approx(0.0, eps)
-    assert label_scores2[(3, "dobj")] == pytest.approx(0.0, eps)
-    assert label_scores2[(3, "punct")] == pytest.approx(1.0, eps)
-    assert head_scores2[(0, 0)] == pytest.approx(0.0, eps)
-    assert head_scores2[(0, 1)] == pytest.approx(1.0, eps)
-    assert head_scores2[(0, 2)] == pytest.approx(0.0, eps)
-    assert head_scores2[(2, 0)] == pytest.approx(0.0, eps)
-    assert head_scores2[(2, 1)] == pytest.approx(1.0, eps)
-    assert head_scores2[(2, 2)] == pytest.approx(0.0, eps)
-    assert head_scores2[(3, 0)] == pytest.approx(0.0, eps)
-    assert head_scores2[(3, 1)] == pytest.approx(1.0, eps)
-    assert head_scores2[(3, 2)] == pytest.approx(0.0, eps)
+    assert label_scores2[(0, "nsubj")] == pytest.approx(1.0, abs=eps)
+    assert label_scores2[(0, "dobj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(0, "punct")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(2, "nsubj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(2, "dobj")] == pytest.approx(1.0, abs=eps)
+    assert label_scores2[(2, "punct")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(3, "nsubj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(3, "dobj")] == pytest.approx(0.0, abs=eps)
+    assert label_scores2[(3, "punct")] == pytest.approx(1.0, abs=eps)
+    assert head_scores2[(0, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores2[(0, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores2[(0, 2)] == pytest.approx(0.0, abs=eps)
+    assert head_scores2[(2, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores2[(2, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores2[(2, 2)] == pytest.approx(0.0, abs=eps)
+    assert head_scores2[(3, 0)] == pytest.approx(0.0, abs=eps)
+    assert head_scores2[(3, 1)] == pytest.approx(1.0, abs=eps)
+    assert head_scores2[(3, 2)] == pytest.approx(0.0, abs=eps)

From 86a4e316b873a2752d06f3bc222a97074f69e716 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Wed, 13 Jan 2021 13:47:25 +0100
Subject: [PATCH 6/7] fix sent_starts

---
 spacy/tests/doc/test_token_api.py        |  4 ++++
 spacy/tests/training/test_new_example.py | 21 +++++++++++++++++++++
 spacy/tokens/doc.pyx                     |  2 +-
 spacy/tokens/token.pxd                   |  7 +++++++
 spacy/training/example.pyx               |  5 ++++-
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index d3fb044ee..0795080a5 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -261,14 +261,18 @@ def test_missing_head_dep(en_vocab):
     doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
     pred_has_heads = [t.has_head() for t in doc]
     pred_deps = [t.dep_ for t in doc]
+    pred_sent_starts = [t.is_sent_start for t in doc]
     assert pred_has_heads == [True, True, True, True, True, False]
     assert pred_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
+    assert pred_sent_starts == [True, False, False, False, False, False]
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
     ref_heads = [t.head.i for t in example.reference]
     ref_deps = [t.dep_ for t in example.reference]
     ref_has_heads = [t.has_head() for t in example.reference]
+    ref_sent_starts = [t.is_sent_start for t in example.reference]
     assert ref_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
     assert ref_has_heads == [True, True, True, True, True, False]
+    assert ref_sent_starts == [True, False, False, False, False, False]
     aligned_heads, aligned_deps = example.get_aligned_parse(projectivize=True)
     assert aligned_heads[5] == ref_heads[5]
     assert aligned_deps[5] == MISSING_DEP_
diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py
index 6b6486b2b..0a3184071 100644
--- a/spacy/tests/training/test_new_example.py
+++ b/spacy/tests/training/test_new_example.py
@@ -282,3 +282,24 @@ def test_Example_missing_deps():
     # when providing deps, the head information is actually used
     example_2 = Example.from_dict(predicted, annots_head_dep)
     assert [t.head.i for t in example_2.reference] == heads
+
+
+def test_Example_missing_heads():
+    vocab = Vocab()
+    words = ["I", "like", "London", "and", "Berlin", "."]
+    deps = ["nsubj", "ROOT", "dobj", None, "conj", "punct"]
+    heads = [1, 1, 1, None, 2, 1]
+    annots = {"words": words, "heads": heads, "deps": deps}
+    predicted = Doc(vocab, words=words)
+
+    example = Example.from_dict(predicted, annots)
+    parsed_heads = [t.head.i for t in example.reference]
+    assert parsed_heads[0] == heads[0]
+    assert parsed_heads[1] == heads[1]
+    assert parsed_heads[2] == heads[2]
+    assert parsed_heads[4] == heads[4]
+    assert parsed_heads[5] == heads[5]
+    assert [t.has_head() for t in example.reference] == [True, True, True, False, True, True]
+
+    # Ensure that the missing head doesn't create an artificial new sentence start
+    assert example.get_aligned_sent_starts() == [True, False, False, False, False, False]
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index fc14fb506..221e78b2e 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1540,7 +1540,7 @@ cdef int set_children_from_heads(TokenC* tokens, int start, int end) except -1:
     for i in range(start, end):
         tokens[i].sent_start = -1
     for i in range(start, end):
-        if tokens[i].head == 0:
+        if tokens[i].head == 0 and not Token.missing_head(&tokens[i]):
             tokens[tokens[i].l_edge].sent_start = 1

diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd
index 45c906a82..9006c874c 100644
--- a/spacy/tokens/token.pxd
+++ b/spacy/tokens/token.pxd
@@ -94,3 +94,10 @@ cdef class Token:
                 token.ent_kb_id = value
             elif feat_name == SENT_START:
                 token.sent_start = value
+
+    @staticmethod
+    cdef inline int missing_head(const TokenC* token) nogil:
+        if token.dep == 0:
+            return 1
+        else:
+            return 0
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 856719893..3303a8456 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -184,7 +184,10 @@ cdef class Example:
         heads = [token.head.i for token in self.y]
         deps = [token.dep_ for token in self.y]
         if projectivize:
-            heads, deps = nonproj.projectivize(heads, deps)
+            proj_heads, proj_deps = nonproj.projectivize(heads, deps)
+            # ensure that data that was previously missing, remains missing
+            heads = [h if has_heads[i] else heads[i] for i, h in enumerate(proj_heads)]
+            deps = [d if deps[i] != MISSING_DEP_ else MISSING_DEP_ for i, d in enumerate(proj_deps)]
         for cand_i in range(self.x.length):
             if cand_to_gold.lengths[cand_i] == 1:
                 gold_i = cand_to_gold[cand_i].dataXd[0, 0]

From ed53bb979d2b8e62577ff3d45140ca6a9a6bb1c5 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Wed, 13 Jan 2021 14:20:05 +0100
Subject: [PATCH 7/7] cleanup

---
 .../pipeline/_parser_internals/arc_eager.pyx |  5 ++--
 spacy/tests/doc/test_token_api.py            | 30 ++++++++++++-------
 spacy/tests/parser/test_parse.py             |  2 +-
 spacy/tokens/doc.pyx                         |  3 +-
 spacy/tokens/token.pxd                       | 12 +++++---
 spacy/tokens/token.pyx                       | 15 ++++++----
 spacy/training/example.pyx                   | 10 +++----
 7 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 50b620b7a..f50f91f21 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -9,7 +9,7 @@ from ...typedefs cimport hash_t, attr_t
 from ...strings cimport hash_string
 from ...structs cimport TokenC
 from ...tokens.doc cimport Doc, set_children_from_heads
-from ...tokens.token import MISSING_DEP_
+from ...tokens.token cimport MISSING_DEP
 from ...training.example cimport Example
 from .stateclass cimport StateClass
 from ._state cimport StateC, ArcC
@@ -196,8 +196,7 @@ cdef class ArcEagerGold:
     def __init__(self, ArcEager moves, StateClass stcls, Example example):
         self.mem = Pool()
         heads, labels = example.get_aligned_parse(projectivize=True)
-        labels = [label if label is not None else MISSING_DEP_ for label in labels]
-        labels = [example.x.vocab.strings.add(label) for label in labels]
+        labels = [example.x.vocab.strings.add(label) if label is not None else MISSING_DEP for label in labels]
         sent_starts = example.get_aligned_sent_starts()
         assert len(heads) == len(labels) == len(sent_starts), (len(heads), len(labels), len(sent_starts))
         self.c = create_gold_state(self.mem, stcls.c, heads, labels, sent_starts)
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index 0795080a5..dda28809d 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -4,7 +4,6 @@ from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STO
 from spacy.symbols import VERB
 from spacy.vocab import Vocab
 from spacy.tokens import Doc
-from spacy.tokens.token import MISSING_DEP_
 from spacy.training import Example


@@ -255,24 +254,35 @@ def test_token_api_non_conjuncts(en_vocab):


 def test_missing_head_dep(en_vocab):
-    heads = [1, 1, 1, 1, 2, None]
-    deps = ["nsubj", "ROOT", "dobj", "cc", "conj", None]
+    """ Check that the Doc constructor and Example.from_dict parse missing information the same"""
+    heads = [1, 1, 1, 1, 2, None]  # element 5 is missing
+    deps = ["", "ROOT", "dobj", "cc", "conj", None]  # element 0 and 5 are missing
     words = ["I", "like", "London", "and", "Berlin", "."]
     doc = Doc(en_vocab, words=words, heads=heads, deps=deps)
     pred_has_heads = [t.has_head() for t in doc]
+    pred_has_deps = [t.has_dep() for t in doc]
+    pred_heads = [t.head.i for t in doc]
     pred_deps = [t.dep_ for t in doc]
     pred_sent_starts = [t.is_sent_start for t in doc]
-    assert pred_has_heads == [True, True, True, True, True, False]
-    assert pred_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
+    assert pred_has_heads == [False, True, True, True, True, False]
+    assert pred_has_deps == [False, True, True, True, True, False]
+    assert pred_heads[1:5] == [1, 1, 1, 2]
+    assert pred_deps[1:5] == ["ROOT", "dobj", "cc", "conj"]
     assert pred_sent_starts == [True, False, False, False, False, False]
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
+    ref_has_heads = [t.has_head() for t in example.reference]
+    ref_has_deps = [t.has_dep() for t in example.reference]
     ref_heads = [t.head.i for t in example.reference]
     ref_deps = [t.dep_ for t in example.reference]
-    ref_has_heads = [t.has_head() for t in example.reference]
     ref_sent_starts = [t.is_sent_start for t in example.reference]
-    assert ref_deps == ["nsubj", "ROOT", "dobj", "cc", "conj", MISSING_DEP_]
-    assert ref_has_heads == [True, True, True, True, True, False]
-    assert ref_sent_starts == [True, False, False, False, False, False]
+    assert ref_has_heads == pred_has_heads
+    assert ref_has_deps == pred_has_deps
+    assert ref_heads == pred_heads
+    assert ref_deps == pred_deps
+    assert ref_sent_starts == pred_sent_starts
+    # check that the aligned parse preserves the missing information
     aligned_heads, aligned_deps = example.get_aligned_parse(projectivize=True)
+    assert aligned_deps[0] == ref_deps[0]
+    assert aligned_heads[0] == ref_heads[0]
+    assert aligned_deps[5] == ref_deps[5]
     assert aligned_heads[5] == ref_heads[5]
-    assert aligned_deps[5] == MISSING_DEP_
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 5b68bbc37..dc878dd7a 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -253,7 +253,7 @@ def test_overfitting_IO(pipe_name):
         parser.add_label(dep)
     optimizer = nlp.initialize()
     # run overfitting
-    for i in range(150):
+    for i in range(200):
         losses = {}
         nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses[pipe_name] < 0.0001
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 221e78b2e..456d0a732 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -16,7 +16,7 @@ from thinc.util import copy_array
 import warnings

 from .span cimport Span
-from .token import MISSING_DEP_
+from .token cimport MISSING_DEP
 from .token cimport Token
 from ..lexeme cimport Lexeme, EMPTY_LEXEME
 from ..typedefs cimport attr_t, flags_t
@@ -269,6 +269,7 @@ cdef class Doc:
         if heads is not None:
             heads = [head - i if head is not None else 0 for i, head in enumerate(heads)]
         if deps is not None:
+            MISSING_DEP_ = self.vocab.strings[MISSING_DEP]
             deps = [dep if dep is not None else MISSING_DEP_ for dep in deps]
         if deps and not heads:
             heads = [0] * len(deps)
diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd
index 9006c874c..58b727764 100644
--- a/spacy/tokens/token.pxd
+++ b/spacy/tokens/token.pxd
@@ -9,6 +9,7 @@ from ..lexeme cimport Lexeme

 from ..errors import Errors

+cdef int MISSING_DEP = 0

 cdef class Token:
     cdef readonly Vocab vocab
@@ -95,9 +96,12 @@ cdef class Token:
             elif feat_name == SENT_START:
                 token.sent_start = value

+
+    @staticmethod
+    cdef inline int missing_dep(const TokenC* token) nogil:
+        return token.dep == MISSING_DEP
+
+
     @staticmethod
     cdef inline int missing_head(const TokenC* token) nogil:
-        if token.dep == 0:
-            return 1
-        else:
-            return 0
+        return Token.missing_dep(token)
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index a6f9a2a0c..27aa30199 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -22,8 +22,6 @@ from .. import parts_of_speech
 from ..errors import Errors, Warnings
 from .underscore import Underscore, get_ext_args

-MISSING_DEP_ = ""
-

 cdef class Token:
     """An individual token – i.e. a word, punctuation symbol, whitespace,
@@ -640,14 +638,13 @@ cdef class Token:
             return False
         return any(ancestor.i == self.i for ancestor in descendant.ancestors)

-
     def has_head(self):
         """Check whether the token has annotated head information.
+        Return False when the head annotation is unset/missing.

         RETURNS (bool): Whether the head annotation is valid or not.
         """
-        return self.dep_ != MISSING_DEP_
-
+        return not Token.missing_head(self.c)

     property head:
         """The syntactic parent, or "governor", of this token.
@@ -873,6 +870,14 @@ cdef class Token:
         def __set__(self, tag):
             self.tag = self.vocab.strings.add(tag)

+    def has_dep(self):
+        """Check whether the token has annotated dep information.
+        Returns False when the dep label is unset/missing.
+
+        RETURNS (bool): Whether the dep label is valid or not.
+        """
+        return not Token.missing_dep(self.c)
+
     property dep_:
         """RETURNS (str): The syntactic dependency label."""
         def __get__(self):
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 3303a8456..fe4ee6ff4 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -12,7 +12,7 @@ from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags
 from .iob_utils import biluo_tags_to_spans
 from ..errors import Errors, Warnings
 from ..pipeline._parser_internals import nonproj
-from ..tokens.token import MISSING_DEP_
+from ..tokens.token cimport MISSING_DEP
 from ..util import logger

@@ -180,14 +180,15 @@ cdef class Example:
         gold_to_cand = self.alignment.y2x
         aligned_heads = [None] * self.x.length
         aligned_deps = [None] * self.x.length
+        has_deps = [token.has_dep() for token in self.y]
         has_heads = [token.has_head() for token in self.y]
         heads = [token.head.i for token in self.y]
         deps = [token.dep_ for token in self.y]
         if projectivize:
             proj_heads, proj_deps = nonproj.projectivize(heads, deps)
-            # ensure that data that was previously missing, remains missing
+            # ensure that missing data remains missing
             heads = [h if has_heads[i] else heads[i] for i, h in enumerate(proj_heads)]
-            deps = [d if deps[i] != MISSING_DEP_ else MISSING_DEP_ for i, d in enumerate(proj_deps)]
+            deps = [d if has_deps[i] else deps[i] for i, d in enumerate(proj_deps)]
         for cand_i in range(self.x.length):
             if cand_to_gold.lengths[cand_i] == 1:
                 gold_i = cand_to_gold[cand_i].dataXd[0, 0]
@@ -337,8 +338,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
                 values.append([h-i if h is not None else 0 for i, h in enumerate(value)])
             elif key == "DEP":
                 attrs.append(key)
-                value = [v if v is not None else MISSING_DEP_ for v in value]
-                values.append([vocab.strings.add(h) for h in value])
+                values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value])
             elif key == "SENT_START":
                 attrs.append(key)
                 values.append(value)
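
A minimal usage sketch of the partial-annotation behaviour this series introduces, assuming a spaCy v3 build with all seven patches applied. It is an illustration, not part of the patches, and it exercises only calls pinned down by the tests above: the Doc constructor with None entries in heads/deps, Token.has_head()/Token.has_dep(), and Example.get_aligned_parse().

    # Sketch only: mirrors test_missing_head_dep and test_Example_missing_heads.
    from spacy.vocab import Vocab
    from spacy.tokens import Doc
    from spacy.training import Example

    vocab = Vocab()
    words = ["I", "like", "London", "and", "Berlin", "."]
    heads = [1, 1, 1, 1, 2, None]                         # head of "." is unannotated
    deps = ["nsubj", "ROOT", "dobj", "cc", "conj", None]  # dep of "." is unannotated

    # The Doc constructor now accepts None for missing heads/deps instead of raising.
    doc = Doc(vocab, words=words, heads=heads, deps=deps)
    assert not doc[5].has_head()  # the missing head is flagged rather than guessed
    assert doc[5].head.i == 5     # a token with a missing head points to itself
    assert not doc[5].has_dep()   # the dep label is likewise marked as missing

    # Example.from_dict stores the same partial annotations on the reference doc,
    # and projectivization leaves the missing entries untouched.
    example = Example.from_dict(Doc(vocab, words=words), {"heads": heads, "deps": deps})
    aligned_heads, aligned_deps = example.get_aligned_parse(projectivize=True)
    assert [t.has_head() for t in example.reference] == [True] * 5 + [False]
    assert aligned_deps[5] == example.reference[5].dep_  # still the missing label

The design choice behind this: a missing head is encoded as a self-reference whose dep is the MISSING_DEP symbol (0), so has_head() and has_dep() are the reliable way to tell an unannotated token apart from a genuine root, and set_children_from_heads can avoid treating missing heads as new sentence starts.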