From 98c29b79127ae62e9d8b69d9513cdded7a81ceb2 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 4 Nov 2017 00:23:23 +0100
Subject: [PATCH 1/9] Add padding vector in parser, to make gradient more
 correct

---
 spacy/syntax/nn_parser.pyx | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 68301238d..554addd53 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -532,7 +532,9 @@ cdef class Parser:
             return None
 
         backprops = []
-        d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
+        # Add a padding vector to the d_tokvecs gradient, so that missing
+        # values don't affect the real gradient.
+        d_tokvecs = state2vec.ops.allocate((tokvecs.shape[0]+1, tokvecs.shape[1]))
         cdef float loss = 0.
         n_steps = 0
         while todo:
@@ -615,7 +617,9 @@ cdef class Parser:
                     bp_vectors))
             else:
                 backprop_lower.append((ids, d_vector, bp_vectors))
-        d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
+        # Add a padding vector to the d_tokvecs gradient, so that missing
+        # values don't affect the real gradient.
+        d_tokvecs = state2vec.ops.allocate((tokvecs.shape[0]+1, tokvecs.shape[1]))
         self._make_updates(d_tokvecs, bp_tokvecs, backprop_lower, sgd,
                            cuda_stream)
 
@@ -668,7 +672,8 @@ cdef class Parser:
                 (ids.size, d_state_features.shape[2]))
             self.model[0].ops.scatter_add(d_tokvecs, ids,
                 d_state_features)
-        bp_tokvecs(d_tokvecs, sgd=sgd)
+        # d_tokvecs has a trailing padding row (see update()); strip it before backprop.
+        bp_tokvecs(d_tokvecs[:-1], sgd=sgd)
 
     @property
     def move_names(self):

From e4ec4be9485c2293fd15e3deb4fe27f6bb72d334 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 4 Nov 2017 00:23:45 +0100
Subject: [PATCH 2/9] Fix parser test

---
 spacy/tests/test_misc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 5c69dae3e..fa571ce90 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -69,7 +69,7 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
     Y, get_dX = model.begin_update(tensor)
     assert Y.shape == (tensor.shape[0]+1, nF, nO, nP)
     assert model.d_pad.shape == (1, nF, nO, nP)
-    dY = model.ops.allocate((15, nF, nO, nP))
+    dY = model.ops.allocate((15, nO, nP))
     ids = model.ops.allocate((15, nF))
     ids[1,2] = -1
     dY[1,2] = 1

From a2162b89086aeecf8b92891ca516f40fc666efb1 Mon Sep 17 00:00:00 2001
From: uwol <ulrich@wolffgang.de>
Date: Sun, 5 Nov 2017 12:25:10 +0100
Subject: [PATCH 3/9] tensorizer return parameter fix

---
 spacy/pipeline.pyx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index a159fad50..5a72dc946 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -318,7 +318,7 @@ class Tensorizer(Pipe):
         loss, d_scores = self.get_loss(docs, golds, scores)
         d_inputs = bp_scores(d_scores, sgd=sgd)
         d_inputs = self.model.ops.xp.split(d_inputs, len(self.input_models), axis=1)
-        for d_input, bp_input in zip(d_inputs, bp_inputs): 
+        for d_input, bp_input in zip(d_inputs, bp_inputs):
             bp_input(d_input, sgd=sgd)
         if losses is not None:
             losses.setdefault(self.name, 0.)
@@ -777,7 +777,8 @@ class TextCategorizer(Pipe):
     def predict(self, docs):
         scores = self.model(docs)
         scores = self.model.ops.asarray(scores)
-        return scores
+        tensors = [doc.tensor for doc in docs]
+        return scores, tensors
 
     def set_annotations(self, docs, scores, tensors=None):
         for i, doc in enumerate(docs):

From 9c9ed7890a57eccd6390632a73f13fc33565b513 Mon Sep 17 00:00:00 2001
From: uwol <ulrich@wolffgang.de>
Date: Sun, 5 Nov 2017 12:33:43 +0100
Subject: [PATCH 4/9] added contributor agreement

---
 .github/contributors/uwol.md | 106 +++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 .github/contributors/uwol.md

diff --git a/.github/contributors/uwol.md b/.github/contributors/uwol.md
new file mode 100644
index 000000000..ddc82d220
--- /dev/null
+++ b/.github/contributors/uwol.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                              |
+|------------------------------- | --------------------               |
+| Name                           | Ulrich Wolffgang                   |
+| Company name (if applicable)   |                                    |
+| Title or role (if applicable)  |                                    |
+| Date                           | 2017-11-05                         |
+| GitHub username                | uwol                               |
+| Website (optional)             | https://uwol.github.io/            |

From 0d4bd6414e011ff16b9987cf914978e91de91085 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 5 Nov 2017 14:11:03 +0100
Subject: [PATCH 5/9] Fix Italian tag map

---
 spacy/lang/it/tag_map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/it/tag_map.py b/spacy/lang/it/tag_map.py
index ef4fcf1da..420165f24 100644
--- a/spacy/lang/it/tag_map.py
+++ b/spacy/lang/it/tag_map.py
@@ -316,5 +316,5 @@ TAG_MAP = {
     "V__VerbForm=Ger": {"pos": "VERB"},
     "V__VerbForm=Inf": {"pos": "VERB"},
     "X___": {"pos": "X"},
-    "_SP": {"pos": "_SP"}
+    "_SP": {"pos": "SPACE"}
 }

From 00435d8f0cc906878cd6084c78c17cbc5a49b66e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 5 Nov 2017 14:39:57 +0100
Subject: [PATCH 6/9] Add extra beam parsing test

---
 spacy/tests/parser/test_beam_parse.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/parser/test_beam_parse.py b/spacy/tests/parser/test_beam_parse.py
index dd77c6805..59e307bcb 100644
--- a/spacy/tests/parser/test_beam_parse.py
+++ b/spacy/tests/parser/test_beam_parse.py
@@ -2,10 +2,22 @@
 from __future__ import unicode_literals
 
 import pytest
+from ...language import Language
+from ...pipeline import DependencyParser
 
 
 @pytest.mark.models('en')
-def test_beam_parse(EN):
+def test_beam_parse_en(EN):
     doc = EN(u'Australia is a country', disable=['ner'])
     ents = EN.entity(doc, beam_width=2)
     print(ents)
+
+
+def test_beam_parse():
+    nlp = Language()
+    nlp.add_pipe(DependencyParser(nlp.vocab), name='parser')
+    nlp.parser.add_label('nsubj')
+    nlp.begin_training()
+
+    doc = nlp.make_doc(u'Australia is a country')
+    nlp.parser(doc, beam_width=2)

From 225cc249c920471ff677cb69d8eefa4f289dd7c6 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 5 Nov 2017 14:42:46 +0100
Subject: [PATCH 7/9] Pass string path to numpy, to fix #1479

---
 spacy/vectors.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index a96913109..8b85bba9c 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -358,7 +358,7 @@ cdef class Vectors:
         def load_vectors(path):
             xp = Model.ops.xp
             if path.exists():
-                self.data = xp.load(path)
+                self.data = xp.load(str(path))
 
         serializers = OrderedDict((
             ('key2row', load_key2row),

From 6f438b17c1ba27d7122ab53e7dd4633114be382f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 5 Nov 2017 14:43:36 +0100
Subject: [PATCH 8/9] Increment version to v2.0.0a19

---
 spacy/about.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/about.py b/spacy/about.py
index 6f029bd9d..e9614eb40 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -3,7 +3,7 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 
 __title__ = 'spacy-nightly'
-__version__ = '2.0.0a18'
+__version__ = '2.0.0a19'
 __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
 __uri__ = 'https://spacy.io'
 __author__ = 'Explosion AI'

From 2b35bb76addc664d722cff0d00a2cf597610c347 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 5 Nov 2017 15:34:40 +0100
Subject: [PATCH 9/9] Fix tensorizer on GPU

---
 spacy/pipeline.pyx         | 6 +++++-
 spacy/syntax/nn_parser.pyx | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 5a72dc946..f3defeeb9 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -415,7 +415,11 @@ class Tagger(Pipe):
                     vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
                 idx += 1
             if tensors is not None:
-                doc.extend_tensor(tensors[i])
+                if isinstance(doc.tensor, numpy.ndarray) \
+                and not isinstance(tensors[i], numpy.ndarray):
+                    doc.extend_tensor(tensors[i].get())
+                else:
+                    doc.extend_tensor(tensors[i])
         doc.is_tagged = True
 
     def update(self, docs, golds, drop=0., sgd=None, losses=None):
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 6bfd729eb..08b01a88f 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -751,7 +751,11 @@ cdef class Parser:
             for j in range(doc.length):
                 doc.c[j] = state.c._sent[j]
             if tensors is not None:
-                doc.extend_tensor(tensors[i])
+                if isinstance(doc.tensor, numpy.ndarray) \
+                and not isinstance(tensors[i], numpy.ndarray):
+                    doc.extend_tensor(tensors[i].get())
+                else:
+                    doc.extend_tensor(tensors[i])
             self.moves.finalize_doc(doc)
 
             for hook in self.postprocesses: