From 56c4e07a59ae7cd35b00a9de0ee0666938396104 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 27 Aug 2015 08:53:48 +1000 Subject: [PATCH 1/5] Update gazetteer.json --- lang_data/en/gazetteer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lang_data/en/gazetteer.json b/lang_data/en/gazetteer.json index 1aa6b9514..dce2e1f2a 100644 --- a/lang_data/en/gazetteer.json +++ b/lang_data/en/gazetteer.json @@ -14,8 +14,8 @@ {"orth": "9/11"} ], [ - {"lower": "Septmber"}, - {"lower": "Eleven"} + {"lower": "septmber"}, + {"lower": "eleven"} ], [ {"lower": "september"}, From fd1eeb3102d65504d0267861355d61e24e731086 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 6 Sep 2015 04:13:03 +0200 Subject: [PATCH 2/5] * Add POS attribute support in get_attr --- spacy/tokens/doc.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 7994c97c3..955e9b45f 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -12,6 +12,7 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST from ..attrs cimport POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE from ..parts_of_speech import UNIV_POS_NAMES from ..parts_of_speech cimport CONJ, PUNCT, NOUN +from ..parts_of_speech cimport univ_pos_t from ..lexeme cimport check_flag from ..lexeme cimport get_attr as get_lex_attr from .spans cimport Span @@ -327,6 +328,9 @@ cdef class Doc: elif attr_id == TAG: for i in range(length): tokens[i].tag = values[i] + elif attr_id == POS: + for i in range(length): + tokens[i].pos = values[i] elif attr_id == DEP: for i in range(length): tokens[i].dep = values[i] From 5edac11225b4435daac5776dd52ca105bc1d5233 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 6 Sep 2015 04:15:00 +0200 Subject: [PATCH 3/5] * Wrap self.parse in nogil, and break if an invalid move is predicted. 
The invalid break is a work-around that papers over likely bugs, but we can't easily raise in the nogil block, and otherwise we'll get an infinite loop. Need to set this as an error flag. --- spacy/syntax/parser.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index 6282339bd..59b90920c 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -84,8 +84,7 @@ cdef class Parser: cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE, self.model.n_feats, self.model.n_feats) - with nogil: - self.parse(stcls, eg.c) + self.parse(stcls, eg.c) tokens.set_parse(stcls._sent) cdef void predict(self, StateClass stcls, ExampleC* eg) nogil: @@ -98,6 +97,8 @@ cdef class Parser: cdef void parse(self, StateClass stcls, ExampleC eg) nogil: while not stcls.is_final(): self.predict(stcls, &eg) + if not eg.is_valid[eg.guess]: + break self.moves.c[eg.guess].do(stcls, self.moves.c[eg.guess].label) self.moves.finalize_state(stcls) From 571b6eda88bb72078b88b9a600455cb8ed3ab622 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 6 Sep 2015 05:40:10 +0200 Subject: [PATCH 4/5] * Upd tests --- tests/parser/test_initial_actions_parse.py | 5 ++++- tests/test_matcher.py | 12 ++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/parser/test_initial_actions_parse.py b/tests/parser/test_initial_actions_parse.py index c1603cd93..9f570d8be 100644 --- a/tests/parser/test_initial_actions_parse.py +++ b/tests/parser/test_initial_actions_parse.py @@ -4,7 +4,10 @@ import pytest def test_initial(EN): doc = EN.tokenizer(u'I ate the pizza with anchovies.') EN.tagger(doc) - next_actions = EN.parser.partial(doc, ['L-nsubj', 'S', 'L-det']) + with EN.parser.step_through(doc) as stepwise: + stepwise.transition('L-nsubj') + stepwise.transition('S') + stepwise.transition('L-det') assert doc[0].head.i == 1 assert doc[1].head.i == 1 assert doc[2].head.i == 3 diff --git 
a/tests/test_matcher.py b/tests/test_matcher.py index 0014e1110..1b748cb53 100644 --- a/tests/test_matcher.py +++ b/tests/test_matcher.py @@ -3,7 +3,7 @@ import pytest from spacy.strings import StringStore from spacy.matcher import * -from spacy.attrs import ORTH +from spacy.attrs import LOWER from spacy.tokens.doc import Doc from spacy.vocab import Vocab @@ -13,7 +13,7 @@ def matcher(EN): patterns = { 'Javascript': ['PRODUCT', {}, [[{'ORTH': 'JavaScript'}]]], 'GoogleNow': ['PRODUCT', {}, [[{'ORTH': 'Google'}, {'ORTH': 'Now'}]]], - 'Java': ['PRODUCT', {}, [[{'ORTH': 'Java'}]]], + 'Java': ['PRODUCT', {}, [[{'LOWER': 'java'}]]], } return Matcher(EN.vocab, patterns) @@ -33,7 +33,7 @@ def test_match_start(matcher, EN): def test_match_end(matcher, EN): - tokens = EN('I like Java') + tokens = EN('I like java') assert matcher(tokens) == [(EN.vocab.strings['PRODUCT'], 2, 3)] @@ -43,17 +43,17 @@ def test_match_middle(matcher, EN): def test_match_multi(matcher, EN): - tokens = EN('I like Google Now and Java best') + tokens = EN('I like Google Now and java best') assert matcher(tokens) == [(EN.vocab.strings['PRODUCT'], 2, 4), (EN.vocab.strings['PRODUCT'], 5, 6)] def test_match_preserved(matcher, EN): - doc = EN.tokenizer('I like Java') + doc = EN.tokenizer('I like java') EN.tagger(doc) EN.entity(doc) assert len(doc.ents) == 0 - doc = EN.tokenizer('I like Java') + doc = EN.tokenizer('I like java') matcher(doc) assert len(doc.ents) == 1 EN.tagger(doc) From 7e4fea67d39dd85b9aeed396a055cdd7e4e31971 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 6 Sep 2015 10:48:36 +0200 Subject: [PATCH 5/5] * Fix bug in token subtree, introduced by duplication of L/R code in Stateclass. Need to consolidate the two methods. 
--- spacy/tokens/token.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index f1f2696cb..cc50fdd08 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -142,7 +142,7 @@ cdef class Token: """The leftward immediate children of the word, in the syntactic dependency parse. """ - cdef const TokenC* ptr = self.c - self.i + cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) while ptr < self.c: # If this head is still to the right of us, we can skip to it # No token that's between this token and this head could be our @@ -160,7 +160,7 @@ cdef class Token: def __get__(self): """The rightward immediate children of the word, in the syntactic dependency parse.""" - cdef const TokenC* ptr = (self.c - self.i) + (self.array_len - 1) + cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) tokens = [] while ptr > self.c: # If this head is still to the right of us, we can skip to it