From ad068f51be6a2579c19c35bcd8b5c1767441ffc1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 27 Oct 2018 00:46:30 +0200
Subject: [PATCH] Fix out-of-bounds access in NER training

The helper method state.B(1) gets the index of the first token of the
buffer, or -1 if no such token exists. Normally this is safe because we
pass this to functions like state.safe_get(), which returns an empty
token. Here we used it directly as an array index, which is not okay!

This error may have been the cause of out-of-bounds access errors during
training. Similar errors may still be around, so much be hunted down.
Hunting this one down took a long time...I printed out values across
training runs and diffed, looking for points of divergence between
runs, when no randomness should be allowed.
---
 spacy/syntax/ner.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx
index c51428fe9..b92b02559 100644
--- a/spacy/syntax/ner.pyx
+++ b/spacy/syntax/ner.pyx
@@ -338,7 +338,7 @@ cdef class In:
     @staticmethod
     cdef weight_t cost(StateClass s, const GoldParseC* gold, attr_t label) nogil:
         move = IN
-        cdef int next_act = gold.ner[s.B(1)].move if s.B(0) < s.c.length else OUT
+        cdef int next_act = gold.ner[s.B(1)].move if s.B(1) >= 0 else OUT
         cdef int g_act = gold.ner[s.B(0)].move
         cdef attr_t g_tag = gold.ner[s.B(0)].label
         cdef bint is_sunk = _entity_is_sunk(s, gold.ner)