From 3105c7f8ba0a528064ea01d256c8ef5bd4e91113 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 14 Mar 2015 14:53:50 -0400
Subject: [PATCH] * Don't pass label_ids dict to Tokens, since we now use the
 StringStore to manage string-to-int mapping for labels

---
 spacy/tokens.pxd |  4 ++--
 spacy/tokens.pyx | 17 ++++++++---------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index 169ed0b1b..c64104256 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -49,7 +49,7 @@ cdef class Tokens:
 
     cpdef long[:,:] to_array(self, object features)
 
-    cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1
+    cdef int set_parse(self, const TokenC* parsed) except -1
 
 
 cdef class Span:
@@ -73,7 +73,7 @@ cdef class Token:
 
     @staticmethod
     cdef inline Token cinit(Vocab vocab, unicode string,
                             const TokenC* token, int offset, int array_len,
-                            Tokens parent_seq, self._tag_strings):
+                            Tokens parent_seq):
         if offset < 0 or offset >= array_len:
             msg = "Attempt to access token at %d, max length %d"
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 696e14ea3..e6f5d3068 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -92,7 +92,6 @@ cdef class Tokens:
         self.is_tagged = False
         self.is_parsed = False
         self._py_tokens = []
-        self._tag_strings = tuple() # These will be set by the POS tagger and parser
 
     def __getitem__(self, object i):
         """Retrieve a token.
@@ -108,7 +107,7 @@ cdef class Tokens:
         bounds_check(i, self.length, PADDING)
         return Token.cinit(self.vocab, self._string,
                            &self.data[i], i, self.length,
-                           self, self._tag_strings)
+                           self)
 
     def __iter__(self):
         """Iterate over the tokens.
@@ -119,7 +118,7 @@ cdef class Tokens:
         for i in range(self.length):
             yield Token.cinit(self.vocab, self._string,
                               &self.data[i], i, self.length,
-                              self, self._tag_strings)
+                              self)
 
     def __len__(self):
         return self.length
@@ -245,7 +244,7 @@ cdef class Tokens:
         if start is not None:
             yield Span(self, start, self.length)
 
-    cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1:
+    cdef int set_parse(self, const TokenC* parsed) except -1:
         self._py_tokens = [None] * self.length
         self.is_parsed = True
         for i in range(self.length):
@@ -313,7 +312,7 @@ cdef class Token:
     def nbor(self, int i=1):
         return Token.cinit(self.vocab, self._string,
                            self.c, self.i, self.array_len,
-                           self._seq, self._tag_strings)
+                           self._seq)
 
     property string:
         def __get__(self):
@@ -414,7 +413,7 @@ cdef class Token:
                 elif ptr + ptr.head == self.c:
                     yield Token.cinit(self.vocab, self._string, ptr,
                                       ptr - (self.c - self.i), self.array_len,
-                                      self._seq, self._tag_strings)
+                                      self._seq)
                     ptr += 1
                 else:
                     ptr += 1
@@ -433,7 +432,7 @@ cdef class Token:
                 elif ptr + ptr.head == self.c:
                     yield Token.cinit(self.vocab, self._string, ptr,
                                       ptr - (self.c - self.i), self.array_len,
-                                      self._seq, self._tag_strings)
+                                      self._seq)
                     ptr -= 1
                 else:
                     ptr -= 1
@@ -456,7 +455,7 @@ cdef class Token:
             """The token predicted by the parser to be the head of the current token."""
             return Token.cinit(self.vocab, self._string,
                                self.c + self.c.head, self.i + self.c.head, self.array_len,
-                               self._seq, self._tag_strings)
+                               self._seq)
 
     property whitespace_:
         def __get__(self):
@@ -496,7 +495,7 @@ cdef class Token:
 
     property tag_:
         def __get__(self):
-            return self._tag_strings[self.c.tag]
+            return self.vocab.strings[self.c.tag]
 
     property dep_:
         def __get__(self):
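
The idea behind the change, sketched below in plain Python: one shared store interns
strings to integer ids and resolves ids back to strings, so each Tokens object no
longer needs its own _tag_strings tuple or label_ids dict, and Token.tag_ can simply
read self.vocab.strings[self.c.tag]. The class name StringStoreSketch and its layout
are illustrative assumptions, not spaCy's actual Cython StringStore implementation.

    # Minimal sketch of the interning idea (assumed illustration, not spaCy code):
    # a single object owns the string <-> id mapping shared by every document.
    class StringStoreSketch:
        def __init__(self):
            self._strings = []   # id -> string
            self._ids = {}       # string -> id

        def __getitem__(self, key):
            # Integer key: resolve an id back to its string.
            if isinstance(key, int):
                return self._strings[key]
            # String key: intern it on first sight, then return its id.
            if key not in self._ids:
                self._ids[key] = len(self._strings)
                self._strings.append(key)
            return self._ids[key]

    # Usage: the tagger and parser store integer ids on each token; string
    # properties recover the text from the shared store.
    strings = StringStoreSketch()
    tag_id = strings["NN"]
    assert strings[tag_id] == "NN"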