mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Don't pass the label_ids dict to Tokens, since the StringStore now manages the string-to-int mapping for labels.
This commit is contained in:
parent
27d9df49e7
commit
3105c7f8ba
|
@ -49,7 +49,7 @@ cdef class Tokens:
|
||||||
|
|
||||||
cpdef long[:,:] to_array(self, object features)
|
cpdef long[:,:] to_array(self, object features)
|
||||||
|
|
||||||
cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1
|
cdef int set_parse(self, const TokenC* parsed) except -1
|
||||||
|
|
||||||
|
|
||||||
cdef class Span:
|
cdef class Span:
|
||||||
|
@ -73,7 +73,7 @@ cdef class Token:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef inline Token cinit(Vocab vocab, unicode string,
|
cdef inline Token cinit(Vocab vocab, unicode string,
|
||||||
const TokenC* token, int offset, int array_len,
|
const TokenC* token, int offset, int array_len,
|
||||||
Tokens parent_seq, self._tag_strings):
|
Tokens parent_seq):
|
||||||
if offset < 0 or offset >= array_len:
|
if offset < 0 or offset >= array_len:
|
||||||
|
|
||||||
msg = "Attempt to access token at %d, max length %d"
|
msg = "Attempt to access token at %d, max length %d"
|
||||||
|
|
|
@ -92,7 +92,6 @@ cdef class Tokens:
|
||||||
self.is_tagged = False
|
self.is_tagged = False
|
||||||
self.is_parsed = False
|
self.is_parsed = False
|
||||||
self._py_tokens = []
|
self._py_tokens = []
|
||||||
self._tag_strings = tuple() # These will be set by the POS tagger and parser
|
|
||||||
|
|
||||||
def __getitem__(self, object i):
|
def __getitem__(self, object i):
|
||||||
"""Retrieve a token.
|
"""Retrieve a token.
|
||||||
|
@ -108,7 +107,7 @@ cdef class Tokens:
|
||||||
bounds_check(i, self.length, PADDING)
|
bounds_check(i, self.length, PADDING)
|
||||||
return Token.cinit(self.vocab, self._string,
|
return Token.cinit(self.vocab, self._string,
|
||||||
&self.data[i], i, self.length,
|
&self.data[i], i, self.length,
|
||||||
self, self._tag_strings)
|
self)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Iterate over the tokens.
|
"""Iterate over the tokens.
|
||||||
|
@ -119,7 +118,7 @@ cdef class Tokens:
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
yield Token.cinit(self.vocab, self._string,
|
yield Token.cinit(self.vocab, self._string,
|
||||||
&self.data[i], i, self.length,
|
&self.data[i], i, self.length,
|
||||||
self, self._tag_strings)
|
self)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return self.length
|
return self.length
|
||||||
|
@ -245,7 +244,7 @@ cdef class Tokens:
|
||||||
if start is not None:
|
if start is not None:
|
||||||
yield Span(self, start, self.length)
|
yield Span(self, start, self.length)
|
||||||
|
|
||||||
cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1:
|
cdef int set_parse(self, const TokenC* parsed) except -1:
|
||||||
self._py_tokens = [None] * self.length
|
self._py_tokens = [None] * self.length
|
||||||
self.is_parsed = True
|
self.is_parsed = True
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
|
@ -313,7 +312,7 @@ cdef class Token:
|
||||||
def nbor(self, int i=1):
|
def nbor(self, int i=1):
|
||||||
return Token.cinit(self.vocab, self._string,
|
return Token.cinit(self.vocab, self._string,
|
||||||
self.c, self.i, self.array_len,
|
self.c, self.i, self.array_len,
|
||||||
self._seq, self._tag_strings)
|
self._seq)
|
||||||
|
|
||||||
property string:
|
property string:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
@ -414,7 +413,7 @@ cdef class Token:
|
||||||
elif ptr + ptr.head == self.c:
|
elif ptr + ptr.head == self.c:
|
||||||
yield Token.cinit(self.vocab, self._string,
|
yield Token.cinit(self.vocab, self._string,
|
||||||
ptr, ptr - (self.c - self.i), self.array_len,
|
ptr, ptr - (self.c - self.i), self.array_len,
|
||||||
self._seq, self._tag_strings)
|
self._seq)
|
||||||
ptr += 1
|
ptr += 1
|
||||||
else:
|
else:
|
||||||
ptr += 1
|
ptr += 1
|
||||||
|
@ -433,7 +432,7 @@ cdef class Token:
|
||||||
elif ptr + ptr.head == self.c:
|
elif ptr + ptr.head == self.c:
|
||||||
yield Token.cinit(self.vocab, self._string,
|
yield Token.cinit(self.vocab, self._string,
|
||||||
ptr, ptr - (self.c - self.i), self.array_len,
|
ptr, ptr - (self.c - self.i), self.array_len,
|
||||||
self._seq, self._tag_strings)
|
self._seq)
|
||||||
ptr -= 1
|
ptr -= 1
|
||||||
else:
|
else:
|
||||||
ptr -= 1
|
ptr -= 1
|
||||||
|
@ -456,7 +455,7 @@ cdef class Token:
|
||||||
"""The token predicted by the parser to be the head of the current token."""
|
"""The token predicted by the parser to be the head of the current token."""
|
||||||
return Token.cinit(self.vocab, self._string,
|
return Token.cinit(self.vocab, self._string,
|
||||||
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
||||||
self._seq, self._tag_strings)
|
self._seq)
|
||||||
|
|
||||||
property whitespace_:
|
property whitespace_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
@ -496,7 +495,7 @@ cdef class Token:
|
||||||
|
|
||||||
property tag_:
|
property tag_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self._tag_strings[self.c.tag]
|
return self.vocab.strings[self.c.tag]
|
||||||
|
|
||||||
property dep_:
|
property dep_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user