Merge branch 'master' of ssh://github.com/honnibal/spaCy

This commit is contained in:
Matthew Honnibal 2015-07-01 15:37:11 +02:00
commit 2f46015c34
3 changed files with 66 additions and 94 deletions

View File

@ -103,7 +103,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
def run_setup(exts):
setup(
name='spacy',
packages=['spacy', 'spacy.en', 'spacy.syntax'],
packages=['spacy', 'spacy.en', 'spacy.syntax', 'spacy.munge'],
description="Industrial-strength NLP",
author='Matthew Honnibal',
author_email='honnibal@gmail.com',

View File

@ -41,45 +41,80 @@ cdef class StateClass:
if (i + self._b_i) >= self.length:
return -1
return self._buffer[self._b_i + i]
cdef int H(self, int i) nogil
# Pointer form of S(): takes the index returned by self.S(i) and passes it to
# safe_get(), so the caller always receives a dereferenceable TokenC pointer
# (safe_get substitutes &self._empty_token for out-of-range indices).
cdef inline const TokenC* S_(self, int i) nogil:
return self.safe_get(self.S(i))
# Pointer form of B(): resolves self.B(i) to an index into the sentence and
# returns the matching TokenC through safe_get(). B() yields -1 once
# i + _b_i reaches length, which safe_get() maps to the empty token.
cdef inline const TokenC* B_(self, int i) nogil:
return self.safe_get(self.B(i))
# Pointer form of H(): returns the TokenC at the index computed by self.H(i).
# H() returns -1 for an out-of-range i, which safe_get() turns into a pointer
# to the empty token instead of an invalid address.
cdef inline const TokenC* H_(self, int i) nogil:
return self.safe_get(self.H(i))
# Pointer form of E(): passes the index returned by self.E(i) to safe_get()
# and returns the resulting TokenC pointer (the empty token if that index is
# outside [0, length)).
cdef inline const TokenC* E_(self, int i) nogil:
return self.safe_get(self.E(i))
# Pointer form of L(): returns the TokenC at the index self.L(i, idx).
# L() reports failure as -1, which safe_get() converts to the empty token.
cdef inline const TokenC* L_(self, int i, int idx) nogil:
return self.safe_get(self.L(i, idx))
# Pointer form of R(): returns the TokenC at the index self.R(i, idx).
# R() reports failure as -1, which safe_get() converts to the empty token.
cdef inline const TokenC* R_(self, int i, int idx) nogil:
return self.safe_get(self.R(i, idx))
# Bounds-checked access to the sentence array.
# Returns &self._sent[i] when 0 <= i < self.length; for any other index it
# returns the address of self._empty_token, so callers never get a pointer
# outside the array and do not need their own range check.
cdef inline const TokenC* safe_get(self, int i) nogil:
if i < 0 or i >= self.length:
return &self._empty_token
else:
return &self._sent[i]
# Index of the head of token i, or -1 when i is outside [0, length).
# The result is self._sent[i].head + i, i.e. the stored `head` value is
# treated as an offset from the token's own position.
# NOTE(review): a token whose head field is 0 therefore yields i itself;
# has_head() tests the same field against 0 - confirm 0 means "no head".
cdef inline int H(self, int i) nogil:
if i < 0 or i >= self.length:
return -1
return self._sent[i].head + i
cdef int E(self, int i) nogil
cdef int R(self, int i, int idx) nogil
cdef int L(self, int i, int idx) nogil
cdef int R(self, int i, int idx) nogil
cdef const TokenC* S_(self, int i) nogil
cdef const TokenC* B_(self, int i) nogil
# True when the stack counter self._s_i is zero or negative, i.e. there is
# nothing on the stack.
cdef inline bint empty(self) nogil:
return self._s_i <= 0
cdef const TokenC* H_(self, int i) nogil
cdef const TokenC* E_(self, int i) nogil
# True when buffer_length() is exactly 0.
# NOTE(review): the comparison is == 0, so a negative buffer_length() would
# report False here - confirm buffer_length() cannot go below zero.
cdef inline bint eol(self) nogil:
return self.buffer_length() == 0
cdef const TokenC* L_(self, int i, int idx) nogil
cdef const TokenC* R_(self, int i, int idx) nogil
# True when self._break holds anything other than -1.
# -1 appears to be the "no break set" sentinel; buffer_length() uses the
# same test to decide which upper bound applies.
cdef inline bint at_break(self) nogil:
return self._break != -1
cdef const TokenC* safe_get(self, int i) nogil
# True when both conditions hold: the stack depth is <= 0 and the buffer
# offset self._b_i has reached or passed self.length.
# NOTE(review): unlike buffer_length(), this compares against self.length
# and never consults self._break - confirm that is intended.
cdef inline bint is_final(self) nogil:
return self.stack_depth() <= 0 and self._b_i >= self.length
cdef bint empty(self) nogil
cdef bint entity_is_open(self) nogil
# True when the `head` field of token i is non-zero.
# The token is fetched with safe_get(), so an out-of-range i is evaluated
# against the empty token rather than reading outside the array.
cdef inline bint has_head(self, int i) nogil:
return self.safe_get(i).head != 0
cdef bint eol(self) nogil
cdef bint at_break(self) nogil
# Returns the l_kids field of token i (presumably its number of left
# children - verify against the TokenC definition). Out-of-range i reads the
# field from the empty token via safe_get().
cdef inline int n_L(self, int i) nogil:
return self.safe_get(i).l_kids
cdef bint is_final(self) nogil
# Returns the r_kids field of token i (presumably its number of right
# children - verify against the TokenC definition). Out-of-range i reads the
# field from the empty token via safe_get().
cdef inline int n_R(self, int i) nogil:
return self.safe_get(i).r_kids
cdef bint has_head(self, int i) nogil
# Unconditionally returns False; no state is inspected.
# NOTE(review): looks like a placeholder - confirm whether callers rely on
# the constant result.
cdef inline bint stack_is_connected(self) nogil:
return False
cdef int n_L(self, int i) nogil
# Reports whether the most recently recorded entity is still open.
# With no entities recorded (self._e_i < 1) the answer is False; otherwise
# the last entry, self._ents[self._e_i - 1], counts as open while its `end`
# is -1 (presumably the "end not yet assigned" sentinel - TODO confirm).
cdef inline bint entity_is_open(self) nogil:
if self._e_i < 1:
return False
return self._ents[self._e_i-1].end == -1
cdef int n_R(self, int i) nogil
# Returns the stack counter self._s_i unchanged.
cdef inline int stack_depth(self) nogil:
return self._s_i
cdef bint stack_is_connected(self) nogil
cdef int stack_depth(self) nogil
cdef int buffer_length(self) nogil
# Number of positions between the buffer offset self._b_i and the current
# upper bound. The bound is self._break when one is set (anything other
# than -1) and self.length otherwise.
cdef inline int buffer_length(self) nogil:
if self._break != -1:
return self._break - self._b_i
else:
return self.length - self._b_i
cdef void push(self) nogil

View File

@ -34,11 +34,6 @@ cdef class StateClass:
self._buffer[i] = i
self._empty_token.lex = &EMPTY_LEXEME
# Index of the head of token i, or -1 when i is outside [0, length).
# Computed as self._sent[i].head + i, so the stored `head` value is used as
# an offset from the token's own position.
cdef int H(self, int i) nogil:
if i < 0 or i >= self.length:
return -1
return self._sent[i].head + i
cdef int E(self, int i) nogil:
if self._e_i <= 0 or self._e_i >= self.length:
return 0
@ -52,6 +47,8 @@ cdef class StateClass:
if i < 0 or i >= self.length:
return -1
cdef const TokenC* target = &self._sent[i]
if target.l_kids < idx:
return -1
cdef const TokenC* ptr = self._sent
while ptr < target:
@ -75,8 +72,10 @@ cdef class StateClass:
return -1
if i < 0 or i >= self.length:
return -1
cdef const TokenC* ptr = self._sent + (self.length - 1)
cdef const TokenC* target = &self._sent[i]
if target.r_kids < idx:
return -1
cdef const TokenC* ptr = self._sent + (self.length - 1)
while ptr > target:
# If this head is still to the right of us, we can skip to it
# No token that's between this token and this head could be our
@ -92,68 +91,6 @@ cdef class StateClass:
ptr -= 1
return -1
# Pointer form of S(): resolves self.S(i) to an index and returns the
# corresponding TokenC via safe_get() (the empty token if out of range).
cdef const TokenC* S_(self, int i) nogil:
return self.safe_get(self.S(i))
# Pointer form of B(): resolves self.B(i) to an index and returns the
# corresponding TokenC via safe_get() (the empty token if out of range).
cdef const TokenC* B_(self, int i) nogil:
return self.safe_get(self.B(i))
# Pointer form of H(): returns the TokenC at index self.H(i); H()'s -1
# result for an invalid i becomes the empty token through safe_get().
cdef const TokenC* H_(self, int i) nogil:
return self.safe_get(self.H(i))
# Pointer form of E(): returns the TokenC at index self.E(i), bounds-checked
# by safe_get().
cdef const TokenC* E_(self, int i) nogil:
return self.safe_get(self.E(i))
# Pointer form of L(): returns the TokenC at index self.L(i, idx). A -1 from
# L() is converted to the empty token by safe_get().
cdef const TokenC* L_(self, int i, int idx) nogil:
return self.safe_get(self.L(i, idx))
# Pointer form of R(): returns the TokenC at index self.R(i, idx). A -1 from
# R() is converted to the empty token by safe_get().
cdef const TokenC* R_(self, int i, int idx) nogil:
return self.safe_get(self.R(i, idx))
# Bounds-checked access to the sentence array: &self._sent[i] for
# 0 <= i < self.length, otherwise the address of self._empty_token so the
# caller always gets a pointer it can dereference.
cdef const TokenC* safe_get(self, int i) nogil:
if i < 0 or i >= self.length:
return &self._empty_token
else:
return &self._sent[i]
# True when the stack counter self._s_i is zero or negative.
cdef bint empty(self) nogil:
return self._s_i <= 0
# True when buffer_length() is exactly 0 (strict equality, so a negative
# length would not count).
cdef bint eol(self) nogil:
return self.buffer_length() == 0
# True when self._break holds anything other than -1 (-1 appears to be the
# "no break set" sentinel).
cdef bint at_break(self) nogil:
return self._break != -1
# True when the stack depth is <= 0 and the buffer offset self._b_i has
# reached or passed self.length.
# NOTE(review): self._break is not consulted here, unlike buffer_length() -
# confirm that is intended.
cdef bint is_final(self) nogil:
return self.stack_depth() <= 0 and self._b_i >= self.length
# True when the `head` field of token i is non-zero; an out-of-range i is
# checked against the empty token because the lookup goes through safe_get().
cdef bint has_head(self, int i) nogil:
return self.safe_get(i).head != 0
# Returns the l_kids field of token i, read through safe_get() (presumably
# the count of left children - verify against TokenC).
cdef int n_L(self, int i) nogil:
return self.safe_get(i).l_kids
# Returns the r_kids field of token i, read through safe_get() (presumably
# the count of right children - verify against TokenC).
cdef int n_R(self, int i) nogil:
return self.safe_get(i).r_kids
# Always returns False without inspecting any state.
# NOTE(review): looks like a placeholder - confirm.
cdef bint stack_is_connected(self) nogil:
return False
# False when no entity has been recorded (self._e_i < 1); otherwise True
# exactly when the last entry self._ents[self._e_i - 1] has end == -1
# (presumably "end not yet assigned" - TODO confirm).
cdef bint entity_is_open(self) nogil:
if self._e_i < 1:
return False
return self._ents[self._e_i-1].end == -1
# Returns the stack counter self._s_i unchanged.
cdef int stack_depth(self) nogil:
return self._s_i
# Distance from the buffer offset self._b_i to the current upper bound:
# self._break when one is set (not -1), otherwise self.length.
cdef int buffer_length(self) nogil:
if self._break != -1:
return self._break - self._b_i
else:
return self.length - self._b_i
cdef void push(self) nogil:
if self.B(0) != -1:
self._stack[self._s_i] = self.B(0)