Merge branch 'master' of ssh://github.com/honnibal/spaCy

This commit is contained in:
Matthew Honnibal 2015-07-01 15:37:11 +02:00
commit 2f46015c34
3 changed files with 66 additions and 94 deletions

View File

@ -103,7 +103,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
def run_setup(exts):
setup(
name='spacy',
packages=['spacy', 'spacy.en', 'spacy.syntax'],
packages=['spacy', 'spacy.en', 'spacy.syntax', 'spacy.munge'],
description="Industrial-strength NLP",
author='Matthew Honnibal',
author_email='honnibal@gmail.com',

View File

@ -42,44 +42,79 @@ cdef class StateClass:
return -1
return self._buffer[self._b_i + i]
cdef int H(self, int i) nogil
cdef inline const TokenC* S_(self, int i) nogil:
    # Pointer form of S(): look up the index S(i) yields, then hand it to
    # safe_get, which returns &self._empty_token for out-of-range indices.
    cdef int token_index = self.S(i)
    return self.safe_get(token_index)
cdef inline const TokenC* B_(self, int i) nogil:
    # Pointer form of B(): look up the index B(i) yields, then hand it to
    # safe_get, which returns &self._empty_token for out-of-range indices.
    cdef int token_index = self.B(i)
    return self.safe_get(token_index)
cdef inline const TokenC* H_(self, int i) nogil:
    # Pointer form of H(): H(i) yields -1 when i is out of range, and
    # safe_get maps any out-of-range index to &self._empty_token.
    cdef int head_index = self.H(i)
    return self.safe_get(head_index)
cdef inline const TokenC* E_(self, int i) nogil:
    # Pointer form of E(): look up the index E(i) yields, then hand it to
    # safe_get, which returns &self._empty_token for out-of-range indices.
    cdef int token_index = self.E(i)
    return self.safe_get(token_index)
cdef inline const TokenC* L_(self, int i, int idx) nogil:
    # Pointer form of L(): L(i, idx) yields an index (or -1), and safe_get
    # maps any out-of-range index to &self._empty_token.
    cdef int child_index = self.L(i, idx)
    return self.safe_get(child_index)
cdef inline const TokenC* R_(self, int i, int idx) nogil:
    # Pointer form of R(): R(i, idx) yields an index (or -1), and safe_get
    # maps any out-of-range index to &self._empty_token.
    cdef int child_index = self.R(i, idx)
    return self.safe_get(child_index)
cdef inline const TokenC* safe_get(self, int i) nogil:
    # Bounds-checked token lookup: an index inside [0, self.length) gives the
    # address of that entry in self._sent; any other index gives the address
    # of self._empty_token instead.
    if 0 <= i < self.length:
        return &self._sent[i]
    return &self._empty_token
cdef inline int H(self, int i) nogil:
    # Returns self._sent[i].head + i for an index inside [0, self.length),
    # and -1 for any other index.
    # NOTE(review): adding i suggests `head` is stored relative to the
    # token's own position -- confirm against the TokenC definition.
    if 0 <= i < self.length:
        return self._sent[i].head + i
    return -1
cdef int E(self, int i) nogil
cdef int L(self, int i, int idx) nogil
cdef int R(self, int i, int idx) nogil
cdef const TokenC* S_(self, int i) nogil
cdef const TokenC* B_(self, int i) nogil
cdef int L(self, int i, int idx) nogil
cdef const TokenC* H_(self, int i) nogil
cdef const TokenC* E_(self, int i) nogil
cdef inline bint empty(self) nogil:
    # True when the stack counter self._s_i is zero or negative.
    if self._s_i > 0:
        return False
    return True
cdef const TokenC* L_(self, int i, int idx) nogil
cdef const TokenC* R_(self, int i, int idx) nogil
cdef inline bint eol(self) nogil:
    # True when buffer_length() reports exactly zero remaining entries.
    cdef int remaining = self.buffer_length()
    return remaining == 0
cdef const TokenC* safe_get(self, int i) nogil
cdef inline bint at_break(self) nogil:
    # self._break holds -1 when unset; any other value counts as a break.
    if self._break == -1:
        return False
    return True
cdef bint empty(self) nogil
cdef inline bint is_final(self) nogil:
    # Final means both: nothing left on the stack (stack_depth() <= 0) and
    # the buffer cursor self._b_i has reached or passed self.length.
    if self.stack_depth() > 0:
        return False
    return self._b_i >= self.length
cdef bint entity_is_open(self) nogil
cdef inline bint has_head(self, int i) nogil:
    # True when the token at i has a non-zero `head` field. Out-of-range
    # indices are read from self._empty_token via safe_get.
    cdef const TokenC* token = self.safe_get(i)
    return token.head != 0
cdef bint eol(self) nogil
cdef inline int n_L(self, int i) nogil:
    # Returns the `l_kids` field of the token at i. Out-of-range indices are
    # read from self._empty_token via safe_get.
    cdef const TokenC* token = self.safe_get(i)
    return token.l_kids
cdef bint at_break(self) nogil
cdef inline int n_R(self, int i) nogil:
    # Returns the `r_kids` field of the token at i. Out-of-range indices are
    # read from self._empty_token via safe_get.
    cdef const TokenC* token = self.safe_get(i)
    return token.r_kids
cdef bint is_final(self) nogil
# Unconditionally reports False; no state is consulted.
# NOTE(review): looks like a placeholder implementation -- confirm whether
# callers rely on this always being False.
cdef inline bint stack_is_connected(self) nogil:
return False
cdef bint has_head(self, int i) nogil
cdef inline bint entity_is_open(self) nogil:
    # With at least one entry recorded (self._e_i >= 1), the entity is open
    # when the most recent entry in self._ents has `end` equal to -1.
    # With no entries there is nothing to be open.
    if self._e_i >= 1:
        return self._ents[self._e_i - 1].end == -1
    return False
cdef int n_L(self, int i) nogil
# Returns the stack counter self._s_i unchanged.
cdef inline int stack_depth(self) nogil:
return self._s_i
cdef int n_R(self, int i) nogil
cdef bint stack_is_connected(self) nogil
cdef int stack_depth(self) nogil
cdef int buffer_length(self) nogil
cdef inline int buffer_length(self) nogil:
    # Distance from the buffer cursor self._b_i to the end of the buffer.
    # The end is self._break when one is set (any value other than -1),
    # otherwise self.length.
    if self._break == -1:
        return self.length - self._b_i
    return self._break - self._b_i
cdef void push(self) nogil

View File

@ -34,11 +34,6 @@ cdef class StateClass:
self._buffer[i] = i
self._empty_token.lex = &EMPTY_LEXEME
cdef int H(self, int i) nogil:
    # Returns self._sent[i].head + i for an index inside [0, self.length),
    # and -1 for any other index.
    # NOTE(review): adding i suggests `head` is stored relative to the
    # token's own position -- confirm against the TokenC definition.
    if i >= 0 and i < self.length:
        return self._sent[i].head + i
    return -1
cdef int E(self, int i) nogil:
if self._e_i <= 0 or self._e_i >= self.length:
return 0
@ -52,6 +47,8 @@ cdef class StateClass:
if i < 0 or i >= self.length:
return -1
cdef const TokenC* target = &self._sent[i]
if target.l_kids < idx:
return -1
cdef const TokenC* ptr = self._sent
while ptr < target:
@ -75,8 +72,10 @@ cdef class StateClass:
return -1
if i < 0 or i >= self.length:
return -1
cdef const TokenC* ptr = self._sent + (self.length - 1)
cdef const TokenC* target = &self._sent[i]
if target.r_kids < idx:
return -1
cdef const TokenC* ptr = self._sent + (self.length - 1)
while ptr > target:
# If this head is still to the right of us, we can skip to it
# No token that's between this token and this head could be our
@ -92,68 +91,6 @@ cdef class StateClass:
ptr -= 1
return -1
cdef const TokenC* S_(self, int i) nogil:
    # Pointer form of S(): the index from S(i) is passed through safe_get,
    # which returns &self._empty_token for out-of-range indices.
    cdef int where = self.S(i)
    return self.safe_get(where)
cdef const TokenC* B_(self, int i) nogil:
    # Pointer form of B(): the index from B(i) is passed through safe_get,
    # which returns &self._empty_token for out-of-range indices.
    cdef int where = self.B(i)
    return self.safe_get(where)
cdef const TokenC* H_(self, int i) nogil:
    # Pointer form of H(): the index from H(i) is passed through safe_get,
    # which returns &self._empty_token for out-of-range indices.
    cdef int where = self.H(i)
    return self.safe_get(where)
cdef const TokenC* E_(self, int i) nogil:
    # Pointer form of E(): the index from E(i) is passed through safe_get,
    # which returns &self._empty_token for out-of-range indices.
    cdef int where = self.E(i)
    return self.safe_get(where)
cdef const TokenC* L_(self, int i, int idx) nogil:
    # Pointer form of L(): the index from L(i, idx) is passed through
    # safe_get, which returns &self._empty_token for out-of-range indices.
    cdef int where = self.L(i, idx)
    return self.safe_get(where)
cdef const TokenC* R_(self, int i, int idx) nogil:
    # Pointer form of R(): the index from R(i, idx) is passed through
    # safe_get, which returns &self._empty_token for out-of-range indices.
    cdef int where = self.R(i, idx)
    return self.safe_get(where)
cdef const TokenC* safe_get(self, int i) nogil:
    # Bounds-checked token lookup: returns the address of self._sent[i] when
    # i lies inside [0, self.length), and the address of self._empty_token
    # for every other index.
    if i >= 0 and i < self.length:
        return &self._sent[i]
    return &self._empty_token
cdef bint empty(self) nogil:
    # True when the stack counter self._s_i is zero or negative.
    if self._s_i <= 0:
        return True
    return False
cdef bint eol(self) nogil:
    # True when buffer_length() reports exactly zero remaining entries.
    cdef int left = self.buffer_length()
    return left == 0
cdef bint at_break(self) nogil:
    # self._break holds -1 when unset; any other value counts as a break.
    if self._break != -1:
        return True
    return False
cdef bint is_final(self) nogil:
    # Final requires an exhausted stack (stack_depth() <= 0) and a buffer
    # cursor self._b_i at or beyond self.length.
    if self.stack_depth() <= 0:
        return self._b_i >= self.length
    return False
cdef bint has_head(self, int i) nogil:
    # True when the token at i has a non-zero `head` field. Out-of-range
    # indices are read from self._empty_token via safe_get.
    cdef const TokenC* tok = self.safe_get(i)
    return tok.head != 0
cdef int n_L(self, int i) nogil:
    # Returns the `l_kids` field of the token at i. Out-of-range indices are
    # read from self._empty_token via safe_get.
    cdef const TokenC* tok = self.safe_get(i)
    return tok.l_kids
cdef int n_R(self, int i) nogil:
    # Returns the `r_kids` field of the token at i. Out-of-range indices are
    # read from self._empty_token via safe_get.
    cdef const TokenC* tok = self.safe_get(i)
    return tok.r_kids
# Unconditionally reports False; no state is consulted.
# NOTE(review): looks like a placeholder implementation -- confirm whether
# callers rely on this always being False.
cdef bint stack_is_connected(self) nogil:
return False
cdef bint entity_is_open(self) nogil:
    # No recorded entries (self._e_i < 1) means nothing is open. Otherwise
    # the entity is open when the latest entry in self._ents has `end`
    # equal to -1.
    if self._e_i >= 1:
        return self._ents[self._e_i - 1].end == -1
    return False
# Returns the stack counter self._s_i unchanged.
cdef int stack_depth(self) nogil:
return self._s_i
cdef int buffer_length(self) nogil:
    # Distance from the buffer cursor self._b_i to the end of the buffer.
    # With no break set (self._break == -1) the end is self.length;
    # otherwise the end is self._break.
    if self._break == -1:
        return self.length - self._b_i
    return self._break - self._b_i
cdef void push(self) nogil:
if self.B(0) != -1:
self._stack[self._s_i] = self.B(0)