* More slight cleaning for lang.pyx

This commit is contained in:
Matthew Honnibal 2014-10-10 20:11:22 +11:00
parent 3d82ed1e5e
commit ff79dbac2e

View File

@@ -176,18 +176,12 @@ cdef class Language:
     cdef int _find_prefix(self, Py_UNICODE* chars, size_t length) except -1:
         cdef unicode string = chars[:length]
         match = self.prefix_re.search(string)
-        if match is None:
-            return 0
-        else:
-            return match.end() - match.start()
+        return (match.end() - match.start()) if match is not None else 0
     cdef int _find_suffix(self, Py_UNICODE* chars, size_t length):
         cdef unicode string = chars[:length]
         match = self.suffix_re.search(string)
-        if match is None:
-            return 0
-        else:
-            return match.end() - match.start()
+        return (match.end() - match.start()) if match is not None else 0
def _load_special_tokenization(self, token_rules): def _load_special_tokenization(self, token_rules):
'''Load special-case tokenization rules. '''Load special-case tokenization rules.