Maintaining backward compatibility.

This commit is contained in:
Gyorgy Orosz 2016-12-21 23:30:49 +01:00
parent 1748549aeb
commit d9c59c4751

View File

@@ -28,8 +28,8 @@ from .tokens.doc cimport Doc
cdef class Tokenizer: cdef class Tokenizer:
"""Segment text, and create Doc objects with the discovered segment boundaries.""" """Segment text, and create Doc objects with the discovered segment boundaries."""
@classmethod @classmethod
def load(cls, path, Vocab vocab, rules=None, rule_match = None, prefix_search=None, suffix_search=None, def load(cls, path, Vocab vocab, rules=None, prefix_search=None, suffix_search=None,
infix_finditer=None): infix_finditer=None, rule_match = None):
'''Load a Tokenizer, reading unsupplied components from the path. '''Load a Tokenizer, reading unsupplied components from the path.
Arguments: Arguments:
@@ -69,7 +69,7 @@ cdef class Tokenizer:
infix_finditer = util.compile_infix_regex(entries).finditer infix_finditer = util.compile_infix_regex(entries).finditer
return cls(vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer) return cls(vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer)
def __init__(self, Vocab vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer): def __init__(self, Vocab vocab, rules, prefix_search, suffix_search, infix_finditer, rule_match=None):
'''Create a Tokenizer, to create Doc objects given unicode text. '''Create a Tokenizer, to create Doc objects given unicode text.
Arguments: Arguments:
@@ -77,9 +77,6 @@ cdef class Tokenizer:
A storage container for lexical types. A storage container for lexical types.
rules (dict): rules (dict):
Exceptions and special-cases for the tokenizer. Exceptions and special-cases for the tokenizer.
rule_match:
A function matching the signature of re.compile(string).match
to match special cases for the tokenizer.
prefix_search: prefix_search:
A function matching the signature of re.compile(string).search A function matching the signature of re.compile(string).search
to match prefixes. to match prefixes.
@@ -89,6 +86,9 @@ cdef class Tokenizer:
infix_finditer: infix_finditer:
A function matching the signature of re.compile(string).finditer A function matching the signature of re.compile(string).finditer
to find infixes. to find infixes.
rule_match:
A function matching the signature of re.compile(string).match
to match special cases for the tokenizer.
''' '''
self.mem = Pool() self.mem = Pool()
self._cache = PreshMap() self._cache = PreshMap()