mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
Maintaining backward compatibility.
This commit is contained in:
parent
1748549aeb
commit
d9c59c4751
|
@ -28,8 +28,8 @@ from .tokens.doc cimport Doc
|
||||||
cdef class Tokenizer:
|
cdef class Tokenizer:
|
||||||
"""Segment text, and create Doc objects with the discovered segment boundaries."""
|
"""Segment text, and create Doc objects with the discovered segment boundaries."""
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, path, Vocab vocab, rules=None, rule_match = None, prefix_search=None, suffix_search=None,
|
def load(cls, path, Vocab vocab, rules=None, prefix_search=None, suffix_search=None,
|
||||||
infix_finditer=None):
|
infix_finditer=None, rule_match = None):
|
||||||
'''Load a Tokenizer, reading unsupplied components from the path.
|
'''Load a Tokenizer, reading unsupplied components from the path.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -69,7 +69,7 @@ cdef class Tokenizer:
|
||||||
infix_finditer = util.compile_infix_regex(entries).finditer
|
infix_finditer = util.compile_infix_regex(entries).finditer
|
||||||
return cls(vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer)
|
return cls(vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer)
|
||||||
|
|
||||||
def __init__(self, Vocab vocab, rules, rule_match, prefix_search, suffix_search, infix_finditer):
|
def __init__(self, Vocab vocab, rules, prefix_search, suffix_search, infix_finditer, rule_match=None):
|
||||||
'''Create a Tokenizer, to create Doc objects given unicode text.
|
'''Create a Tokenizer, to create Doc objects given unicode text.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -77,9 +77,6 @@ cdef class Tokenizer:
|
||||||
A storage container for lexical types.
|
A storage container for lexical types.
|
||||||
rules (dict):
|
rules (dict):
|
||||||
Exceptions and special-cases for the tokenizer.
|
Exceptions and special-cases for the tokenizer.
|
||||||
rule_match:
|
|
||||||
A function matching the signature of re.compile(string).match
|
|
||||||
to match special cases for the tokenizer.
|
|
||||||
prefix_search:
|
prefix_search:
|
||||||
A function matching the signature of re.compile(string).search
|
A function matching the signature of re.compile(string).search
|
||||||
to match prefixes.
|
to match prefixes.
|
||||||
|
@ -89,6 +86,9 @@ cdef class Tokenizer:
|
||||||
infix_finditer:
|
infix_finditer:
|
||||||
A function matching the signature of re.compile(string).finditer
|
A function matching the signature of re.compile(string).finditer
|
||||||
to find infixes.
|
to find infixes.
|
||||||
|
rule_match:
|
||||||
|
A function matching the signature of re.compile(string).match
|
||||||
|
to match special cases for the tokenizer.
|
||||||
'''
|
'''
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._cache = PreshMap()
|
self._cache = PreshMap()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user