mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Add Vocab.cfg attr, to hold stuff like oov probs
This commit is contained in:
parent
ab5d5ed880
commit
7b56b2f04b
|
@@ -32,6 +32,7 @@ cdef class Vocab:
|
|||
cdef readonly int length
|
||||
cdef public object data_dir
|
||||
cdef public object lex_attr_getters
|
||||
cdef public object cfg
|
||||
|
||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
||||
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
|
||||
|
|
|
@@ -27,7 +27,7 @@ cdef class Vocab:
|
|||
C-data that is shared between `Doc` objects.
|
||||
"""
|
||||
def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None,
|
||||
- strings=tuple(), **deprecated_kwargs):
|
||||
+ strings=tuple(), oov_prob=-20., **deprecated_kwargs):
|
||||
"""Create the vocabulary.
|
||||
|
||||
lex_attr_getters (dict): A dictionary mapping attribute IDs to
|
||||
|
@@ -43,6 +43,7 @@ cdef class Vocab:
|
|||
tag_map = tag_map if tag_map is not None else {}
|
||||
if lemmatizer in (None, True, False):
|
||||
lemmatizer = Lemmatizer({}, {}, {})
|
||||
self.cfg = {'oov_prob': oov_prob}
|
||||
self.mem = Pool()
|
||||
self._by_hash = PreshMap()
|
||||
self._by_orth = PreshMap()
|
||||
|
|
Loading…
Reference in New Issue
Block a user