mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-01 03:33:12 +03:00
* Set initial freqs, to avoid missing values in serializer
This commit is contained in:
parent
680bb47b55
commit
12699a1152
|
@ -262,6 +262,9 @@ cdef class EnPosTagger:
|
||||||
'morphs.json'))))
|
'morphs.json'))))
|
||||||
self.lemmatizer = Lemmatizer(path.join(data_dir, 'wordnet'), NOUN, VERB, ADJ)
|
self.lemmatizer = Lemmatizer(path.join(data_dir, 'wordnet'), NOUN, VERB, ADJ)
|
||||||
self.freqs = {TAG: defaultdict(int)}
|
self.freqs = {TAG: defaultdict(int)}
|
||||||
|
for tag in self.tag_names:
|
||||||
|
self.freqs[TAG][self.strings[tag]] = 1
|
||||||
|
self.freqs[TAG][0] = 1
|
||||||
|
|
||||||
def __call__(self, Doc tokens):
|
def __call__(self, Doc tokens):
|
||||||
"""Apply the tagger, setting the POS tags onto the Doc object.
|
"""Apply the tagger, setting the POS tags onto the Doc object.
|
||||||
|
|
|
@ -33,6 +33,11 @@ cdef class TransitionSystem:
|
||||||
self.freqs = {}
|
self.freqs = {}
|
||||||
for attr in (TAG, HEAD, DEP, ENT_TYPE, ENT_IOB):
|
for attr in (TAG, HEAD, DEP, ENT_TYPE, ENT_IOB):
|
||||||
self.freqs[attr] = defaultdict(int)
|
self.freqs[attr] = defaultdict(int)
|
||||||
|
self.freqs[attr][0] = 1
|
||||||
|
# Ensure we've seen heads. Need an official dependency length limit...
|
||||||
|
for i in range(512):
|
||||||
|
self.freqs[HEAD][i] = 1
|
||||||
|
self.freqs[HEAD][-i] = 1
|
||||||
|
|
||||||
cdef int initialize_state(self, StateClass state) except -1:
|
cdef int initialize_state(self, StateClass state) except -1:
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in New Issue
Block a user