diff --git a/spacy/en/pos.pyx b/spacy/en/pos.pyx index 3dab084a8..569b209fc 100644 --- a/spacy/en/pos.pyx +++ b/spacy/en/pos.pyx @@ -262,6 +262,9 @@ cdef class EnPosTagger: 'morphs.json')))) self.lemmatizer = Lemmatizer(path.join(data_dir, 'wordnet'), NOUN, VERB, ADJ) self.freqs = {TAG: defaultdict(int)} + for tag in self.tag_names: + self.freqs[TAG][self.strings[tag]] = 1 + self.freqs[TAG][0] = 1 def __call__(self, Doc tokens): """Apply the tagger, setting the POS tags onto the Doc object. diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx index 4d32a4e54..67cf0ca06 100644 --- a/spacy/syntax/transition_system.pyx +++ b/spacy/syntax/transition_system.pyx @@ -33,6 +33,11 @@ cdef class TransitionSystem: self.freqs = {} for attr in (TAG, HEAD, DEP, ENT_TYPE, ENT_IOB): self.freqs[attr] = defaultdict(int) + self.freqs[attr][0] = 1 + # Ensure we've seen heads. Need an official dependency length limit... + for i in range(512): + self.freqs[HEAD][i] = 1 + self.freqs[HEAD][-i] = 1 cdef int initialize_state(self, StateClass state) except -1: pass