* Use the symbols file in vocab instead of the symbols subfiles like attrs.pxd

This commit is contained in:
Matthew Honnibal 2015-10-10 22:12:06 +11:00
parent ce65ec698c
commit 37b909b6b6

View File

@ -20,7 +20,7 @@ from .cfile cimport CFile
from .lemmatizer import Lemmatizer
from . import attrs
from . import parts_of_speech
from . import symbols
from cymem.cymem cimport Address
from . import util
@ -75,18 +75,9 @@ cdef class Vocab:
# is the frequency rank of the word, plus a certain offset. The structural
# strings are loaded first, because the vocab is open-class, and these
# symbols are closed-class.
for name in attrs.NAMES:
for name in symbols.NAMES + list(sorted(tag_map.keys())):
if name:
_ = self.strings[name]
for name in parts_of_speech.NAMES:
if name:
_ = self.strings[name]
#for morph_name in UNIV_MORPH_NAMES:
# _ = self.strings[morph_name]
#for entity_type_name in entity_types.NAMES:
# _ = self.strings[entity_type_name]
#for tag_name in sorted(tag_map.keys()):
# _ = self.strings[tag_name]
self.get_lex_attr = get_lex_attr
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
self.serializer_freqs = serializer_freqs