mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
* Use the symbols file in vocab instead of the symbols subfiles like attrs.pxd
This commit is contained in:
parent
6b30d1cf7b
commit
8f0f47b9a6
|
@ -20,7 +20,7 @@ from .cfile cimport CFile
|
||||||
from .lemmatizer import Lemmatizer
|
from .lemmatizer import Lemmatizer
|
||||||
|
|
||||||
from . import attrs
|
from . import attrs
|
||||||
from . import parts_of_speech
|
from . import symbols
|
||||||
|
|
||||||
from cymem.cymem cimport Address
|
from cymem.cymem cimport Address
|
||||||
from . import util
|
from . import util
|
||||||
|
@ -75,18 +75,9 @@ cdef class Vocab:
|
||||||
# is the frequency rank of the word, plus a certain offset. The structural
|
# is the frequency rank of the word, plus a certain offset. The structural
|
||||||
# strings are loaded first, because the vocab is open-class, and these
|
# strings are loaded first, because the vocab is open-class, and these
|
||||||
# symbols are closed class.
|
# symbols are closed class.
|
||||||
for name in attrs.NAMES:
|
for name in symbols.NAMES + list(sorted(tag_map.keys())):
|
||||||
if name:
|
if name:
|
||||||
_ = self.strings[name]
|
_ = self.strings[name]
|
||||||
for name in parts_of_speech.NAMES:
|
|
||||||
if name:
|
|
||||||
_ = self.strings[name]
|
|
||||||
#for morph_name in UNIV_MORPH_NAMES:
|
|
||||||
# _ = self.strings[morph_name]
|
|
||||||
#for entity_type_name in entity_types.NAMES:
|
|
||||||
# _ = self.strings[entity_type_name]
|
|
||||||
#for tag_name in sorted(tag_map.keys()):
|
|
||||||
# _ = self.strings[tag_name]
|
|
||||||
self.get_lex_attr = get_lex_attr
|
self.get_lex_attr = get_lex_attr
|
||||||
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
|
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
|
||||||
self.serializer_freqs = serializer_freqs
|
self.serializer_freqs = serializer_freqs
|
||||||
|
|
Loading…
Reference in New Issue
Block a user