* Add vocab.pyx to setup, and ensure we can import spacy.en.lang

This commit is contained in:
Matthew Honnibal 2014-12-21 06:03:53 +11:00
parent b34a1325d3
commit 2a89d70429
3 changed files with 24 additions and 22 deletions

View File

@ -46,26 +46,29 @@ else:
# If you're not using virtualenv, set your include dir here.
pass
ext_args = {'language': "c++", "include_dirs": includes}
exts = [
Extension("spacy.lang", ["spacy/lang.pyx"], language="c++", include_dirs=includes),
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes),
Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes),
Extension("spacy.utf8string", ["spacy/utf8string.pyx"], language="c++", include_dirs=includes),
Extension("spacy.index", ["spacy/index.pyx"], language="c++", include_dirs=includes),
Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes),
Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.strings", ["spacy/strings.pyx"], **ext_args),
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], **ext_args),
Extension("spacy.vocab", ["spacy/vocab.pyx"], **ext_args),
Extension("spacy.tokens", ["spacy/tokens.pyx"], **ext_args),
Extension("spacy.morphology", ["spacy/morphology.pyx"], **ext_args),
Extension("spacy.tagger", ["spacy/tagger.pyx"], **ext_args),
Extension("spacy.tokenizer", ["spacy/tokenizer.pyx"], **ext_args),
Extension("spacy.en.lang", ["spacy/en/lang.pyx"], **ext_args),
Extension("spacy.en.pos", ["spacy/en/pos.pyx"], **ext_args),
Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], **ext_args),
Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], **ext_args),
#Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], **ext_args),
#Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++",
# include_dirs=includes),
#Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++",
# include_dirs=includes),
#Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++",
# include_dirs=includes),
# Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++",
# include_dirs=includes),
#Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes),
#Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes),

View File

@ -3,7 +3,6 @@
from os import path
import json
from .lemmatizer import Lemmatizer
from .typedefs cimport id_t, univ_tag_t
from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT
from .typedefs cimport VERB, X, PUNCT, EOL

View File

@ -12,12 +12,12 @@ cdef class Vocab:
Also interns UTF-8 strings, and maps them to consecutive integer IDs.
'''
def __init__(self, object get_props):
def __init__(self, object get_lex_props):
self.mem = Pool()
self._map = PreshMap(2 ** 20)
self.strings = StringStore()
self.lexemes.push_back(&EMPTY_LEXEME)
self.get_lex_props = get_props
self.get_lex_props = get_lex_props
@classmethod
def from_dir(cls, object data_dir, object get_lex_props=None):
@ -25,7 +25,7 @@ cdef class Vocab:
raise IOError("Directory %s not found -- cannot load Vocab." % data_dir)
if not path.isdir(data_dir):
raise IOError("Path %s is a file, not a dir -- cannot load Vocab." % data_dir)
cdef Vocab self = cls(get_props)
cdef Vocab self = cls(get_lex_props)
self.strings.load(path.join(data_dir, 'strings'))
self.load(path.join(data_dir, 'lexemes'))
return self