mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-27 12:23:42 +03:00
* Add vocab.pyx to setup, and ensure we can import spacy.en.lang
This commit is contained in:
parent
b34a1325d3
commit
2a89d70429
39
setup.py
39
setup.py
|
@@ -46,26 +46,29 @@ else:
|
||||||
# If you're not using virtualenv, set your include dir here.
|
# If you're not using virtualenv, set your include dir here.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
ext_args = {'language': "c++", "include_dirs": includes}
|
||||||
|
|
||||||
exts = [
|
exts = [
|
||||||
Extension("spacy.lang", ["spacy/lang.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.strings", ["spacy/strings.pyx"], **ext_args),
|
||||||
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], **ext_args),
|
||||||
Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.vocab", ["spacy/vocab.pyx"], **ext_args),
|
||||||
Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.tokens", ["spacy/tokens.pyx"], **ext_args),
|
||||||
Extension("spacy.utf8string", ["spacy/utf8string.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.morphology", ["spacy/morphology.pyx"], **ext_args),
|
||||||
Extension("spacy.index", ["spacy/index.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.tagger", ["spacy/tagger.pyx"], **ext_args),
|
||||||
Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.tokenizer", ["spacy/tokenizer.pyx"], **ext_args),
|
||||||
Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++",
|
Extension("spacy.en.lang", ["spacy/en/lang.pyx"], **ext_args),
|
||||||
include_dirs=includes),
|
Extension("spacy.en.pos", ["spacy/en/pos.pyx"], **ext_args),
|
||||||
|
Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], **ext_args),
|
||||||
Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++",
|
Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], **ext_args),
|
||||||
include_dirs=includes),
|
#Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], **ext_args),
|
||||||
Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++",
|
#Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++",
|
||||||
include_dirs=includes),
|
# include_dirs=includes),
|
||||||
Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++",
|
#Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++",
|
||||||
include_dirs=includes),
|
# include_dirs=includes),
|
||||||
Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++",
|
#Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++",
|
||||||
include_dirs=includes),
|
# include_dirs=includes),
|
||||||
|
# Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++",
|
||||||
|
# include_dirs=includes),
|
||||||
|
|
||||||
#Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes),
|
#Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes),
|
||||||
#Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes),
|
#Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes),
|
||||||
|
|
|
@@ -3,7 +3,6 @@
|
||||||
from os import path
|
from os import path
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .lemmatizer import Lemmatizer
|
|
||||||
from .typedefs cimport id_t, univ_tag_t
|
from .typedefs cimport id_t, univ_tag_t
|
||||||
from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT
|
from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT
|
||||||
from .typedefs cimport VERB, X, PUNCT, EOL
|
from .typedefs cimport VERB, X, PUNCT, EOL
|
||||||
|
|
|
@@ -12,12 +12,12 @@ cdef class Vocab:
|
||||||
|
|
||||||
Also interns UTF-8 strings, and maps them to consecutive integer IDs.
|
Also interns UTF-8 strings, and maps them to consecutive integer IDs.
|
||||||
'''
|
'''
|
||||||
def __init__(self, object get_props):
|
def __init__(self, object get_lex_props):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._map = PreshMap(2 ** 20)
|
self._map = PreshMap(2 ** 20)
|
||||||
self.strings = StringStore()
|
self.strings = StringStore()
|
||||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
self.get_lex_props = get_props
|
self.get_lex_props = get_lex_props
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dir(cls, object data_dir, object get_lex_props=None):
|
def from_dir(cls, object data_dir, object get_lex_props=None):
|
||||||
|
@@ -25,7 +25,7 @@ cdef class Vocab:
|
||||||
raise IOError("Directory %s not found -- cannot load Vocab." % data_dir)
|
raise IOError("Directory %s not found -- cannot load Vocab." % data_dir)
|
||||||
if not path.isdir(data_dir):
|
if not path.isdir(data_dir):
|
||||||
raise IOError("Path %s is a file, not a dir -- cannot load Vocab." % data_dir)
|
raise IOError("Path %s is a file, not a dir -- cannot load Vocab." % data_dir)
|
||||||
cdef Vocab self = cls(get_props)
|
cdef Vocab self = cls(get_lex_props)
|
||||||
self.strings.load(path.join(data_dir, 'strings'))
|
self.strings.load(path.join(data_dir, 'strings'))
|
||||||
self.load(path.join(data_dir, 'lexemes'))
|
self.load(path.join(data_dir, 'lexemes'))
|
||||||
return self
|
return self
|
||||||
|
|
Loading…
Reference in New Issue
Block a user