mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
* Switch to using sparsehash and murmurhash libraries out of pip
This commit is contained in:
parent
3f7cbb93e0
commit
b9016c4633
|
@ -1,3 +1,6 @@
|
||||||
cython
|
cython
|
||||||
|
sparsehash
|
||||||
|
murmurhash
|
||||||
fabric
|
fabric
|
||||||
pytest
|
pytest
|
||||||
|
|
||||||
|
|
41
setup.py
41
setup.py
|
@ -7,6 +7,7 @@ import sys
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
from os import path
|
from os import path
|
||||||
|
from glob import glob
|
||||||
|
|
||||||
|
|
||||||
def clean(ext):
|
def clean(ext):
|
||||||
|
@ -34,32 +35,22 @@ libs = []
|
||||||
|
|
||||||
includes = []
|
includes = []
|
||||||
|
|
||||||
|
|
||||||
|
if 'VIRTUAL_ENV' in os.environ:
|
||||||
|
includes += glob(path.join(os.environ['VIRTUAL_ENV'], 'include', 'site', '*'))
|
||||||
|
else:
|
||||||
|
# If you're not using virtualenv, set your include dir here.
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
exts = [
|
exts = [
|
||||||
Extension("ext.sparsehash", ["ext/sparsehash.pyx"], language="c++"),
|
Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes),
|
||||||
Extension('ext.murmurhash',
|
Extension("spacy.en_ptb", ["spacy/en_ptb.pyx"], language="c++", include_dirs=includes),
|
||||||
["ext/murmurhash.pyx", "ext/MurmurHash2.cpp",
|
|
||||||
"ext/MurmurHash3.cpp"], language="c++",
|
|
||||||
include_dirs=[path.join(HERE, 'ext')]),
|
|
||||||
|
|
||||||
Extension("spacy.en",
|
|
||||||
["spacy/en.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
|
||||||
language="c++",
|
|
||||||
include_dirs=[path.join(HERE, 'ext')]),
|
|
||||||
Extension("spacy.en_ptb",
|
|
||||||
["spacy/en_ptb.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
|
||||||
language="c++",
|
|
||||||
include_dirs=[path.join(HERE, 'ext')]),
|
|
||||||
|
|
||||||
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
|
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
|
||||||
Extension("spacy.spacy",
|
Extension("spacy.spacy", ["spacy/spacy.pyx"], language="c++", include_dirs=includes),
|
||||||
["spacy/spacy.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes),
|
||||||
language="c++", include_dirs=includes),
|
Extension("spacy.string_tools", ["spacy/string_tools.pyx"], language="c++",
|
||||||
Extension("spacy.tokens",
|
include_dirs=includes),
|
||||||
["spacy/tokens.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
|
||||||
language="c++", include_dirs=includes),
|
|
||||||
Extension("spacy.string_tools",
|
|
||||||
["spacy/string_tools.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
|
||||||
language="c++", include_dirs=includes),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -68,7 +59,7 @@ if sys.argv[1] == 'clean':
|
||||||
map(clean, exts)
|
map(clean, exts)
|
||||||
|
|
||||||
distutils.core.setup(
|
distutils.core.setup(
|
||||||
name='Sparse linear models with Cython',
|
name='Lightning fast, full-cream NL tokenizer',
|
||||||
packages=['thinc'],
|
packages=['thinc'],
|
||||||
author='Matthew Honnibal',
|
author='Matthew Honnibal',
|
||||||
author_email='honnibal@gmail.com',
|
author_email='honnibal@gmail.com',
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
from libc.stdint cimport uint64_t
|
from libc.stdint cimport uint64_t
|
||||||
|
|
||||||
from ext.sparsehash cimport dense_hash_map
|
from sparsehash.dense_hash_map cimport dense_hash_map
|
||||||
|
|
||||||
|
|
||||||
# Circular import problems here
|
# Circular import problems here
|
||||||
|
|
|
@ -3,9 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from libc.stdlib cimport calloc, free
|
from libc.stdlib cimport calloc, free
|
||||||
|
|
||||||
from ext.murmurhash cimport MurmurHash64A
|
from murmurhash cimport mrmr
|
||||||
from ext.murmurhash cimport MurmurHash64B
|
|
||||||
|
|
||||||
from spacy.lexeme cimport Lexeme
|
from spacy.lexeme cimport Lexeme
|
||||||
from spacy.lexeme cimport BLANK_WORD
|
from spacy.lexeme cimport BLANK_WORD
|
||||||
|
|
||||||
|
@ -100,7 +98,7 @@ cdef class Language:
|
||||||
|
|
||||||
cdef StringHash hash_string(self, Py_UNICODE* s, size_t length) except 0:
|
cdef StringHash hash_string(self, Py_UNICODE* s, size_t length) except 0:
|
||||||
'''Hash unicode with MurmurHash64A'''
|
'''Hash unicode with MurmurHash64A'''
|
||||||
return MurmurHash64A(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
|
return mrmr.hash64(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
|
||||||
|
|
||||||
cdef unicode unhash(self, StringHash hash_value):
|
cdef unicode unhash(self, StringHash hash_value):
|
||||||
'''Fetch a string from the reverse index, given its hash value.'''
|
'''Fetch a string from the reverse index, given its hash value.'''
|
||||||
|
|
Loading…
Reference in New Issue
Block a user