Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-26 17:24:41 +03:00)
* Switch to using sparsehash and murmurhash libraries out of pip
This commit is contained in:
parent: 3f7cbb93e0
commit: b9016c4633
|
@ -1,3 +1,6 @@
|
|||
cython
|
||||
sparsehash
|
||||
murmurhash
|
||||
fabric
|
||||
pytest
|
||||
|
||||
|
|
41
setup.py
41
setup.py
|
@ -7,6 +7,7 @@ import sys
|
|||
import os
|
||||
import os.path
|
||||
from os import path
|
||||
from glob import glob
|
||||
|
||||
|
||||
def clean(ext):
|
||||
|
@ -34,32 +35,22 @@ libs = []
|
|||
|
||||
includes = []
|
||||
|
||||
exts = [
|
||||
Extension("ext.sparsehash", ["ext/sparsehash.pyx"], language="c++"),
|
||||
Extension('ext.murmurhash',
|
||||
["ext/murmurhash.pyx", "ext/MurmurHash2.cpp",
|
||||
"ext/MurmurHash3.cpp"], language="c++",
|
||||
include_dirs=[path.join(HERE, 'ext')]),
|
||||
|
||||
Extension("spacy.en",
|
||||
["spacy/en.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
||||
language="c++",
|
||||
include_dirs=[path.join(HERE, 'ext')]),
|
||||
Extension("spacy.en_ptb",
|
||||
["spacy/en_ptb.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
||||
language="c++",
|
||||
include_dirs=[path.join(HERE, 'ext')]),
|
||||
|
||||
if 'VIRTUAL_ENV' in os.environ:
|
||||
includes += glob(path.join(os.environ['VIRTUAL_ENV'], 'include', 'site', '*'))
|
||||
else:
|
||||
# If you're not using virtualenv, set your include dir here.
|
||||
pass
|
||||
|
||||
|
||||
exts = [
|
||||
Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes),
|
||||
Extension("spacy.en_ptb", ["spacy/en_ptb.pyx"], language="c++", include_dirs=includes),
|
||||
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
|
||||
Extension("spacy.spacy",
|
||||
["spacy/spacy.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
||||
language="c++", include_dirs=includes),
|
||||
Extension("spacy.tokens",
|
||||
["spacy/tokens.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
||||
language="c++", include_dirs=includes),
|
||||
Extension("spacy.string_tools",
|
||||
["spacy/string_tools.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
|
||||
language="c++", include_dirs=includes),
|
||||
Extension("spacy.spacy", ["spacy/spacy.pyx"], language="c++", include_dirs=includes),
|
||||
Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes),
|
||||
Extension("spacy.string_tools", ["spacy/string_tools.pyx"], language="c++",
|
||||
include_dirs=includes),
|
||||
]
|
||||
|
||||
|
||||
|
@ -68,7 +59,7 @@ if sys.argv[1] == 'clean':
|
|||
map(clean, exts)
|
||||
|
||||
distutils.core.setup(
|
||||
name='Sparse linear models with Cython',
|
||||
name='Lightning fast, full-cream NL tokenizer',
|
||||
packages=['thinc'],
|
||||
author='Matthew Honnibal',
|
||||
author_email='honnibal@gmail.com',
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from libcpp.vector cimport vector
|
||||
from libc.stdint cimport uint64_t
|
||||
|
||||
from ext.sparsehash cimport dense_hash_map
|
||||
from sparsehash.dense_hash_map cimport dense_hash_map
|
||||
|
||||
|
||||
# Circular import problems here
|
||||
|
|
|
@ -3,9 +3,7 @@ from __future__ import unicode_literals
|
|||
|
||||
from libc.stdlib cimport calloc, free
|
||||
|
||||
from ext.murmurhash cimport MurmurHash64A
|
||||
from ext.murmurhash cimport MurmurHash64B
|
||||
|
||||
from murmurhash cimport mrmr
|
||||
from spacy.lexeme cimport Lexeme
|
||||
from spacy.lexeme cimport BLANK_WORD
|
||||
|
||||
|
@ -100,7 +98,7 @@ cdef class Language:
|
|||
|
||||
cdef StringHash hash_string(self, Py_UNICODE* s, size_t length) except 0:
|
||||
'''Hash unicode with MurmurHash64A'''
|
||||
return MurmurHash64A(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
|
||||
return mrmr.hash64(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
|
||||
|
||||
cdef unicode unhash(self, StringHash hash_value):
|
||||
'''Fetch a string from the reverse index, given its hash value.'''
|
||||
|
|
Loading…
Reference in New Issue
Block a user