* Switch to using the sparsehash and murmurhash libraries installed from pip

This commit is contained in:
Matthew Honnibal 2014-07-25 15:47:27 +01:00
parent 3f7cbb93e0
commit b9016c4633
4 changed files with 22 additions and 30 deletions

View File

@@ -1,3 +1,6 @@
cython
sparsehash
murmurhash
fabric
pytest

View File

@@ -7,6 +7,7 @@ import sys
import os
import os.path
from os import path
from glob import glob
def clean(ext):
@@ -34,32 +35,22 @@ libs = []
includes = []
exts = [
Extension("ext.sparsehash", ["ext/sparsehash.pyx"], language="c++"),
Extension('ext.murmurhash',
["ext/murmurhash.pyx", "ext/MurmurHash2.cpp",
"ext/MurmurHash3.cpp"], language="c++",
include_dirs=[path.join(HERE, 'ext')]),
Extension("spacy.en",
["spacy/en.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
language="c++",
include_dirs=[path.join(HERE, 'ext')]),
Extension("spacy.en_ptb",
["spacy/en_ptb.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
language="c++",
include_dirs=[path.join(HERE, 'ext')]),
if 'VIRTUAL_ENV' in os.environ:
includes += glob(path.join(os.environ['VIRTUAL_ENV'], 'include', 'site', '*'))
else:
# If you're not using virtualenv, set your include dir here.
pass
exts = [
Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes),
Extension("spacy.en_ptb", ["spacy/en_ptb.pyx"], language="c++", include_dirs=includes),
Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes),
Extension("spacy.spacy",
["spacy/spacy.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
language="c++", include_dirs=includes),
Extension("spacy.tokens",
["spacy/tokens.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
language="c++", include_dirs=includes),
Extension("spacy.string_tools",
["spacy/string_tools.pyx", "ext/MurmurHash3.cpp", "ext/MurmurHash2.cpp"],
language="c++", include_dirs=includes),
Extension("spacy.spacy", ["spacy/spacy.pyx"], language="c++", include_dirs=includes),
Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes),
Extension("spacy.string_tools", ["spacy/string_tools.pyx"], language="c++",
include_dirs=includes),
]
@@ -68,7 +59,7 @@ if sys.argv[1] == 'clean':
map(clean, exts)
distutils.core.setup(
name='Sparse linear models with Cython',
name='Lightning fast, full-cream NL tokenizer',
packages=['thinc'],
author='Matthew Honnibal',
author_email='honnibal@gmail.com',

View File

@@ -1,7 +1,7 @@
from libcpp.vector cimport vector
from libc.stdint cimport uint64_t
from ext.sparsehash cimport dense_hash_map
from sparsehash.dense_hash_map cimport dense_hash_map
# Circular import problems here

View File

@@ -3,9 +3,7 @@ from __future__ import unicode_literals
from libc.stdlib cimport calloc, free
from ext.murmurhash cimport MurmurHash64A
from ext.murmurhash cimport MurmurHash64B
from murmurhash cimport mrmr
from spacy.lexeme cimport Lexeme
from spacy.lexeme cimport BLANK_WORD
@@ -100,7 +98,7 @@ cdef class Language:
cdef StringHash hash_string(self, Py_UNICODE* s, size_t length) except 0:
'''Hash unicode with MurmurHash64A'''
return MurmurHash64A(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
return mrmr.hash64(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
cdef unicode unhash(self, StringHash hash_value):
'''Fetch a string from the reverse index, given its hash value.'''