spaCy/setup.py

232 lines
7.4 KiB
Python
Raw Normal View History

2014-07-05 22:49:34 +04:00
#!/usr/bin/env python
2015-01-04 21:30:56 +03:00
from setuptools import setup
2015-03-09 08:46:35 +03:00
import shutil
2015-01-04 21:30:56 +03:00
2014-07-05 22:49:34 +04:00
import sys
import os
from os import path
2015-01-04 21:30:56 +03:00
from setuptools import Extension
from distutils import sysconfig
from distutils.core import setup, Extension
from distutils.command.build_ext import build_ext
import platform
2015-11-03 09:07:43 +03:00
# Maps each package name to the glob patterns handed to setup() as
# ``package_data``.  Every package ships its ``*.pxd`` files; ``spacy.en``
# additionally ships the data files listed below.
PACKAGE_DATA = {
    package: ["*.pxd"]
    for package in ("spacy", "spacy.tokens", "spacy.serialize", "spacy.syntax")
}
PACKAGE_DATA["spacy.en"] = [
    "*.pxd",
    "data/wordnet/*.exc",
    "data/wordnet/index.*",
    "data/tokenizer/*",
    "data/vocab/serializer.json",
]
# Extra compiler / linker flags, keyed by distutils ``compiler_type``; the
# 'other' entry is the fallback for any compiler type not listed explicitly.
# The compiler that will actually be used is only known after
# finalize_options, so these tables are consulted from inside
# build_extensions (in the build_ext subclasses) rather than applied here.
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
#
# The values must stay mutable lists: main() appends platform-specific
# flags to the 'other' entries.
compile_options = dict(
    msvc=['/Ox', '/EHsc'],
    other=['-O3', '-Wno-strict-prototypes', '-Wno-unused-function']
)
link_options = dict(msvc=[], other=[])
class build_ext_options:
    """Mixin for build_ext commands that applies per-compiler flags.

    Intended to be combined with a ``build_ext`` class: it relies on the
    ``self.compiler`` and ``self.extensions`` attributes of the command it
    is mixed into.
    """

    def build_options(self):
        """Assign extra compile and link arguments to every extension.

        For each of the module-level ``compile_options`` and
        ``link_options`` tables, the entry for
        ``self.compiler.compiler_type`` is used if present, otherwise the
        'other' entry.  If neither key exists, the corresponding attribute
        of the extensions is left untouched.
        """
        compiler_kind = self.compiler.compiler_type
        flag_tables = (
            (compile_options, 'extra_compile_args'),
            (link_options, 'extra_link_args'),
        )
        for table, attr_name in flag_tables:
            key = compiler_kind if compiler_kind in table else 'other'
            if key not in table:
                continue
            for ext in self.extensions:
                # Every extension receives the same list object from the table.
                setattr(ext, attr_name, table[key])
class build_ext_subclass(build_ext, build_ext_options):
    """distutils ``build_ext`` that applies the per-compiler flags first."""

    def build_extensions(self):
        """Set the extra compile/link arguments, then build as usual."""
        # By this point self.compiler exists, so the flags can be chosen
        # from its compiler_type.
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)
2015-01-17 08:19:54 +03:00
# PyPy -- NB: PyPy doesn't really work; it segfaults all over the place.
# This patch is nevertheless required to get the extensions to compile there.
# The C++ compiler has to be set by monkey-patching
# distutils.sysconfig.customize_compiler, because PyPy broke the normal
# configuration path.
pre_patch_customize_compiler = sysconfig.customize_compiler


def my_customize_compiler(compiler):
    """Run the stock customize_compiler, then force the C++ command to 'c++'."""
    pre_patch_customize_compiler(compiler)
    compiler.compiler_cxx = ['c++']


# Only install the wrapper when actually running under PyPy.
if platform.python_implementation() == 'PyPy':
    sysconfig.customize_compiler = my_customize_compiler
2015-01-03 13:02:10 +03:00
2015-01-25 06:49:10 +03:00
#def install_headers():
# dest_dir = path.join(sys.prefix, 'include', 'murmurhash')
# if not path.exists(dest_dir):
# shutil.copytree('murmurhash/headers/murmurhash', dest_dir)
#
# dest_dir = path.join(sys.prefix, 'include', 'numpy')
2015-01-17 08:19:54 +03:00
# Module-level include directories: the source root and <sys.prefix>/include.
# (main() builds its own, equal list and passes that to the extensions.)
includes = ['.', path.join(sys.prefix, 'include')]

# Best effort: copy numpy's C headers to <sys.prefix>/include/numpy.
# Failure is deliberately ignored -- ImportError means numpy is not
# installed, and copytree raises OSError e.g. when the destination
# directory already exists.
try:
    import numpy
    numpy_headers = path.join(numpy.get_include(), 'numpy')
    shutil.copytree(numpy_headers, path.join(sys.prefix, 'include', 'numpy'))
except (ImportError, OSError):
    pass
2015-01-25 06:49:10 +03:00
2015-11-03 09:07:43 +03:00
2015-03-09 08:46:35 +03:00
def clean(mod_names):
    """Delete build artifacts that sit next to each module's source.

    mod_names: iterable of dotted module names.  Each name is converted to
    a relative path (dots become '/'), and the files ``<path>.so``,
    ``<path>.html``, ``<path>.cpp`` and ``<path>.c`` are removed if they
    exist.  Paths are resolved against the current working directory.
    """
    for mod_name in mod_names:
        stem = mod_name.replace('.', '/')
        for suffix in ('.so', '.html', '.cpp', '.c'):
            artifact = stem + suffix
            if os.path.exists(artifact):
                os.unlink(artifact)
2014-07-05 22:49:34 +04:00
2015-01-04 21:30:56 +03:00
def name_to_path(mod_name, ext):
    """Return the relative source path for a dotted module name.

    Dots in ``mod_name`` become '/', and ``ext`` is appended after a '.',
    e.g. ``name_to_path('spacy.tokens.doc', 'cpp')`` gives
    ``'spacy/tokens/doc.cpp'``.
    """
    stem = '/'.join(mod_name.split('.'))
    return '%s.%s' % (stem, ext)
def c_ext(mod_name, language, includes):
    """Create an Extension for an already-generated C/C++ source file.

    mod_name: dotted module name of the extension.
    language: used only as the source file's extension (e.g. 'cpp'); it is
        not forwarded to Extension.
    includes: list of include directories for the extension.
    """
    source_file = name_to_path(mod_name, language)
    return Extension(mod_name, sources=[source_file], include_dirs=includes)
def cython_setup(mod_names, language, includes):
    """Run distutils setup() with extensions compiled from .pyx via Cython.

    mod_names: dotted module names; each is built from the source file
        obtained by replacing '.' with '/' and appending '.pyx'.
    language: source language for the extensions.  'cpp' is translated to
        'c++' before being passed to Extension.
    includes: list of include directories given to every extension.

    Cython is imported lazily here so that the rest of this script does not
    require it.
    """
    import Cython.Distutils
    import Cython.Build
    import distutils.core

    class build_ext_cython_subclass(Cython.Distutils.build_ext, build_ext_options):
        """Cython's build_ext with the per-compiler flags applied first."""

        def build_extensions(self):
            build_ext_options.build_options(self)
            Cython.Distutils.build_ext.build_extensions(self)

    if language == 'cpp':
        language = 'c++'

    exts = [
        Extension(mod_name, [mod_name.replace('.', '/') + '.pyx'],
                  language=language, include_dirs=includes)
        for mod_name in mod_names
    ]

    distutils.core.setup(
        name='spacy',
        packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
                  'spacy.syntax', 'spacy.munge'],
        description="Industrial-strength NLP",
        author='Matthew Honnibal',
        author_email='honnibal@gmail.com',
        version=VERSION,
        # Kept identical to the URL used by run_setup(); this copy
        # previously still said "http://spacy.io".
        url="https://spacy.io",
        package_data=PACKAGE_DATA,
        ext_modules=exts,
        cmdclass={'build_ext': build_ext_cython_subclass},
        license="MIT",
    )
2015-01-04 21:30:56 +03:00
def run_setup(exts):
    """Run setup() for the pre-generated C/C++ extensions, then fix headers.

    exts: list of Extension objects to pass as ``ext_modules``.

    After setup() returns, the ``headers_workaround`` package (declared in
    ``setup_requires``) is imported and used to install the murmurhash and
    numpy headers.
    """
    base_packages = ['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
                     'spacy.syntax', 'spacy.munge', 'spacy.tests']
    test_packages = ['spacy.tests.%s' % sub for sub in (
        'matcher', 'morphology', 'munge', 'parser', 'serialize', 'spans',
        'tagger', 'tokenizer', 'tokens', 'vectors', 'vocab')]
    requirements = [
        'numpy', 'murmurhash == 0.24', 'cymem == 1.30', 'preshed == 0.44',
        'thinc == 4.0.0', "text_unidecode", 'plac', 'six',
        'ujson', 'cloudpickle', 'sputnik == 0.5.2',
    ]

    setup(
        name='spacy',
        packages=base_packages + test_packages,
        description="Industrial-strength NLP",
        author='Matthew Honnibal',
        author_email='honnibal@gmail.com',
        version=VERSION,
        url="https://spacy.io",
        package_data=PACKAGE_DATA,
        ext_modules=exts,
        license="MIT",
        install_requires=requirements,
        setup_requires=["headers_workaround"],
        cmdclass={'build_ext': build_ext_subclass},
    )

    # Imported only after setup() has run, since headers_workaround is
    # listed in setup_requires above.
    import headers_workaround

    headers_workaround.fix_venv_pypy_include()
    headers_workaround.install_headers('murmurhash')
    headers_workaround.install_headers('numpy')
2015-01-04 21:30:56 +03:00
2015-11-18 19:42:56 +03:00
# Package version string; passed as ``version`` to setup() by both
# cython_setup() and run_setup().
VERSION = '0.100'
2015-01-04 21:30:56 +03:00
def main(modules, is_pypy):
    """Add platform-specific flags and run the appropriate build.

    modules: dotted names of the extension modules to build.
    is_pypy: despite its name, the caller in this file passes the
        "build with Cython" flag in this position.  It is used as that flag
        whenever the module-level ``use_cython`` variable is not defined.

    When the Cython flag is true the extensions are built from .pyx sources
    via cython_setup(); otherwise Extension objects for the generated .cpp
    files are created and handed to run_setup().
    """
    language = "cpp"
    includes = ['.', path.join(sys.prefix, 'include')]

    if sys.platform.startswith('darwin'):
        # The option tables are shared module-level lists, so only add each
        # flag once; otherwise repeated calls would pile up duplicates.
        for flag in ('-mmacosx-version-min=10.8', '-stdlib=libc++'):
            if flag not in compile_options['other']:
                compile_options['other'].append(flag)
        if '-lc++' not in link_options['other']:
            link_options['other'].append('-lc++')

    # This function used to read the global ``use_cython`` unconditionally,
    # which raised NameError unless the __main__ guard had defined it.  The
    # global still wins when present (unchanged behaviour); otherwise fall
    # back to the argument the caller supplied.
    build_with_cython = globals().get('use_cython', is_pypy)

    if build_with_cython:
        cython_setup(modules, language, includes)
    else:
        exts = [c_ext(mn, language, includes) for mn in modules]
        run_setup(exts)
2015-01-04 21:30:56 +03:00
2015-01-25 08:32:48 +03:00
# Dotted names of the extension modules to build.  Each name is mapped to a
# source path by replacing '.' with '/' and appending the source extension.
MOD_NAMES = [
    'spacy.parts_of_speech',
    'spacy.strings',
    'spacy.lexeme',
    'spacy.vocab',
    'spacy.attrs',
    'spacy.morphology',
    'spacy.tagger',
    'spacy.syntax.stateclass',
    'spacy.tokenizer',
    'spacy.syntax.parser',
    'spacy.syntax.transition_system',
    'spacy.syntax.arc_eager',
    'spacy.syntax._parse_features',
    'spacy.gold',
    'spacy.orth',
    'spacy.tokens.doc',
    'spacy.tokens.span',
    'spacy.tokens.token',
    'spacy.serialize.packer',
    'spacy.serialize.huffman',
    'spacy.serialize.bits',
    'spacy.cfile',
    'spacy.matcher',
    'spacy.syntax.ner',
    'spacy.symbols',
]
2015-01-04 21:30:56 +03:00
if __name__ == '__main__':
    # The first command-line argument selects the action.  Reading
    # sys.argv[1] directly raised IndexError when the script was run with
    # no arguments; in that case fall through to the normal setup() path.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command == 'clean':
        # Remove generated build artifacts instead of running setup().
        clean(MOD_NAMES)
    else:
        # 'build_ext' compiles from the .pyx sources with Cython; any other
        # command uses the pre-generated C++ files.  main() also reads this
        # module-level name, so it must stay a global.
        use_cython = command == 'build_ext'
        main(MOD_NAMES, use_cython)