spaCy/setup.py

277 lines
9.0 KiB
Python
Raw Normal View History

2014-07-05 22:49:34 +04:00
#!/usr/bin/env python
2016-01-15 20:57:01 +03:00
from __future__ import print_function
2016-10-19 01:27:57 +03:00
import io
2015-12-13 13:49:17 +03:00
import os
import subprocess
2014-07-05 22:49:34 +04:00
import sys
2015-12-14 01:32:23 +03:00
import contextlib
from distutils.command.build_ext import build_ext
2015-12-13 13:49:17 +03:00
from distutils.sysconfig import get_python_inc
2018-12-04 02:06:42 +03:00
import distutils.util
2016-04-28 23:10:43 +03:00
from distutils import ccompiler, msvccompiler
2017-05-03 21:10:59 +03:00
from setuptools import Extension, setup, find_packages
2015-12-13 13:49:17 +03:00
2018-12-04 02:06:42 +03:00
def is_new_osx():
    """Return True when building on macOS 10.x with a minor version of 7 or later.

    The minor version is parsed out of the platform tag returned by
    ``distutils.util.get_platform()``. Any non-darwin platform, and any tag
    that does not start with ``macosx-10``, yields False.
    """
    platform_tag = distutils.util.get_platform()
    if sys.platform != "darwin":
        return False
    if not platform_tag.startswith("macosx-10"):
        return False
    # The second dash-separated field is the version; its second
    # dot-separated component is the minor version.
    version = platform_tag.split("-")[1]
    minor_version = int(version.split(".")[1])
    return minor_version >= 7
2018-12-19 15:54:02 +03:00
# File patterns handed to setup() as ``package_data``. The OpenMP
# configuration later in this file may add a darwin-specific entry.
PACKAGE_DATA = {"": ["*.pyx", "*.pxd", "*.txt", "*.tokens", "*.json"]}
# Package list handed to setup() as ``packages``; also extended by the
# OpenMP configuration on darwin.
PACKAGES = find_packages()
# Dotted names of the extension modules. setup_package() turns each name into
# a ``.cpp`` source path and an Extension; clean() uses the same names to find
# generated files to delete.
MOD_NAMES = [
    "spacy._align",
    "spacy.parts_of_speech",
    "spacy.strings",
    "spacy.lexeme",
    "spacy.vocab",
    "spacy.attrs",
    "spacy.kb",
    "spacy.morphology",
    "spacy.pipeline.pipes",
    "spacy.syntax.stateclass",
    "spacy.syntax._state",
    "spacy.tokenizer",
    "spacy.syntax.nn_parser",
    "spacy.syntax._parser_model",
    "spacy.syntax._beam_utils",
    "spacy.syntax.nonproj",
    "spacy.syntax.transition_system",
    "spacy.syntax.arc_eager",
    "spacy.gold",
    "spacy.tokens.doc",
    "spacy.tokens.span",
    "spacy.tokens.token",
    "spacy.tokens._retokenize",
    "spacy.matcher.matcher",
    "spacy.matcher.phrasematcher",
    "spacy.matcher.dependencymatcher",
    "spacy.syntax.ner",
    "spacy.symbols",
    "spacy.vectors",
]
2015-12-13 13:49:17 +03:00
# Extra compiler flags keyed by distutils compiler type. The "other" entry is
# the fallback for any compiler type that has no entry of its own. Each value
# is a separate list because the entries are mutated independently later on.
_GCC_STYLE_FLAGS = ("-O2", "-Wno-strict-prototypes", "-Wno-unused-function")
COMPILE_OPTIONS = {
    "msvc": ["/Ox", "/EHsc"],
    "mingw32": list(_GCC_STYLE_FLAGS),
    "other": list(_GCC_STYLE_FLAGS),
}
# Extra linker flags, keyed the same way; all start out empty.
LINK_OPTIONS = {compiler_type: [] for compiler_type in ("msvc", "mingw32", "other")}
2016-02-05 16:43:52 +03:00
2016-12-20 13:05:06 +03:00
2018-12-04 02:06:42 +03:00
if is_new_osx():
    # On Mac, compile against libc++ because Apple deprecated libstdc++.
    COMPILE_OPTIONS["other"].append("-stdlib=libc++")
    # g++ (used by the unix compiler on mac) links to libstdc++ as a default
    # lib, so link libc++ explicitly and switch the default libs off.
    # See: https://stackoverflow.com/questions/1653047/avoid-linking-to-libstdc
    LINK_OPTIONS["other"].extend(["-lc++", "-nodefaultlibs"])

# OpenMP is enabled only when the USE_OPENMP environment variable is "1";
# neither default value ("0" or None) equals "1".
USE_OPENMP_DEFAULT = None if sys.platform == "darwin" else "0"
if os.environ.get("USE_OPENMP", USE_OPENMP_DEFAULT) == "1":
    if sys.platform == "win32":
        COMPILE_OPTIONS["msvc"].append("/openmp")
    else:
        COMPILE_OPTIONS["other"].append("-fopenmp")
        LINK_OPTIONS["other"].append("-fopenmp")
        if sys.platform == "darwin":
            # On darwin the package carrying the *.dylib files is shipped too.
            PACKAGE_DATA["spacy.platform.darwin.lib"] = ["*.dylib"]
            PACKAGES.append("spacy.platform.darwin.lib")
2017-05-07 19:36:35 +03:00
2016-04-19 20:50:42 +03:00
# Subclassing build_ext gives us access to the compiler that will actually be
# used, which is only known after finalize_options() has run.
# See: http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
2015-12-13 13:49:17 +03:00
class build_ext_options:
    """Mixin that adds the per-compiler flags to every extension."""

    def build_options(self):
        """Extend each extension's compile and link args for the active compiler.

        Flags are looked up by ``self.compiler.compiler_type`` in
        COMPILE_OPTIONS and LINK_OPTIONS; a compiler type without its own
        entry gets the ``"other"`` flags.
        """
        for extension in self.extensions:
            compiler_type = self.compiler.compiler_type
            extension.extra_compile_args.extend(
                COMPILE_OPTIONS.get(compiler_type, COMPILE_OPTIONS["other"])
            )
            extension.extra_link_args.extend(
                LINK_OPTIONS.get(compiler_type, LINK_OPTIONS["other"])
            )
2015-01-17 08:19:54 +03:00
2015-12-13 13:49:17 +03:00
class build_ext_subclass(build_ext, build_ext_options):
    """build_ext command that applies the per-compiler flags before building."""

    def build_extensions(self):
        """Add the compiler-specific options, then run the regular build."""
        # The flags have to be attached to the extensions before the base
        # class starts compiling them.
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)
2015-01-17 08:19:54 +03:00
2015-12-14 01:32:23 +03:00
def generate_cython(root, source):
    """Run ``bin/cythonize.py`` (located under ``root``) on ``source``.

    The script is started with the current interpreter and the current
    environment.

    Raises:
        RuntimeError: if the script exits with a non-zero status.
    """
    print("Cythonizing sources")
    script = os.path.join(root, "bin", "cythonize.py")
    command = [sys.executable, script, source]
    exit_status = subprocess.call(command, env=os.environ)
    if exit_status == 0:
        return
    raise RuntimeError("Running cythonize failed")
2015-12-13 13:49:17 +03:00
2015-12-14 01:32:23 +03:00
def is_source_release(path):
    """Report whether ``path`` contains a ``PKG-INFO`` entry."""
    pkg_info = os.path.join(path, "PKG-INFO")
    return os.path.exists(pkg_info)
2015-12-14 01:32:23 +03:00
def clean(path):
    """Delete generated files for every module listed in MOD_NAMES.

    For each module, the files under ``path`` named after the module with
    the suffixes ``.so``, ``.html``, ``.cpp`` and ``.c`` are removed when
    they exist.
    """
    generated_suffixes = (".so", ".html", ".cpp", ".c")
    for mod_name in MOD_NAMES:
        # "spacy.tokens.doc" -> "<path>/spacy/tokens/doc"
        stem = os.path.join(path, mod_name.replace(".", "/"))
        for suffix in generated_suffixes:
            artifact = stem + suffix
            if os.path.exists(artifact):
                os.unlink(artifact)
2015-01-25 06:49:10 +03:00
2015-12-14 01:32:23 +03:00
@contextlib.contextmanager
def chdir(new_dir):
    """Temporarily make ``new_dir`` the working directory and first sys.path entry.

    On exit, including when the body raises, the previous working directory
    is restored and the ``sys.path`` entry added here is removed again.

    Args:
        new_dir: Directory to switch into.

    Raises:
        OSError: if ``new_dir`` cannot be entered. In that case neither the
            working directory nor ``sys.path`` has been modified.
    """
    old_dir = os.getcwd()
    # Do the setup outside the try block: if os.chdir() raises, nothing has
    # been changed yet, so the cleanup must not run and drop an unrelated
    # sys.path entry.
    os.chdir(new_dir)
    sys.path.insert(0, new_dir)
    try:
        yield
    finally:
        # Remove the entry added above rather than whatever sits at index 0,
        # since the body may have modified sys.path itself.
        try:
            sys.path.remove(new_dir)
        except ValueError:
            pass
        os.chdir(old_dir)
def _build_ext_modules(include_dirs):
    """Create one C++ ``Extension`` per entry in ``MOD_NAMES``."""
    ext_modules = []
    for mod_name in MOD_NAMES:
        mod_path = mod_name.replace(".", "/") + ".cpp"
        extra_link_args = []
        if sys.platform == "darwin":
            # Imported from a patch by @mikepb, see Issue #267: add an rpath
            # that climbs one ".." per dot in the module name and then
            # descends into spacy/platform/darwin/lib.
            parent_dirs = "/".join([".."] * mod_name.count("."))
            dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % parent_dirs
            extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
        ext_modules.append(
            Extension(
                mod_name,
                [mod_path],
                language="c++",
                include_dirs=include_dirs,
                extra_link_args=extra_link_args,
            )
        )
    return ext_modules


def setup_package():
    """Entry point of the build script.

    When the first command-line argument is ``clean``, the generated files
    are removed via ``clean()`` and nothing else happens. Otherwise the
    package metadata is read from ``spacy/about.py`` and ``README.md``, the
    extension modules are assembled, the sources are cythonized unless this
    is a source release (a ``PKG-INFO`` file is present), and ``setup()`` is
    called.
    """
    root = os.path.abspath(os.path.dirname(__file__))

    if len(sys.argv) > 1 and sys.argv[1] == "clean":
        return clean(root)

    with chdir(root):
        # Execute about.py into a fresh namespace to collect the metadata
        # values (__version__, __author__, ...) used below.
        with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
            about = {}
            exec(f.read(), about)

        with io.open(os.path.join(root, "README.md"), encoding="utf8") as f:
            readme = f.read()

        include_dirs = [
            get_python_inc(plat_specific=True),
            os.path.join(root, "include"),
        ]

        # MSVC build version 9 gets an additional include directory.
        if (
            ccompiler.new_compiler().compiler_type == "msvc"
            and msvccompiler.get_build_version() == 9
        ):
            include_dirs.append(os.path.join(root, "include", "msvc9"))

        ext_modules = _build_ext_modules(include_dirs)

        if not is_source_release(root):
            generate_cython(root, "spacy")

        setup(
            name="spacy",
            zip_safe=False,
            packages=PACKAGES,
            package_data=PACKAGE_DATA,
            description=about["__summary__"],
            long_description=readme,
            long_description_content_type="text/markdown",
            author=about["__author__"],
            author_email=about["__email__"],
            version=about["__version__"],
            url=about["__uri__"],
            license=about["__license__"],
            ext_modules=ext_modules,
            scripts=["bin/spacy"],
            install_requires=[
                "numpy>=1.15.0",
                "murmurhash>=0.28.0,<1.1.0",
                "cymem>=2.0.2,<2.1.0",
                "preshed>=2.0.1,<2.1.0",
                "thinc>=7.0.2,<7.1.0",
                "blis>=0.2.2,<0.3.0",
                "plac<1.0.0,>=0.9.6",
                "requests>=2.13.0,<3.0.0",
                "jsonschema>=2.6.0,<3.1.0",
                "wasabi>=0.2.0,<1.1.0",
                "srsly>=0.0.5,<1.1.0",
                'pathlib==1.0.1; python_version < "3.4"',
            ],
            setup_requires=["wheel"],
            extras_require={
                "cuda": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy>=5.0.0b4"],
                "cuda80": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda80>=5.0.0b4"],
                "cuda90": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda90>=5.0.0b4"],
                "cuda91": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda91>=5.0.0b4"],
                "cuda92": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda92>=5.0.0b4"],
                "cuda100": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda100>=5.0.0b4"],
                # Language tokenizers with external dependencies
                "ja": ["mecab-python3==0.7"],
            },
            python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
            classifiers=[
                "Development Status :: 5 - Production/Stable",
                "Environment :: Console",
                "Intended Audience :: Developers",
                "Intended Audience :: Science/Research",
                "License :: OSI Approved :: MIT License",
                "Operating System :: POSIX :: Linux",
                "Operating System :: MacOS :: MacOS X",
                "Operating System :: Microsoft :: Windows",
                "Programming Language :: Cython",
                "Programming Language :: Python :: 2",
                "Programming Language :: Python :: 2.7",
                "Programming Language :: Python :: 3",
                "Programming Language :: Python :: 3.4",
                "Programming Language :: Python :: 3.5",
                "Programming Language :: Python :: 3.6",
                "Programming Language :: Python :: 3.7",
                "Topic :: Scientific/Engineering",
            ],
            cmdclass={"build_ext": build_ext_subclass},
        )
2015-01-04 21:30:56 +03:00
# Run the build only when this file is executed as a script.
if __name__ == "__main__":
    setup_package()