2014-07-05 22:49:34 +04:00
|
|
|
#!/usr/bin/env python
|
2016-01-15 20:57:01 +03:00
|
|
|
from __future__ import print_function
|
2016-10-19 01:27:57 +03:00
|
|
|
import io
|
2015-12-13 13:49:17 +03:00
|
|
|
import os
|
|
|
|
import subprocess
|
2014-07-05 22:49:34 +04:00
|
|
|
import sys
|
2015-12-14 01:32:23 +03:00
|
|
|
import contextlib
|
2015-10-13 01:31:59 +03:00
|
|
|
from distutils.command.build_ext import build_ext
|
2015-12-13 13:49:17 +03:00
|
|
|
from distutils.sysconfig import get_python_inc
|
2018-12-04 02:06:42 +03:00
|
|
|
import distutils.util
|
2016-04-28 23:10:43 +03:00
|
|
|
from distutils import ccompiler, msvccompiler
|
2017-05-03 21:10:59 +03:00
|
|
|
from setuptools import Extension, setup, find_packages
|
2015-12-13 13:49:17 +03:00
|
|
|
|
|
|
|
|
2018-12-04 02:06:42 +03:00
|
|
|
def is_new_osx():
    """Check whether we're on OSX 10.7 or a later 10.x release.

    The decision is based on the distutils platform tag (for example
    ``macosx-10.9-x86_64``), whose second dash-separated field carries the
    version number.

    Returns:
        bool: True when running on darwin and the platform tag starts with
        ``macosx-10`` with a minor version of at least 7, otherwise False.
    """
    if sys.platform != "darwin":
        return False
    name = distutils.util.get_platform()
    # NOTE(review): tags that do not start with "macosx-10" (e.g. a
    # "macosx-11" tag) are reported as not new, exactly as before -- confirm
    # that this is intended.
    if not name.startswith("macosx-10"):
        return False
    minor_version = int(name.split("-")[1].split(".")[1])
    return minor_version >= 7
|
|
|
|
|
|
|
|
|
2018-12-19 15:54:02 +03:00
|
|
|
# Non-Python files shipped with the packages; the "" key applies the
# patterns to every package.
PACKAGE_DATA = {
    "": [
        "*.pyx",
        "*.pxd",
        "*.txt",
        "*.tokens",
        "*.json",
    ]
}

# Packages discovered by setuptools; extended further down when the bundled
# OpenMP library is packaged.
PACKAGES = find_packages()
|
2015-12-13 13:49:17 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Dotted names of the compiled extension modules. Each entry is turned into
# one Extension built from the matching ".cpp" file, and is also used to
# locate the generated files removed by the "clean" command.
MOD_NAMES = [
    "spacy._align",
    "spacy.parts_of_speech",
    "spacy.strings",
    "spacy.lexeme",
    "spacy.vocab",
    "spacy.attrs",
    "spacy.kb",
    "spacy.morphology",
    "spacy.pipeline.pipes",
    "spacy.pipeline.morphologizer",
    "spacy.syntax.stateclass",
    "spacy.syntax._state",
    "spacy.tokenizer",
    "spacy.syntax.nn_parser",
    "spacy.syntax._parser_model",
    "spacy.syntax._beam_utils",
    "spacy.syntax.nonproj",
    "spacy.syntax.transition_system",
    "spacy.syntax.arc_eager",
    "spacy.gold",
    "spacy.tokens.doc",
    "spacy.tokens.span",
    "spacy.tokens.token",
    "spacy.tokens.morphanalysis",
    "spacy.tokens._retokenize",
    "spacy.matcher.matcher",
    "spacy.matcher.phrasematcher",
    "spacy.matcher.dependencymatcher",
    "spacy.syntax.ner",
    "spacy.symbols",
    "spacy.vectors",
]
|
2015-12-13 13:49:17 +03:00
|
|
|
|
|
|
|
|
2018-11-27 00:04:35 +03:00
|
|
|
# Extra compiler flags keyed by distutils compiler type. The "other" entry
# is the fallback for compiler types without an entry of their own. Every
# key gets its own list because the lists are extended in place below.
COMPILE_OPTIONS = dict(
    msvc=["/Ox", "/EHsc"],
    mingw32=["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
    other=["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
)

# Extra linker flags, keyed the same way; empty until a platform check adds
# something.
LINK_OPTIONS = {compiler_type: [] for compiler_type in COMPILE_OPTIONS}

if is_new_osx():
    # On Mac, use libc++ because Apple deprecated use of libstdc++.
    COMPILE_OPTIONS["other"] += ["-stdlib=libc++"]
    # g++ (used by the unix compiler on mac) links to libstdc++ as a default
    # lib, so link libc++ explicitly and drop the default libraries.
    # See: https://stackoverflow.com/questions/1653047/avoid-linking-to-libstdc
    LINK_OPTIONS["other"] += ["-lc++", "-nodefaultlibs"]
|
2016-02-05 16:43:52 +03:00
|
|
|
|
2016-12-20 13:05:06 +03:00
|
|
|
|
2018-11-27 00:04:35 +03:00
|
|
|
# OpenMP is opt-in: it is only enabled when the USE_OPENMP environment
# variable is exactly "1". Neither default value ("0", or None on darwin)
# matches that, so an unset variable leaves OpenMP off on every platform.
USE_OPENMP_DEFAULT = None if sys.platform == "darwin" else "0"
if os.environ.get("USE_OPENMP", USE_OPENMP_DEFAULT) == "1":
    if sys.platform == "win32":
        COMPILE_OPTIONS["msvc"].append("/openmp")
    else:
        COMPILE_OPTIONS["other"].append("-fopenmp")
        LINK_OPTIONS["other"].append("-fopenmp")
        if sys.platform == "darwin":
            # On darwin the *.dylib files under spacy/platform/darwin/lib
            # are shipped as package data as well.
            PACKAGE_DATA["spacy.platform.darwin.lib"] = ["*.dylib"]
            PACKAGES.append("spacy.platform.darwin.lib")
|
2015-01-06 04:34:55 +03:00
|
|
|
|
2017-05-07 19:36:35 +03:00
|
|
|
|
2016-04-19 20:50:42 +03:00
|
|
|
# The compiler that will actually be used is only known after
# finalize_options has run, so the per-compiler flags are attached from
# inside build_extensions, where self.compiler is available.
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
class build_ext_options:
    """Mixin that adds the compiler-specific flags to every extension."""

    def build_options(self):
        """Extend each extension's compile and link arguments in place.

        The flags are looked up in COMPILE_OPTIONS and LINK_OPTIONS by
        ``self.compiler.compiler_type``; unknown compiler types fall back to
        the "other" entry.
        """
        for ext in self.extensions:
            kind = self.compiler.compiler_type
            ext.extra_compile_args += COMPILE_OPTIONS.get(
                kind, COMPILE_OPTIONS["other"]
            )
            ext.extra_link_args += LINK_OPTIONS.get(kind, LINK_OPTIONS["other"])
|
2015-01-17 08:19:54 +03:00
|
|
|
|
|
|
|
|
2015-12-13 13:49:17 +03:00
|
|
|
class build_ext_subclass(build_ext, build_ext_options):
    """``build_ext`` command that applies the per-compiler flags first."""

    def build_extensions(self):
        """Attach the compiler-specific options, then run the stock build."""
        # Both calls name their class explicitly, so the order is fixed:
        # flags first, then the regular distutils build.
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)
|
2015-01-17 08:19:54 +03:00
|
|
|
|
|
|
|
|
2015-12-14 01:32:23 +03:00
|
|
|
def generate_cython(root, source):
    """Run ``bin/cythonize.py`` on ``source`` with the current interpreter.

    Args:
        root: Directory containing the ``bin/cythonize.py`` script.
        source: Argument handed to the script (the package to cythonize).

    Raises:
        RuntimeError: If the script exits with a non-zero status.
    """
    print("Cythonizing sources")
    script = os.path.join(root, "bin", "cythonize.py")
    command = [sys.executable, script, source]
    exit_code = subprocess.call(command, env=os.environ)
    if exit_code != 0:
        raise RuntimeError("Running cythonize failed")
|
2015-12-13 13:49:17 +03:00
|
|
|
|
|
|
|
|
2015-12-14 01:32:23 +03:00
|
|
|
def is_source_release(path):
    """Return True if a ``PKG-INFO`` entry exists directly under ``path``."""
    pkg_info = os.path.join(path, "PKG-INFO")
    return os.path.exists(pkg_info)
|
2015-12-14 01:32:23 +03:00
|
|
|
|
|
|
|
|
|
|
|
def clean(path):
    """Delete the generated files of every module in MOD_NAMES.

    For each module the files ``<module path>.so``, ``.html``, ``.cpp`` and
    ``.c`` below ``path`` are removed if they exist.

    Args:
        path: Directory the dotted module names are resolved against.
    """
    suffixes = (".so", ".html", ".cpp", ".c")
    for mod_name in MOD_NAMES:
        stem = os.path.join(path, mod_name.replace(".", "/"))
        for suffix in suffixes:
            target = stem + suffix
            if os.path.exists(target):
                os.unlink(target)
|
2015-01-25 06:49:10 +03:00
|
|
|
|
|
|
|
|
2015-12-14 01:32:23 +03:00
|
|
|
@contextlib.contextmanager
def chdir(new_dir):
    """Temporarily switch the working directory and extend ``sys.path``.

    On entry the process changes into ``new_dir`` and ``new_dir`` is put at
    the front of ``sys.path``. On exit -- normal or through an exception --
    that ``sys.path`` entry is removed again and the previous working
    directory is restored.

    Args:
        new_dir: Directory to change into.

    Raises:
        OSError: If ``new_dir`` cannot be entered. Neither the working
            directory nor ``sys.path`` has been modified in that case.
    """
    old_dir = os.getcwd()
    # Set up outside the ``try`` so that a failing ``os.chdir`` cannot
    # trigger removal of a ``sys.path`` entry that was never added.
    os.chdir(new_dir)
    sys.path.insert(0, new_dir)
    try:
        yield
    finally:
        # Remove our entry by value: the body may have pushed other entries
        # in front of it, so index 0 is not guaranteed to be ours.
        try:
            sys.path.remove(new_dir)
        except ValueError:
            pass  # The body already removed it; nothing left to undo.
        os.chdir(old_dir)
|
|
|
|
|
|
|
|
|
|
|
|
def setup_package():
    """Build the extension list and run ``setup`` for the spacy package.

    When the first command-line argument is ``clean``, the generated files
    are deleted through ``clean`` and nothing else happens. Otherwise the
    package metadata is read from ``spacy/about.py``, ``README.md`` becomes
    the long description, one C++ ``Extension`` is declared per entry in
    ``MOD_NAMES``, the Cython sources are regenerated unless this is a source
    release, and ``setup`` is called.

    Returns:
        The return value of ``clean(root)`` for the ``clean`` command,
        otherwise None.
    """
    root = os.path.abspath(os.path.dirname(__file__))

    if len(sys.argv) > 1 and sys.argv[1] == "clean":
        return clean(root)

    # The extension sources ("spacy/....cpp") and the script ("bin/spacy")
    # are given as relative paths, so everything runs from the root directory.
    with chdir(root):
        # about.py is executed into a plain dict to collect the metadata
        # (__version__, __summary__, ...) passed to setup() below.
        about = {}
        with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
            exec(f.read(), about)

        with io.open(os.path.join(root, "README.md"), encoding="utf8") as f:
            readme = f.read()

        include_dirs = [
            get_python_inc(plat_specific=True),
            os.path.join(root, "include"),
        ]

        # An extra include directory is added for build version 9 of the
        # MSVC compiler only.
        if (
            ccompiler.new_compiler().compiler_type == "msvc"
            and msvccompiler.get_build_version() == 9
        ):
            include_dirs.append(os.path.join(root, "include", "msvc9"))

        ext_modules = []
        for mod_name in MOD_NAMES:
            mod_path = mod_name.replace(".", "/") + ".cpp"
            extra_link_args = []
            if sys.platform == "darwin":
                # Imported from a patch by @mikepb, see Issue #267. The
                # rpath climbs one directory per dot in the module name and
                # then points at spacy/platform/darwin/lib.
                # NOTE(review): the original authors marked this as untested
                # ("running blind") -- verify before relying on it.
                parents = "/".join([".."] * mod_name.count("."))
                dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % parents
                extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
            ext_modules.append(
                Extension(
                    mod_name,
                    [mod_path],
                    language="c++",
                    include_dirs=include_dirs,
                    extra_link_args=extra_link_args,
                )
            )

        # Only regenerate the .cpp files when no PKG-INFO is present, i.e.
        # when this is not a source release.
        if not is_source_release(root):
            generate_cython(root, "spacy")

        setup(
            name="spacy",
            zip_safe=False,
            packages=PACKAGES,
            package_data=PACKAGE_DATA,
            description=about["__summary__"],
            long_description=readme,
            long_description_content_type="text/markdown",
            author=about["__author__"],
            author_email=about["__email__"],
            version=about["__version__"],
            url=about["__uri__"],
            license=about["__license__"],
            ext_modules=ext_modules,
            scripts=["bin/spacy"],
            install_requires=[
                "numpy>=1.15.0",
                "murmurhash>=0.28.0,<1.1.0",
                "cymem>=2.0.2,<2.1.0",
                "preshed>=2.0.1,<2.1.0",
                "thinc>=7.0.8,<7.1.0",
                "blis>=0.2.2,<0.3.0",
                "plac<1.0.0,>=0.9.6",
                "requests>=2.13.0,<3.0.0",
                "wasabi>=0.2.0,<1.1.0",
                "srsly>=0.0.6,<1.1.0",
                'pathlib==1.0.1; python_version < "3.4"',
            ],
            setup_requires=["wheel"],
            extras_require={
                "cuda": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy>=5.0.0b4"],
                "cuda80": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda80>=5.0.0b4"],
                "cuda90": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda90>=5.0.0b4"],
                "cuda91": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda91>=5.0.0b4"],
                "cuda92": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda92>=5.0.0b4"],
                "cuda100": ["thinc_gpu_ops>=0.0.1,<0.1.0", "cupy-cuda100>=5.0.0b4"],
                # Language tokenizers with external dependencies
                "ja": ["mecab-python3==0.7"],
                "ko": ["natto-py==0.9.0"],
            },
            python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
            classifiers=[
                "Development Status :: 5 - Production/Stable",
                "Environment :: Console",
                "Intended Audience :: Developers",
                "Intended Audience :: Science/Research",
                "License :: OSI Approved :: MIT License",
                "Operating System :: POSIX :: Linux",
                "Operating System :: MacOS :: MacOS X",
                "Operating System :: Microsoft :: Windows",
                "Programming Language :: Cython",
                "Programming Language :: Python :: 2",
                "Programming Language :: Python :: 2.7",
                "Programming Language :: Python :: 3",
                "Programming Language :: Python :: 3.4",
                "Programming Language :: Python :: 3.5",
                "Programming Language :: Python :: 3.6",
                "Programming Language :: Python :: 3.7",
                "Topic :: Scientific/Engineering",
            ],
            cmdclass={"build_ext": build_ext_subclass},
        )
|
2015-01-04 21:30:56 +03:00
|
|
|
|
|
|
|
|
2018-11-27 00:04:35 +03:00
|
|
|
# Run the build only when this file is executed as a script.
if __name__ == "__main__":
    setup_package()
|