Improve setup.py and call into Cython directly (#4952)

* Improve setup.py and call into Cython directly

* Add numpy to setup_requires

* Improve clean helper

* Update setup.cfg

* Try if it builds without pyproject.toml

* Update MANIFEST.in
This commit is contained in:
Ines Montani 2020-02-11 17:46:18 -05:00 committed by GitHub
parent 9b84f987bd
commit 2ed49404e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 67 additions and 276 deletions

View File

@ -1,5 +1,5 @@
recursive-include include *.h
recursive-include spacy *.txt
recursive-include spacy *.pyx *.pxd *.txt
include LICENSE
include README.md
include bin/spacy

View File

@ -1,169 +0,0 @@
#!/usr/bin/env python
""" cythonize.py
Cythonize pyx files into C++ files as needed.
Usage: cythonize.py [root]
Checks pyx files to see if they have been changed relative to their
corresponding C++ files. If they have, then runs cython on these files to
recreate the C++ files.
Additionally, checks pxd files and setup.py if they have been changed. If
they have, rebuilds everything.
Change detection based on file hashes stored in JSON format.
For now, this script should be run by developers when changing Cython files
and the resulting C++ files checked in, so that end-users (and Python-only
developers) do not get the Cython dependencies.
Based upon:
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
Note: this script does not check any of the dependent C++ libraries.
"""
from __future__ import print_function
import os
import sys
import json
import hashlib
import subprocess
import argparse
# Name of the JSON file in which run() loads and saves the content-hash
# database used for change detection.
HASH_FILE = "cythonize.json"
def process_pyx(fromfile, tofile, language_level="-3"):
    """Compile one .pyx source file by invoking Cython.

    The ``cython`` executable is tried first. If launching it raises
    ``OSError``, Cython's ``setuptools_main`` entry point is run through the
    current interpreter instead.

    fromfile: Path of the .pyx file to compile.
    tofile: Path of the file to generate; a ".cpp" suffix adds ``--cplus``.
    language_level: Flag handed to Cython verbatim ("-3" by default).

    Raises Exception when the importable Cython is older than 0.25 or when
    Cython exits with a non-zero status, and OSError when neither way of
    launching Cython could be started.
    """
    print("Processing %s" % fromfile)
    try:
        from Cython.Compiler.Version import version as installed_version
        from distutils.version import LooseVersion

        too_old = LooseVersion(installed_version) < LooseVersion("0.25")
        if too_old:
            raise Exception("Require Cython >= 0.25")
    except ImportError:
        # The version check is skipped when either import is unavailable.
        pass

    options = ["--fast-fail", language_level]
    if tofile.endswith(".cpp"):
        options.append("--cplus")
    io_args = ["-o", tofile, fromfile]
    fallback_code = (
        "import sys; from Cython.Compiler.Main import "
        "setuptools_main as main; sys.exit(main())"
    )

    try:
        try:
            # See Issue #791
            status = subprocess.call(
                ["cython"] + options + io_args, env=os.environ
            )
            if status != 0:
                raise Exception("Cython failed")
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see gh-2397.
            status = subprocess.call(
                [sys.executable, "-c", fallback_code] + options + io_args
            )
            if status != 0:
                raise Exception("Cython failed")
    except OSError:
        raise OSError("Cython needs to be installed")
def preserve_cwd(path, func, *args):
    """Call ``func(*args)`` with ``path`` as the working directory.

    The working directory that was active on entry is restored afterwards,
    whether or not changing directory or the call itself raised. The return
    value of ``func`` is discarded.
    """
    starting_dir = os.getcwd()
    try:
        os.chdir(path)
        func(*args)
    finally:
        # Always return to where the caller started.
        os.chdir(starting_dir)
def load_hashes(filename):
    """Load the hash database stored as JSON in ``filename``.

    Returns the decoded JSON content, or an empty dict when the file cannot
    be opened/read (IOError) or does not contain valid JSON (ValueError).
    """
    try:
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(filename) as f:
            return json.load(f)
    except (ValueError, IOError):
        return {}
def save_hashes(hash_db, filename):
    """Write ``hash_db`` to ``filename`` as JSON, replacing any old content."""
    with open(filename, "w") as out:
        serialized = json.dumps(hash_db)
        out.write(serialized)
def get_hash(path):
    """Return the hexadecimal MD5 digest of the bytes stored at ``path``.

    The digest is used purely for change detection. Errors from opening or
    reading the file propagate to the caller.
    """
    # Read through a context manager so the file handle is always closed.
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()
def hash_changed(base, path, db):
    """Report whether the file ``base/path`` differs from its recorded hash.

    The lookup key is the normalized join of ``base`` and ``path``. A file
    with no entry in ``db`` counts as changed.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    if current == db.get(key):
        return False
    return True
def hash_add(base, path, db):
    """Record the current hash of ``base/path`` in ``db``.

    The entry is keyed by the normalized join of ``base`` and ``path``.
    """
    key = os.path.normpath(os.path.join(base, path))
    digest = get_hash(key)
    db[key] = digest
def process(base, filename, db):
    """Regenerate the .cpp for one .pyx/.cpp file when it is out of date.

    Files with any other extension are ignored. Cython is run from inside
    ``base`` when the file's hash differs from the one recorded in ``db`` or
    when the matching .cpp file does not exist; afterwards the hashes of both
    the .cpp and the .pyx are recorded in ``db``.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix not in (".pyx", ".cpp"):
        return

    pyx_name = stem + ".pyx"
    cpp_name = stem + ".cpp"
    up_to_date = not hash_changed(base, filename, db) and os.path.isfile(
        os.path.join(base, cpp_name)
    )
    if up_to_date:
        return

    preserve_cwd(base, process_pyx, pyx_name, cpp_name)
    hash_add(base, cpp_name, db)
    hash_add(base, pyx_name, db)
def check_changes(root, db):
    """Detect changes to setup.py or any .pxd file below ``root``.

    When at least one of those files differs from its hash in ``db``, ``db``
    is emptied and refilled with the fresh setup.py/.pxd hashes only, which
    leaves every .pyx/.cpp entry missing so it will be seen as changed.

    Returns True if a change was detected, otherwise False.
    """
    fresh = {}
    setup_filename = "setup.py"
    hash_add(".", setup_filename, fresh)
    changed = hash_changed(".", setup_filename, db)

    for dirpath, _, names in os.walk(root):
        for name in names:
            if not name.endswith(".pxd"):
                continue
            hash_add(dirpath, name, fresh)
            if hash_changed(dirpath, name, db):
                changed = True

    if changed:
        db.clear()
        db.update(fresh)
    return changed
def run(root):
    """Process every file below ``root`` and persist the hash database.

    The database is loaded from HASH_FILE, checked for setup.py/.pxd changes,
    updated file by file, and written back to HASH_FILE even if processing
    raised part-way through.
    """
    hashes = load_hashes(HASH_FILE)
    try:
        check_changes(root, hashes)
        for dirpath, _, names in os.walk(root):
            for name in names:
                process(dirpath, name, hashes)
    finally:
        # Keep whatever progress was made, including on failure.
        save_hashes(hashes, HASH_FILE)
if __name__ == "__main__":
    # Command-line entry point: a single positional argument names the
    # directory tree to process.
    arg_parser = argparse.ArgumentParser(
        description="Cythonize pyx files into C++ files as needed"
    )
    arg_parser.add_argument("root", help="root directory")
    cli_args = arg_parser.parse_args()
    run(cli_args.root)

View File

@ -1,3 +0,0 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

View File

@ -31,6 +31,7 @@ python_requires = >=3.6
setup_requires =
wheel
cython>=0.25
numpy>=1.15.0
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0

165
setup.py
View File

@ -1,34 +1,22 @@
#!/usr/bin/env python
import io
import os
import subprocess
import sys
import contextlib
from distutils.command.build_ext import build_ext
from distutils.sysconfig import get_python_inc
import distutils.util
from distutils import ccompiler, msvccompiler
from setuptools import Extension, setup, find_packages
import numpy
from pathlib import Path
from Cython.Build import cythonize
from Cython.Compiler import Options
def is_new_osx():
    """Check whether we're on OSX >= 10.7.

    Returns True only when running on darwin and the distutils platform
    string starts with "macosx-10" with a minor version of at least 7.
    Any other platform string yields False.
    """
    # Cheap check first: no need to query distutils off macOS.
    if sys.platform != "darwin":
        return False
    name = distutils.util.get_platform()
    if not name.startswith("macosx-10"):
        return False
    # Platform strings look like "macosx-10.<minor>-<arch>".
    minor_version = int(name.split("-")[1].split(".")[1])
    return minor_version >= 7
# Preserve `__doc__` on functions and classes
# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
Options.docstrings = True
PACKAGES = find_packages()
MOD_NAMES = [
"spacy.parts_of_speech",
"spacy.strings",
@ -61,16 +49,32 @@ MOD_NAMES = [
"spacy.symbols",
"spacy.vectors",
]
# Extra compiler flags, keyed by "msvc", "mingw32" and "other".
# presumably looked up by compiler type in build_ext_options, which is not
# visible here -- confirm.
COMPILE_OPTIONS = {
"msvc": ["/Ox", "/EHsc"],
"mingw32": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
"other": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
}
# Extra linker flags using the same keys; all empty here.
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
# Directives handed to cythonize(..., compiler_directives=...).
# NOTE(review): Cython documents language_level as 2, 3 or "3str"; -3 mirrors
# the "-3" command-line flag of the removed cythonize.py -- confirm Cython
# interprets it as intended.
COMPILER_DIRECTIVES = {
"language_level": -3,
"embedsignature": True,
"annotation_typing": False,
}
def is_new_osx():
    """Check whether we're on OSX >= 10.7.

    Returns True only when running on darwin and the distutils platform
    string starts with "macosx-10" with a minor version of at least 7.
    Any other platform string yields False.
    """
    # Cheap check first: no need to query distutils off macOS.
    if sys.platform != "darwin":
        return False
    name = distutils.util.get_platform()
    if not name.startswith("macosx-10"):
        return False
    # Platform strings look like "macosx-10.<minor>-<arch>".
    minor_version = int(name.split("-")[1].split(".")[1])
    return minor_version >= 7
if is_new_osx():
@ -103,95 +107,50 @@ class build_ext_subclass(build_ext, build_ext_options):
build_ext.build_extensions(self)
def generate_cython(root, source):
    """Run ``bin/cythonize.py`` under ``root`` on the ``source`` directory.

    The script is executed with the current interpreter and environment.
    Raises RuntimeError if it exits with a non-zero status.
    """
    print("Cythonizing sources")
    script = os.path.join(root, "bin", "cythonize.py")
    exit_code = subprocess.call([sys.executable, script, source], env=os.environ)
    if exit_code == 0:
        return
    raise RuntimeError("Running cythonize failed")
def is_source_release(path):
    """Return True if ``path`` contains a ``PKG-INFO`` entry."""
    marker = os.path.join(path, "PKG-INFO")
    return os.path.exists(marker)
def clean(path):
for name in MOD_NAMES:
name = name.replace(".", "/")
for ext in [".so", ".html", ".cpp", ".c"]:
file_path = os.path.join(path, name + ext)
if os.path.exists(file_path):
os.unlink(file_path)
@contextlib.contextmanager
def chdir(new_dir):
    """Temporarily work inside ``new_dir`` with it first on ``sys.path``.

    On entry the working directory becomes ``new_dir`` and ``new_dir`` is
    inserted at the front of ``sys.path``. On exit, including when the body
    raises, that entry is removed again and the previous working directory
    is restored.

    Raises OSError if ``new_dir`` cannot be entered; in that case neither
    ``sys.path`` nor the working directory is modified.
    """
    old_dir = os.getcwd()
    # Set up before the try block: if chdir fails there is nothing to undo,
    # so the cleanup must not touch sys.path.
    os.chdir(new_dir)
    sys.path.insert(0, new_dir)
    try:
        yield
    finally:
        # Remove the entry by value: the body may have put other entries in
        # front of it, so index 0 is not guaranteed to be ours.
        try:
            sys.path.remove(new_dir)
        except ValueError:
            pass
        os.chdir(old_dir)
for path in path.glob("**/*"):
if path.is_file() and path.suffix in (".so", ".cpp"):
print(f"Deleting {path.name}")
path.unlink()
def setup_package():
root = os.path.abspath(os.path.dirname(__file__))
root = Path(__file__).parent
if len(sys.argv) > 1 and sys.argv[1] == "clean":
return clean(root)
return clean(root / "spacy")
with chdir(root):
with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
about = {}
exec(f.read(), about)
with (root / "spacy" / "about.py").open("r") as f:
about = {}
exec(f.read(), about)
include_dirs = [
get_python_inc(plat_specific=True),
os.path.join(root, "include"),
]
include_dirs = [
get_python_inc(plat_specific=True),
numpy.get_include(),
str(root / "include"),
]
if (
ccompiler.new_compiler().compiler_type == "msvc"
and msvccompiler.get_build_version() == 9
):
include_dirs.append(str(root / "include" / "msvc9"))
ext_modules = []
for name in MOD_NAMES:
mod_path = name.replace(".", "/") + ".pyx"
ext = Extension(name, [mod_path], language="c++")
ext_modules.append(ext)
print("Cythonizing sources")
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
if (
ccompiler.new_compiler().compiler_type == "msvc"
and msvccompiler.get_build_version() == 9
):
include_dirs.append(os.path.join(root, "include", "msvc9"))
ext_modules = []
for mod_name in MOD_NAMES:
mod_path = mod_name.replace(".", "/") + ".cpp"
extra_link_args = []
# ???
# Imported from patch from @mikepb
# See Issue #267. Running blind here...
if sys.platform == "darwin":
dylib_path = [".." for _ in range(mod_name.count("."))]
dylib_path = "/".join(dylib_path)
dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path
extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
ext_modules.append(
Extension(
mod_name,
[mod_path],
language="c++",
include_dirs=include_dirs,
extra_link_args=extra_link_args,
)
)
if not is_source_release(root):
generate_cython(root, "spacy")
setup(
name="spacy",
packages=PACKAGES,
version=about["__version__"],
ext_modules=ext_modules,
cmdclass={"build_ext": build_ext_subclass},
)
setup(
name="spacy",
packages=PACKAGES,
version=about["__version__"],
ext_modules=ext_modules,
cmdclass={"build_ext": build_ext_subclass},
include_dirs=include_dirs,
package_data={"": ["*.pyx", "*.pxd", "*.pxi", "*.cpp"]},
)
if __name__ == "__main__":

View File

@ -1,5 +1,7 @@
# cython: embedsignature=True
# cython: profile=True
from __future__ import unicode_literals
from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as preinc
from libc.string cimport memcpy, memset

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
cimport numpy as np
from libc.math cimport sqrt