spaCy/bin/cythonize.py

170 lines
4.5 KiB
Python
Raw Normal View History

2015-12-13 13:53:02 +03:00
#!/usr/bin/env python
2016-02-05 17:37:00 +03:00
""" cythonize.py
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Cythonize pyx files into C++ files as needed.
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Usage: cythonize.py [root]
2015-12-13 13:53:02 +03:00
Checks pyx files to see if they have been changed relative to their
2016-02-05 17:37:00 +03:00
corresponding C++ files. If they have, then runs cython on these files to
recreate the C++ files.
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Additionally, checks pxd files and setup.py if they have been changed. If
they have, rebuilds everything.
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Change detection based on file hashes stored in JSON format.
2015-12-13 13:53:02 +03:00
For now, this script should be run by developers when changing Cython files
2016-02-05 17:37:00 +03:00
and the resulting C++ files checked in, so that end-users (and Python-only
developers) do not get the Cython dependencies.
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Based upon:
2015-12-13 13:53:02 +03:00
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
2016-02-05 17:37:00 +03:00
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
Note: this script does not check any of the dependent C++ libraries.
2015-12-13 13:53:02 +03:00
"""
2016-02-05 17:37:00 +03:00
from __future__ import print_function
2015-12-13 13:53:02 +03:00
import os
import sys
2016-02-05 17:37:00 +03:00
import json
2015-12-13 13:53:02 +03:00
import hashlib
import subprocess
2016-02-05 17:37:00 +03:00
import argparse
2015-12-13 13:53:02 +03:00
HASH_FILE = "cythonize.json"
2016-02-05 17:37:00 +03:00
2015-12-13 13:53:02 +03:00
def process_pyx(fromfile, tofile, language_level="-2"):
    """Run Cython on ``fromfile`` and write the generated source to ``tofile``.

    ``language_level`` is passed to Cython verbatim as a command-line flag.
    ``--cplus`` is added when ``tofile`` ends with ``.cpp``.

    Raises ``Exception`` if an importable Cython is older than 0.19 or if
    the Cython process exits with a non-zero status, and ``OSError`` if the
    fallback interpreter invocation cannot be started either.
    """
    print("Processing %s" % fromfile)

    # The version check is best-effort: when the imports are unavailable it
    # is skipped and the command-line invocation below is still attempted.
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion
    except ImportError:
        pass
    else:
        if LooseVersion(cython_version) < LooseVersion("0.19"):
            raise Exception("Require Cython >= 0.19")

    flags = ["--fast-fail", language_level]
    if tofile.endswith(".cpp"):
        flags.append("--cplus")
    io_args = ["-o", tofile, fromfile]

    try:
        try:
            # See Issue #791
            status = subprocess.call(
                ["cython"] + flags + io_args, env=os.environ
            )
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see gh-2397.
            launcher = [
                sys.executable,
                "-c",
                "import sys; from Cython.Compiler.Main import "
                "setuptools_main as main; sys.exit(main())",
            ]
            status = subprocess.call(launcher + flags + io_args)
        if status != 0:
            raise Exception("Cython failed")
    except OSError:
        raise OSError("Cython needs to be installed")
2015-12-13 13:53:02 +03:00
2016-02-05 17:37:00 +03:00
def preserve_cwd(path, func, *args):
    """Call ``func(*args)`` with ``path`` as the current working directory.

    The working directory that was active on entry is restored afterwards,
    whether ``func`` returns normally or raises.

    Returns whatever ``func`` returns. Any exception raised by ``os.chdir``
    or by ``func`` propagates to the caller.
    """
    orig_cwd = os.getcwd()
    try:
        os.chdir(path)
        # Hand the callee's result back instead of silently dropping it.
        return func(*args)
    finally:
        os.chdir(orig_cwd)
2016-02-05 17:37:00 +03:00
def load_hashes(filename):
    """Load the hash database stored as JSON in ``filename``.

    Returns the decoded dict. Returns an empty dict when the file cannot be
    opened, does not contain valid JSON, or decodes to something other than
    a dict (the callers use the result as a mapping).
    """
    try:
        # ``with`` closes the handle even when decoding fails.
        with open(filename) as f:
            loaded = json.load(f)
    except (ValueError, IOError):
        return {}
    if not isinstance(loaded, dict):
        return {}
    return loaded
def save_hashes(hash_db, filename):
    """Serialize ``hash_db`` as JSON and write it to ``filename``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(filename, "w") as handle:
        payload = json.dumps(hash_db)
        handle.write(payload)
2016-02-05 17:37:00 +03:00
def get_hash(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in binary mode in fixed-size chunks, so it is never
    held in memory whole, and the handle is closed before returning.
    Errors from ``open`` propagate to the caller.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the empty read that signals EOF.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
2016-02-05 17:37:00 +03:00
def hash_changed(base, path, db):
    """Report whether the file's current hash differs from the one in ``db``.

    The lookup key is the normalized join of ``base`` and ``path``. A file
    with no entry in ``db`` counts as changed.
    """
    key = os.path.normpath(os.path.join(base, path))
    recorded = db.get(key)
    current = get_hash(key)
    return not current == recorded
def hash_add(base, path, db):
    """Store the file's current hash in ``db``.

    The key is the normalized join of ``base`` and ``path``; ``db`` is
    modified in place.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    db[key] = current
def process(base, filename, db):
    """Regenerate the ``.cpp`` for ``filename`` when it is out of date.

    Only ``.pyx`` and ``.cpp`` files are considered. Cython runs inside
    ``base`` when the file's hash differs from the one recorded in ``db``
    or when the ``.cpp`` counterpart does not exist. Afterwards the hashes
    of both the ``.cpp`` and the ``.pyx`` file are recorded in ``db``.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix not in (".pyx", ".cpp"):
        return

    pyx_name = stem + ".pyx"
    cpp_name = stem + ".cpp"

    # Up to date: hash unchanged and the generated file is present.
    if not hash_changed(base, filename, db) and os.path.isfile(
        os.path.join(base, cpp_name)
    ):
        return

    preserve_cwd(base, process_pyx, pyx_name, cpp_name)
    hash_add(base, cpp_name, db)
    hash_add(base, pyx_name, db)
2016-02-05 17:37:00 +03:00
def check_changes(root, db):
    """Detect changes to ``setup.py`` or to any ``.pxd`` file under ``root``.

    ``setup.py`` is looked up relative to the current working directory.
    When anything changed, ``db`` is emptied and refilled with the freshly
    computed hashes of ``setup.py`` and the ``.pxd`` files only.

    Returns ``True`` if a change was detected, ``False`` otherwise.
    """
    fresh = {}

    hash_add(".", "setup.py", fresh)
    changed = hash_changed(".", "setup.py", db)

    for base, _, files in os.walk(root):
        for filename in files:
            if not filename.endswith(".pxd"):
                continue
            hash_add(base, filename, fresh)
            if hash_changed(base, filename, db):
                changed = True

    if changed:
        # Dropping every other entry from db makes the later per-file
        # hash comparisons treat those files as changed.
        db.clear()
        db.update(fresh)
    return changed
def run(root):
    """Bring every Cython output under ``root`` up to date.

    Loads the hash database from ``HASH_FILE``, checks the global
    dependencies, then visits every file found by walking ``root``. The
    database is written back even if processing raises.
    """
    hashes = load_hashes(HASH_FILE)
    try:
        check_changes(root, hashes)
        for directory, _, names in os.walk(root):
            for name in names:
                process(directory, name, hashes)
    finally:
        save_hashes(hashes, HASH_FILE)
2015-12-13 13:53:02 +03:00
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the directory to scan.
    cli = argparse.ArgumentParser(
        description="Cythonize pyx files into C++ files as needed"
    )
    cli.add_argument("root", help="root directory")
    run(cli.parse_args().root)