2015-12-13 13:53:02 +03:00
|
|
|
#!/usr/bin/env python
|
2016-02-05 17:37:00 +03:00
|
|
|
""" cythonize.py
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Cythonize pyx files into C++ files as needed.
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Usage: cythonize.py root
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
Checks pyx files to see if they have been changed relative to their
|
2016-02-05 17:37:00 +03:00
|
|
|
corresponding C++ files. If they have, then runs cython on these files to
|
|
|
|
recreate the C++ files.
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Additionally, checks pxd files and setup.py if they have been changed. If
|
|
|
|
they have, rebuilds everything.
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Change detection based on file hashes stored in JSON format.
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
For now, this script should be run by developers when changing Cython files
|
2016-02-05 17:37:00 +03:00
|
|
|
and the resulting C++ files checked in, so that end-users (and Python-only
|
|
|
|
developers) do not get the Cython dependencies.
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Based upon:
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
|
2016-02-05 17:37:00 +03:00
|
|
|
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
|
2015-12-13 13:53:02 +03:00
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
Note: this script does not check any of the dependent C++ libraries.
|
2015-12-13 13:53:02 +03:00
|
|
|
"""
|
2016-02-05 17:37:00 +03:00
|
|
|
from __future__ import print_function
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
2016-02-05 17:37:00 +03:00
|
|
|
import json
|
2015-12-13 13:53:02 +03:00
|
|
|
import hashlib
|
|
|
|
import subprocess
|
2016-02-05 17:37:00 +03:00
|
|
|
import argparse
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
# Name of the JSON file holding the recorded file hashes. It is opened as a
# relative path, so it is read from and written to the current working
# directory of the process.
HASH_FILE = 'cythonize.json'
|
|
|
|
|
2015-12-13 13:53:02 +03:00
|
|
|
|
💫 Use Blis for matrix multiplications (#2966)
Our epic matrix multiplication odyssey is drawing to a close...
I've now finally got the Blis linear algebra routines in a self-contained Python package, with wheels for Windows, Linux and OSX. The only missing platform at the moment is Windows Python 2.7. The result is at https://github.com/explosion/cython-blis
Thinc v7.0.0 will make the change to Blis. I've put a Thinc v7.0.0.dev0 up on PyPi so that we can test these changes with the CI, and even get them out to spacy-nightly, before Thinc v7.0.0 is released. This PR also updates the other dependencies to be in line with the current versions master is using. I've also resolved the msgpack deprecation problems, and gotten spaCy and Thinc up to date with the latest Cython.
The point of switching to Blis is to have control of how our matrix multiplications are executed across platforms. When we were using numpy for this, a different library would be used on pip and conda, OSX would use Accelerate, etc. This would open up different bugs and performance problems, especially when multi-threading was introduced.
With the change to Blis, we now strictly single-thread the matrix multiplications. This will make it much easier to use multiprocessing to parallelise the runtime, since we won't have nested parallelism problems to deal with.
* Use blis
* Use -2 arg to Cython
* Update dependencies
* Fix requirements
* Update setup dependencies
* Fix requirement typo
* Fix msgpack errors
* Remove Python27 test from Appveyor, until Blis works there
* Auto-format setup.py
* Fix murmurhash version
2018-11-27 02:44:04 +03:00
|
|
|
def _cython_version_tuple(version):
    """Return the leading dotted numeric components of ``version`` as ints.

    Parsing stops at the first component that is not purely numeric, so a
    pre-release suffix such as ``a11`` or ``b1`` is ignored.
    """
    numbers = []
    for piece in version.split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
        if len(digits) != len(piece):
            # A non-numeric suffix ends the numeric prefix.
            break
    return tuple(numbers)


def process_pyx(fromfile, tofile, language_level='-2'):
    """Run Cython on ``fromfile`` and write the generated source to ``tofile``.

    Parameters:
        fromfile: path of the ``.pyx`` source to compile.
        tofile: path of the output file; a ``.cpp`` suffix adds ``--cplus``.
        language_level: command-line flag passed to Cython verbatim to select
            the language level (``'-2'`` by default).

    Raises:
        Exception: if the importable Cython is older than 0.19, or if the
            Cython process exits with a non-zero status.
        OSError: if neither the ``cython`` executable nor the fallback
            interpreter invocation could be started.
    """
    print('Processing %s' % fromfile)

    # The version check is best effort: when Cython cannot be imported from
    # this interpreter it is skipped and the command-line tool is tried anyway.
    try:
        from Cython.Compiler.Version import version as cython_version
    except ImportError:
        cython_version = None
    if cython_version is not None:
        # Compare numerically without distutils, which newer Python versions
        # no longer ship; an unparsable version string skips the check.
        parsed = _cython_version_tuple(cython_version)
        if parsed and parsed < (0, 19):
            raise Exception('Require Cython >= 0.19')

    flags = ['--fast-fail', language_level]
    if tofile.endswith('.cpp'):
        flags += ['--cplus']
    io_args = ['-o', tofile, fromfile]

    try:
        try:
            r = subprocess.call(['cython'] + flags + io_args,
                                env=os.environ)  # See Issue #791
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see gh-2397.
            r = subprocess.call([sys.executable, '-c',
                                 'import sys; from Cython.Compiler.Main import '
                                 'setuptools_main as main; sys.exit(main())'] + flags +
                                io_args)
    except OSError:
        raise OSError('Cython needs to be installed')

    if r != 0:
        raise Exception('Cython failed')
|
|
|
|
|
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
def preserve_cwd(path, func, *args):
    """Call ``func(*args)`` with ``path`` as the working directory.

    The working directory in effect before the call is restored afterwards,
    whether ``func`` returns or raises.

    Parameters:
        path: directory to change into for the duration of the call.
        func: callable to invoke.
        *args: positional arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    orig_cwd = os.getcwd()
    os.chdir(path)
    try:
        return func(*args)
    finally:
        os.chdir(orig_cwd)
|
|
|
|
|
|
|
|
|
2016-02-05 17:37:00 +03:00
|
|
|
def load_hashes(filename):
    """Load the hash database stored as JSON in ``filename``.

    Parameters:
        filename: path of the JSON file to read.

    Returns:
        The decoded dictionary, or an empty dictionary when the file cannot
        be read, does not contain valid JSON, or does not hold a JSON object.
    """
    try:
        # Use a context manager so the handle is closed even if parsing fails.
        with open(filename) as f:
            loaded = json.load(f)
    except (ValueError, IOError):
        return {}
    # Callers use the result as a mapping; anything else is treated like a
    # corrupt database.
    if not isinstance(loaded, dict):
        return {}
    return loaded
|
|
|
|
|
|
|
|
|
|
|
|
def save_hashes(hash_db, filename):
    """Write ``hash_db`` to ``filename`` as JSON, replacing any previous content.

    Parameters:
        hash_db: JSON-serialisable mapping of hashes to store.
        filename: path of the file to write.
    """
    with open(filename, 'w') as out:
        serialized = json.dumps(hash_db)
        out.write(serialized)
|
2016-02-05 17:37:00 +03:00
|
|
|
|
|
|
|
|
|
|
|
def get_hash(path):
    """Return the hexadecimal MD5 digest of the contents of the file at ``path``.

    The file is read in binary mode and in fixed-size chunks, and is closed
    before returning. The digest is used only for change detection.

    Parameters:
        path: path of the file to hash.

    Returns:
        The digest as a string of hexadecimal characters.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
2016-02-05 17:37:00 +03:00
|
|
|
|
|
|
|
|
|
|
|
def hash_changed(base, path, db):
    """Tell whether a file's current hash differs from the one stored in ``db``.

    Parameters:
        base: directory containing the file.
        path: file name relative to ``base``.
        db: mapping from normalised file path to recorded hash.

    Returns:
        ``True`` when the file has no entry in ``db`` or its entry differs
        from the current hash, ``False`` otherwise.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    recorded = db.get(key)
    return current != recorded
|
|
|
|
|
|
|
|
|
|
|
|
def hash_add(base, path, db):
    """Record the current hash of a file in ``db``.

    Parameters:
        base: directory containing the file.
        path: file name relative to ``base``.
        db: mapping updated in place; the key is the normalised joined path.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    db[key] = current
|
|
|
|
|
|
|
|
|
|
|
|
def process(base, filename, db):
    """Regenerate the C++ file for one ``.pyx``/``.cpp`` pair when it is stale.

    Files with any other extension are ignored. A pair is regenerated when
    the hash of ``filename`` differs from the one recorded in ``db`` or when
    the ``.cpp`` file does not exist; the hashes of both files are then
    recorded in ``db``.

    Parameters:
        base: directory containing ``filename``.
        filename: name of a file inside ``base``.
        db: mapping from normalised file path to recorded hash, updated in
            place.
    """
    root, ext = os.path.splitext(filename)
    if ext not in ('.pyx', '.cpp'):
        return

    pyx_name = root + '.pyx'
    cpp_name = root + '.cpp'

    # A C++ file without a Cython source next to it cannot be regenerated;
    # running Cython on the missing .pyx would abort the whole run.
    if not os.path.isfile(os.path.join(base, pyx_name)):
        return

    if hash_changed(base, filename, db) or not os.path.isfile(os.path.join(base, cpp_name)):
        # Cython is run from inside ``base`` so the bare file names resolve.
        preserve_cwd(base, process_pyx, pyx_name, cpp_name)
        hash_add(base, cpp_name, db)
        hash_add(base, pyx_name, db)
|
|
|
|
|
|
|
|
|
|
|
|
def check_changes(root, db):
    """Reset ``db`` when ``setup.py`` or any ``.pxd`` file under ``root`` changed.

    The hashes of ``setup.py`` (looked up in the current working directory)
    and of every ``.pxd`` file found below ``root`` are collected. If any of
    them differs from its entry in ``db``, ``db`` is emptied and refilled
    with only the collected hashes.

    Parameters:
        root: directory tree to scan for ``.pxd`` files.
        db: mapping from normalised file path to recorded hash, possibly
            replaced in place.

    Returns:
        ``True`` if a change was detected and ``db`` was reset, else ``False``.
    """
    fresh = {}

    setup_py = 'setup.py'
    hash_add('.', setup_py, fresh)
    anything_changed = True if hash_changed('.', setup_py, db) else False

    for directory, _, names in os.walk(root):
        for name in names:
            if not name.endswith('.pxd'):
                continue
            hash_add(directory, name, fresh)
            if hash_changed(directory, name, db):
                anything_changed = True

    if not anything_changed:
        return False

    # Dropping every other entry leaves the remaining files without a
    # recorded hash, so a later comparison reports them as changed.
    db.clear()
    db.update(fresh)
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def run(root):
    """Bring the generated C++ files below ``root`` up to date.

    Loads the hash database from ``HASH_FILE``, resets it if ``setup.py`` or
    a ``.pxd`` file changed, then processes every file found under ``root``.
    The database is written back even when processing raises.

    Parameters:
        root: directory tree to scan.
    """
    hashes = load_hashes(HASH_FILE)

    try:
        check_changes(root, hashes)
        # Lazy generator: directories are listed as the walk reaches them.
        found = (
            (directory, name)
            for directory, _, names in os.walk(root)
            for name in names
        )
        for directory, name in found:
            process(directory, name, hashes)
    finally:
        save_hashes(hashes, HASH_FILE)
|
2015-12-13 13:53:02 +03:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: one required positional argument naming the
    # directory tree to scan.
    arg_parser = argparse.ArgumentParser(
        description='Cythonize pyx files into C++ files as needed',
    )
    arg_parser.add_argument('root', help='root directory')
    cli_args = arg_parser.parse_args()
    run(cli_args.root)
|