mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	Improve setup.py and call into Cython directly (#4952)
* Improve setup.py and call into Cython directly * Add numpy to setup_requires * Improve clean helper * Update setup.cfg * Try if it builds without pyproject.toml * Update MANIFEST.in
This commit is contained in:
		
							parent
							
								
									9b84f987bd
								
							
						
					
					
						commit
						2ed49404e3
					
				|  | @ -1,5 +1,5 @@ | |||
| recursive-include include *.h | ||||
| recursive-include spacy *.txt | ||||
| recursive-include spacy *.pyx *.pxd *.txt | ||||
| include LICENSE | ||||
| include README.md | ||||
| include bin/spacy | ||||
|  |  | |||
							
								
								
									
										169
									
								
								bin/cythonize.py
									
									
									
									
									
								
							
							
						
						
									
										169
									
								
								bin/cythonize.py
									
									
									
									
									
								
							|  | @ -1,169 +0,0 @@ | |||
| #!/usr/bin/env python | ||||
| """ cythonize.py | ||||
| 
 | ||||
| Cythonize pyx files into C++ files as needed. | ||||
| 
 | ||||
| Usage: cythonize.py [root] | ||||
| 
 | ||||
| Checks pyx files to see if they have been changed relative to their | ||||
| corresponding C++ files. If they have, then runs cython on these files to | ||||
| recreate the C++ files. | ||||
| 
 | ||||
| Additionally, checks pxd files and setup.py if they have been changed. If | ||||
| they have, rebuilds everything. | ||||
| 
 | ||||
| Change detection based on file hashes stored in JSON format. | ||||
| 
 | ||||
| For now, this script should be run by developers when changing Cython files | ||||
| and the resulting C++ files checked in, so that end-users (and Python-only | ||||
| developers) do not get the Cython dependencies. | ||||
| 
 | ||||
| Based upon: | ||||
| 
 | ||||
| https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py | ||||
| https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py | ||||
| 
 | ||||
| Note: this script does not check any of the dependent C++ libraries. | ||||
| """ | ||||
| from __future__ import print_function | ||||
| 
 | ||||
| import os | ||||
| import sys | ||||
| import json | ||||
| import hashlib | ||||
| import subprocess | ||||
| import argparse | ||||
| 
 | ||||
| 
 | ||||
| HASH_FILE = "cythonize.json" | ||||
| 
 | ||||
| 
 | ||||
def process_pyx(fromfile, tofile, language_level="-3"):
    """Translate one Cython source file by invoking the Cython compiler.

    ``fromfile`` is the input and ``tofile`` the output path; ``--cplus`` is
    passed when ``tofile`` ends in ``.cpp``. ``language_level`` is handed to
    Cython as a command-line flag.

    The ``cython`` executable is tried first. If launching it raises
    ``OSError``, Cython is run through the current interpreter instead.

    Raises ``Exception`` when an importable Cython is older than 0.25 or when
    the compiler exits with a non-zero status, and ``OSError`` when the
    fallback cannot be launched either.
    """
    print("Processing %s" % fromfile)
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion

        too_old = LooseVersion(cython_version) < LooseVersion("0.25")
        if too_old:
            raise Exception("Require Cython >= 0.25")
    except ImportError:
        # The version can only be checked when Cython is importable here.
        pass

    flags = ["--fast-fail", language_level]
    if tofile.endswith(".cpp"):
        flags.append("--cplus")
    cython_args = flags + ["-o", tofile, fromfile]

    executable_cmd = ["cython"] + cython_args
    # There are ways of installing Cython that don't result in a cython
    # executable on the path, see gh-2397.
    interpreter_cmd = [
        sys.executable,
        "-c",
        "import sys; from Cython.Compiler.Main import "
        "setuptools_main as main; sys.exit(main())",
    ] + cython_args

    try:
        try:
            # The environment is passed explicitly; see Issue #791
            exit_code = subprocess.call(executable_cmd, env=os.environ)
            if exit_code != 0:
                raise Exception("Cython failed")
        except OSError:
            exit_code = subprocess.call(interpreter_cmd)
            if exit_code != 0:
                raise Exception("Cython failed")
    except OSError:
        raise OSError("Cython needs to be installed")
| 
 | ||||
| 
 | ||||
def preserve_cwd(path, func, *args):
    """Call ``func(*args)`` with ``path`` as the working directory.

    The directory that was current on entry is restored afterwards, whether
    or not the directory change or the call raised. The return value of
    ``func`` is discarded.
    """
    saved_dir = os.getcwd()
    try:
        os.chdir(path)
        func(*args)
    finally:
        os.chdir(saved_dir)
| 
 | ||||
| 
 | ||||
def load_hashes(filename):
    """Load the hash database stored as JSON in ``filename``.

    Returns the decoded JSON value, or an empty dict when the file cannot be
    opened or does not contain valid JSON.
    """
    try:
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(filename) as f:
            return json.load(f)
    except (ValueError, IOError):
        return {}
| 
 | ||||
| 
 | ||||
def save_hashes(hash_db, filename):
    """Serialize ``hash_db`` as JSON and write it to ``filename``.

    Any existing content of ``filename`` is replaced.
    """
    with open(filename, "w") as out:
        serialized = json.dumps(hash_db)
        out.write(serialized)
| 
 | ||||
| 
 | ||||
def get_hash(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in binary mode in fixed-size chunks, so it is closed
    as soon as hashing finishes and never has to fit in memory at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
| 
 | ||||
| 
 | ||||
def hash_changed(base, path, db):
    """Tell whether the file ``base/path`` differs from its recorded hash.

    The database key is the normalized joined path. A file with no entry in
    ``db`` counts as changed.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    recorded = db.get(key)
    return current != recorded
| 
 | ||||
| 
 | ||||
def hash_add(base, path, db):
    """Record the current hash of the file ``base/path`` in ``db``.

    The entry is keyed by the normalized joined path and overwrites any
    previous value for that key.
    """
    key = os.path.normpath(os.path.join(base, path))
    digest = get_hash(key)
    db[key] = digest
| 
 | ||||
| 
 | ||||
def process(base, filename, db):
    """Regenerate the C++ file for one Cython module if it is out of date.

    Only ``.pyx`` and ``.cpp`` files are considered; anything else is
    ignored. The module is re-cythonized when the hash of ``filename``
    differs from the one recorded in ``db`` or when the ``.cpp`` output is
    missing, after which the hashes of both the ``.pyx`` and the ``.cpp``
    are recorded in ``db``.

    A ``.cpp`` file without a ``.pyx`` next to it is skipped: there is no
    source to regenerate it from, and running Cython on the missing file
    would abort the whole run.
    """
    root, ext = os.path.splitext(filename)
    if ext not in (".pyx", ".cpp"):
        return

    pyx_name = root + ".pyx"
    cpp_name = root + ".cpp"
    if not os.path.isfile(os.path.join(base, pyx_name)):
        return

    cpp_missing = not os.path.isfile(os.path.join(base, cpp_name))
    if hash_changed(base, filename, db) or cpp_missing:
        # Cython is run from inside ``base`` so the relative names resolve.
        preserve_cwd(base, process_pyx, pyx_name, cpp_name)
        hash_add(base, cpp_name, db)
        hash_add(base, pyx_name, db)
| 
 | ||||
| 
 | ||||
def check_changes(root, db):
    """Detect changes to ``setup.py`` or to any ``.pxd`` file under ``root``.

    ``setup.py`` is looked up relative to the current working directory.
    Returns True if any of those files differs from the hash recorded in
    ``db``. In that case ``db`` is emptied and refilled with only the freshly
    computed ``setup.py``/``.pxd`` hashes, so every ``.pyx`` subsequently
    looks changed and gets rebuilt. When nothing changed, ``db`` is left
    untouched and False is returned.
    """
    fresh = {}

    def _record(base, name):
        # Store the current hash and report whether it differs from ``db``.
        hash_add(base, name, fresh)
        return hash_changed(base, name, db)

    changed = _record(".", "setup.py")

    for base, _, files in os.walk(root):
        for name in files:
            if name.endswith(".pxd") and _record(base, name):
                changed = True

    if not changed:
        return False
    db.clear()
    db.update(fresh)
    return True
| 
 | ||||
| 
 | ||||
def run(root):
    """Cythonize every out-of-date module found under ``root``.

    The hash database is loaded from ``HASH_FILE`` and is always written
    back, even when processing fails part-way, so files that were already
    handled keep their updated hashes.
    """
    hashes = load_hashes(HASH_FILE)

    try:
        check_changes(root, hashes)
        for directory, _, names in os.walk(root):
            for name in names:
                process(directory, name, hashes)
    finally:
        save_hashes(hashes, HASH_FILE)
| 
 | ||||
| 
 | ||||
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the directory to scan.
    cli = argparse.ArgumentParser(
        description="Cythonize pyx files into C++ files as needed"
    )
    cli.add_argument("root", help="root directory")
    run(cli.parse_args().root)
|  | @ -1,3 +0,0 @@ | |||
| [build-system] | ||||
| requires = ["setuptools"] | ||||
| build-backend = "setuptools.build_meta" | ||||
|  | @ -31,6 +31,7 @@ python_requires = >=3.6 | |||
| setup_requires = | ||||
|     wheel | ||||
|     cython>=0.25 | ||||
|     numpy>=1.15.0 | ||||
|     # We also need our Cython packages here to compile against | ||||
|     cymem>=2.0.2,<2.1.0 | ||||
|     preshed>=3.0.2,<3.1.0 | ||||
|  |  | |||
							
								
								
									
										165
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										165
									
								
								setup.py
									
									
									
									
									
								
							|  | @ -1,34 +1,22 @@ | |||
| #!/usr/bin/env python | ||||
| import io | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
| import contextlib | ||||
| from distutils.command.build_ext import build_ext | ||||
| from distutils.sysconfig import get_python_inc | ||||
| import distutils.util | ||||
| from distutils import ccompiler, msvccompiler | ||||
| from setuptools import Extension, setup, find_packages | ||||
| import numpy | ||||
| from pathlib import Path | ||||
| from Cython.Build import cythonize | ||||
| from Cython.Compiler import Options | ||||
| 
 | ||||
| 
 | ||||
def is_new_osx():
    """Check whether we're on OSX >= 10.7.

    Returns True only when running on darwin and the distutils platform tag
    (``macosx-<major>.<minor>-<arch>``) reports version 10.7 or later; major
    versions above 10 count as new as well. Returns False on every other
    platform and when the tag cannot be parsed.
    """
    if sys.platform != "darwin":
        return False
    name = distutils.util.get_platform()
    if not name.startswith("macosx-"):
        return False
    version = name.split("-")[1].split(".")
    try:
        major = int(version[0])
        # A tag without a minor component is treated as ``<major>.0``.
        minor = int(version[1]) if len(version) > 1 else 0
    except ValueError:
        return False
    return (major, minor) >= (10, 7)
| # Preserve `__doc__` on functions and classes | ||||
| # http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options | ||||
| Options.docstrings = True | ||||
| 
 | ||||
| 
 | ||||
| PACKAGES = find_packages() | ||||
| 
 | ||||
| 
 | ||||
| MOD_NAMES = [ | ||||
|     "spacy.parts_of_speech", | ||||
|     "spacy.strings", | ||||
|  | @ -61,16 +49,32 @@ MOD_NAMES = [ | |||
|     "spacy.symbols", | ||||
|     "spacy.vectors", | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| COMPILE_OPTIONS = { | ||||
|     "msvc": ["/Ox", "/EHsc"], | ||||
|     "mingw32": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"], | ||||
|     "other": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"], | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []} | ||||
| COMPILER_DIRECTIVES = { | ||||
|     "language_level": -3, | ||||
|     "embedsignature": True, | ||||
|     "annotation_typing": False, | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
def is_new_osx():
    """Check whether we're on OSX >= 10.7.

    Returns True only when running on darwin and the distutils platform tag
    (``macosx-<major>.<minor>-<arch>``) reports version 10.7 or later; major
    versions above 10 count as new as well. Returns False on every other
    platform and when the tag cannot be parsed.
    """
    if sys.platform != "darwin":
        return False
    name = distutils.util.get_platform()
    if not name.startswith("macosx-"):
        return False
    version = name.split("-")[1].split(".")
    try:
        major = int(version[0])
        # A tag without a minor component is treated as ``<major>.0``.
        minor = int(version[1]) if len(version) > 1 else 0
    except ValueError:
        return False
    return (major, minor) >= (10, 7)
| 
 | ||||
| 
 | ||||
| if is_new_osx(): | ||||
|  | @ -103,95 +107,50 @@ class build_ext_subclass(build_ext, build_ext_options): | |||
|         build_ext.build_extensions(self) | ||||
| 
 | ||||
| 
 | ||||
def generate_cython(root, source):
    """Run ``bin/cythonize.py`` (located under ``root``) on ``source``.

    The script is executed with the current interpreter and the current
    environment. Raises ``RuntimeError`` if it exits with a non-zero status.
    """
    print("Cythonizing sources")
    script = os.path.join(root, "bin", "cythonize.py")
    exit_code = subprocess.call([sys.executable, script, source], env=os.environ)
    if exit_code != 0:
        raise RuntimeError("Running cythonize failed")
| 
 | ||||
| 
 | ||||
def is_source_release(path):
    """Return True if a ``PKG-INFO`` entry exists directly inside ``path``."""
    marker = os.path.join(path, "PKG-INFO")
    return os.path.exists(marker)
| 
 | ||||
| 
 | ||||
| def clean(path): | ||||
|     for name in MOD_NAMES: | ||||
|         name = name.replace(".", "/") | ||||
|         for ext in [".so", ".html", ".cpp", ".c"]: | ||||
|             file_path = os.path.join(path, name + ext) | ||||
|             if os.path.exists(file_path): | ||||
|                 os.unlink(file_path) | ||||
| 
 | ||||
| 
 | ||||
@contextlib.contextmanager
def chdir(new_dir):
    """Temporarily make ``new_dir`` the working directory and first import path.

    On entry the process changes into ``new_dir`` and ``new_dir`` is inserted
    at the front of ``sys.path``. On exit, including when the body raises,
    that entry is removed again and the previous working directory is
    restored.

    If changing directory fails, the error propagates before anything has
    been modified, so ``sys.path`` is left untouched.
    """
    old_dir = os.getcwd()
    # Do the setup outside the try block: cleanup must only undo steps that
    # actually happened.
    os.chdir(new_dir)
    sys.path.insert(0, new_dir)
    try:
        yield
    finally:
        # Remove by value rather than by position, in case the body put
        # something else at the front of sys.path.
        try:
            sys.path.remove(new_dir)
        except ValueError:
            pass
        os.chdir(old_dir)
|     for path in path.glob("**/*"): | ||||
|         if path.is_file() and path.suffix in (".so", ".cpp"): | ||||
|             print(f"Deleting {path.name}") | ||||
|             path.unlink() | ||||
| 
 | ||||
| 
 | ||||
| def setup_package(): | ||||
|     root = os.path.abspath(os.path.dirname(__file__)) | ||||
|     root = Path(__file__).parent | ||||
| 
 | ||||
|     if len(sys.argv) > 1 and sys.argv[1] == "clean": | ||||
|         return clean(root) | ||||
|         return clean(root / "spacy") | ||||
| 
 | ||||
|     with chdir(root): | ||||
|         with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f: | ||||
|             about = {} | ||||
|             exec(f.read(), about) | ||||
|     with (root / "spacy" / "about.py").open("r") as f: | ||||
|         about = {} | ||||
|         exec(f.read(), about) | ||||
| 
 | ||||
|         include_dirs = [ | ||||
|             get_python_inc(plat_specific=True), | ||||
|             os.path.join(root, "include"), | ||||
|         ] | ||||
|     include_dirs = [ | ||||
|         get_python_inc(plat_specific=True), | ||||
|         numpy.get_include(), | ||||
|         str(root / "include"), | ||||
|     ] | ||||
|     if ( | ||||
|         ccompiler.new_compiler().compiler_type == "msvc" | ||||
|         and msvccompiler.get_build_version() == 9 | ||||
|     ): | ||||
|         include_dirs.append(str(root / "include" / "msvc9")) | ||||
|     ext_modules = [] | ||||
|     for name in MOD_NAMES: | ||||
|         mod_path = name.replace(".", "/") + ".pyx" | ||||
|         ext = Extension(name, [mod_path], language="c++") | ||||
|         ext_modules.append(ext) | ||||
|     print("Cythonizing sources") | ||||
|     ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES) | ||||
| 
 | ||||
|         if ( | ||||
|             ccompiler.new_compiler().compiler_type == "msvc" | ||||
|             and msvccompiler.get_build_version() == 9 | ||||
|         ): | ||||
|             include_dirs.append(os.path.join(root, "include", "msvc9")) | ||||
| 
 | ||||
|         ext_modules = [] | ||||
|         for mod_name in MOD_NAMES: | ||||
|             mod_path = mod_name.replace(".", "/") + ".cpp" | ||||
|             extra_link_args = [] | ||||
|             # ??? | ||||
|             # Imported from patch from @mikepb | ||||
|             # See Issue #267. Running blind here... | ||||
|             if sys.platform == "darwin": | ||||
|                 dylib_path = [".." for _ in range(mod_name.count("."))] | ||||
|                 dylib_path = "/".join(dylib_path) | ||||
|                 dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path | ||||
|                 extra_link_args.append("-Wl,-rpath,%s" % dylib_path) | ||||
|             ext_modules.append( | ||||
|                 Extension( | ||||
|                     mod_name, | ||||
|                     [mod_path], | ||||
|                     language="c++", | ||||
|                     include_dirs=include_dirs, | ||||
|                     extra_link_args=extra_link_args, | ||||
|                 ) | ||||
|             ) | ||||
| 
 | ||||
|         if not is_source_release(root): | ||||
|             generate_cython(root, "spacy") | ||||
| 
 | ||||
|         setup( | ||||
|             name="spacy", | ||||
|             packages=PACKAGES, | ||||
|             version=about["__version__"], | ||||
|             ext_modules=ext_modules, | ||||
|             cmdclass={"build_ext": build_ext_subclass}, | ||||
|         ) | ||||
|     setup( | ||||
|         name="spacy", | ||||
|         packages=PACKAGES, | ||||
|         version=about["__version__"], | ||||
|         ext_modules=ext_modules, | ||||
|         cmdclass={"build_ext": build_ext_subclass}, | ||||
|         include_dirs=include_dirs, | ||||
|         package_data={"": ["*.pyx", "*.pxd", "*.pxi", "*.cpp"]}, | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|  |  | |||
|  | @ -1,5 +1,7 @@ | |||
| # cython: embedsignature=True | ||||
| # cython: profile=True | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from cython.operator cimport dereference as deref | ||||
| from cython.operator cimport preincrement as preinc | ||||
| from libc.string cimport memcpy, memset | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| from __future__ import unicode_literals | ||||
| cimport numpy as np | ||||
| from libc.math cimport sqrt | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user