diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 000000000..8f0a21967 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,87 @@ +environment: + global: + # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the + # /E:ON and /V:ON options are not enabled in the batch script interpreter + # See: http://stackoverflow.com/a/13751649/163740 + CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" + + matrix: + + # Python 2.7.10 is the latest version and is not pre-installed. + + - PYTHON: "C:\\Python27.10-x64" + PYTHON_VERSION: "2.7.10" + PYTHON_ARCH: "64" + + # The latest Python 3.4. + - PYTHON: "C:\\Python34-x64" + PYTHON_VERSION: "3.4.x" # currently 3.4.3 + PYTHON_ARCH: "64" + +install: + # Install Python (from the official .msi of http://python.org) and pip when + # not already installed. + - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 } + + # Prepend newly installed Python to the PATH of this build (this cannot be + # done from inside the powershell script as it would require restarting + # the parent CMD process). + - "SET PATH=%CD%;%PYTHON%;%PYTHON%\\Scripts;%PATH%" + - "SET PYTHONPATH=%CD%;%PYTHONPATH%" + + # Filesystem root + # - ps: "ls \"C:/\"" + + # Installed SDKs + # - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\"" + + # Checking stdint.h + #- ps: "ls \"C:/projects/spacy/include/\"" + + + # Check that we have the expected version and architecture for Python + - "python --version" + - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" + + # Upgrade to the latest version of pip to avoid it displaying warnings + # about it being out of date. + - "pip install --disable-pip-version-check --user --upgrade pip" + + # Install the build dependencies of the project. 
If some dependencies contain + # compiled extensions and are not provided as pre-built wheel packages, + # pip will build them from source using the MSVC compiler matching the + # target Python version and architecture + - "pip install --upgrade setuptools" + - "%CMD_IN_ENV% pip install cython fabric fabtools" + - "%CMD_IN_ENV% pip install -r requirements.txt" + +build_script: + # Build the compiled extension + - "%CMD_IN_ENV% python setup.py build_ext --inplace" + - ps: appveyor\download.ps1 + - "tar -xzf corpora/en/wordnet.tar.gz" + #- "ls \"C:/projects/spacy/corpora/en/" + #- "ls \"C:/projects/spacy/" + - "%CMD_IN_ENV% python bin/init_model.py en lang_data/ corpora/ spacy/en/data" + + +test_script: + # Run the project tests + - "pip install pytest" + - "%CMD_IN_ENV% py.test tests/ -x" + +after_test: + # If tests are successful, create binary packages for the project. + #- "%CMD_IN_ENV% python setup.py bdist_wheel" + #- "%CMD_IN_ENV% python setup.py bdist_wininst" + #- "%CMD_IN_ENV% python setup.py bdist_msi" + #- ps: "ls dist" + +artifacts: + # Archive the generated packages in the ci.appveyor.com build report. 
+ - path: dist\* + +#on_success: +# - TODO: upload the content of dist/*.whl to a public wheelhouse +# + diff --git a/.gitignore b/.gitignore index 70eddf717..40a800245 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,10 @@ coverage.xml # Sphinx documentation docs/_build/ docs/_themes/ +setup.py + +# Windows local helper files +*.bat + +# Komodo project files +*.komodoproject \ No newline at end of file diff --git a/README-MSVC.txt b/README-MSVC.txt new file mode 100644 index 000000000..db64d16c5 --- /dev/null +++ b/README-MSVC.txt @@ -0,0 +1,13 @@ +The Python 2.7 Windows build has been tested with the following toolchain: + - Python 2.7.10 :) + - Microsoft Visual C++ Compiler Package for Python 2.7 http://www.microsoft.com/en-us/download/details.aspx?id=44266 + - C99 compliant stdint.h for MSVC http://msinttypes.googlecode.com/svn/trunk/stdint.h + (a C99 compliant stdint.h header, which is not supplied with the Microsoft Visual C++ compiler prior to MSVC 2010) + +Build steps: + - pip install --upgrade setuptools + - pip install cython fabric fabtools + - pip install -r requirements.txt + - python setup.py build_ext --inplace + + \ No newline at end of file diff --git a/appveyor/download.ps1 b/appveyor/download.ps1 new file mode 100644 index 000000000..8f722fca0 --- /dev/null +++ b/appveyor/download.ps1 @@ -0,0 +1,51 @@ +# Wordnet download Windows script + +$WORDNET_URL = "http://wordnetcode.princeton.edu/3.0/WordNet-3.0.tar.gz" +$WORDNET_RELATIVE_PATH = "corpora\en" + +function Download ($filename, $url) { + $webclient = New-Object System.Net.WebClient + + $basedir = $pwd.Path + "\" + $filepath = $basedir + $filename + if (Test-Path $filename) { + Write-Host "Reusing" $filepath + return $filepath + } + + # Download and retry up to 3 times in case of network transient errors. 
+ Write-Host "Downloading" $filename "from" $url + $retry_attempts = 2 + for ($i = 0; $i -lt $retry_attempts; $i++) { + try { + $webclient.DownloadFile($url, $filepath) + break + } + Catch [Exception]{ + Start-Sleep 1 + } + } + if (Test-Path $filepath) { + Write-Host "File saved at" $filepath + } else { + # Retry once to get the error message if any at the last try + $webclient.DownloadFile($url, $filepath) + } + return $filepath +} + +function InstallWordNet () { + if((Test-Path $WORDNET_RELATIVE_PATH) -eq 0) + { + mkdir $WORDNET_RELATIVE_PATH; + } + $wordnet_fname = $WORDNET_RELATIVE_PATH + "\wordnet.tar.gz" + Download $wordnet_fname $WORDNET_URL +} + + +function main () { + InstallWordNet +} + +main \ No newline at end of file diff --git a/appveyor/install.ps1 b/appveyor/install.ps1 new file mode 100644 index 000000000..660041f8d --- /dev/null +++ b/appveyor/install.ps1 @@ -0,0 +1,249 @@ +# Based on the script to install Python and pip under Windows +# Authors: Olivier Grisel, Jonathan Helmus, Kyle Kastner, and Alex Willmer +# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ +# +# Added: stdint.h download for Python 2.x + +$MINICONDA_URL = "http://repo.continuum.io/miniconda/" +$BASE_URL = "https://www.python.org/ftp/python/" +$GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py" +$GET_PIP_PATH = "C:\get-pip.py" +$STDINT_H_URL = "http://msinttypes.googlecode.com/svn/trunk/stdint.h" +$STDINT_H_RELATIVE_PATH = "include" + +$PYTHON_PRERELEASE_REGEX = @" +(?x) +(?<major>\d+) +\. +(?<minor>\d+) +\. +(?<micro>\d+) +(?<prerelease>[a-z]{1,2}\d+) +"@ + + +function Download ($filename, $url) { + $webclient = New-Object System.Net.WebClient + + $basedir = $pwd.Path + "\" + $filepath = $basedir + $filename + if (Test-Path $filename) { + Write-Host "Reusing" $filepath + return $filepath + } + + # Download and retry up to 3 times in case of network transient errors. 
+ Write-Host "Downloading" $filename "from" $url + $retry_attempts = 2 + for ($i = 0; $i -lt $retry_attempts; $i++) { + try { + $webclient.DownloadFile($url, $filepath) + break + } + Catch [Exception]{ + Start-Sleep 1 + } + } + if (Test-Path $filepath) { + Write-Host "File saved at" $filepath + } else { + # Retry once to get the error message if any at the last try + $webclient.DownloadFile($url, $filepath) + } + return $filepath +} + + +function ParsePythonVersion ($python_version) { + if ($python_version -match $PYTHON_PRERELEASE_REGEX) { + return ([int]$matches.major, [int]$matches.minor, [int]$matches.micro, + $matches.prerelease) + } + $version_obj = [version]$python_version + return ($version_obj.major, $version_obj.minor, $version_obj.build, "") +} + + +function DownloadPython ($python_version, $platform_suffix) { + $major, $minor, $micro, $prerelease = ParsePythonVersion $python_version + + if (($major -le 2 -and $micro -eq 0) ` + -or ($major -eq 3 -and $minor -le 2 -and $micro -eq 0) ` + ) { + $dir = "$major.$minor" + $python_version = "$major.$minor$prerelease" + } else { + $dir = "$major.$minor.$micro" + } + + if ($prerelease) { + if (($major -le 2) ` + -or ($major -eq 3 -and $minor -eq 1) ` + -or ($major -eq 3 -and $minor -eq 2) ` + -or ($major -eq 3 -and $minor -eq 3) ` + ) { + $dir = "$dir/prev" + } + } + + if (($major -le 2) -or ($major -le 3 -and $minor -le 4)) { + $ext = "msi" + if ($platform_suffix) { + $platform_suffix = ".$platform_suffix" + } + } else { + $ext = "exe" + if ($platform_suffix) { + $platform_suffix = "-$platform_suffix" + } + } + + $filename = "python-$python_version$platform_suffix.$ext" + $url = "$BASE_URL$dir/$filename" + $filepath = Download $filename $url + return $filepath +} + + +function InstallPython ($python_version, $architecture, $python_home) { + Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home + if (Test-Path $python_home) { + Write-Host $python_home "already 
exists, skipping." + return $false + } + if ($architecture -eq "32") { + $platform_suffix = "" + } else { + $platform_suffix = "amd64" + } + $installer_path = DownloadPython $python_version $platform_suffix + $installer_ext = [System.IO.Path]::GetExtension($installer_path) + Write-Host "Installing $installer_path to $python_home" + $install_log = $python_home + ".log" + if ($installer_ext -eq '.msi') { + InstallPythonMSI $installer_path $python_home $install_log + } else { + InstallPythonEXE $installer_path $python_home $install_log + } + if (Test-Path $python_home) { + Write-Host "Python $python_version ($architecture) installation complete" + } else { + Write-Host "Failed to install Python in $python_home" + Get-Content -Path $install_log + Exit 1 + } +} + + +function InstallPythonEXE ($exepath, $python_home, $install_log) { + $install_args = "/quiet InstallAllUsers=1 TargetDir=$python_home" + RunCommand $exepath $install_args +} + + +function InstallPythonMSI ($msipath, $python_home, $install_log) { + $install_args = "/qn /log $install_log /i $msipath TARGETDIR=$python_home" + $uninstall_args = "/qn /x $msipath" + RunCommand "msiexec.exe" $install_args + if (-not(Test-Path $python_home)) { + Write-Host "Python seems to be installed else-where, reinstalling." + RunCommand "msiexec.exe" $uninstall_args + RunCommand "msiexec.exe" $install_args + } +} + +function RunCommand ($command, $command_args) { + Write-Host $command $command_args + Start-Process -FilePath $command -ArgumentList $command_args -Wait -Passthru +} + + +function InstallPip ($python_home) { + $pip_path = $python_home + "\Scripts\pip.exe" + $python_path = $python_home + "\python.exe" + if (-not(Test-Path $pip_path)) { + Write-Host "Installing pip..." + $webclient = New-Object System.Net.WebClient + $webclient.DownloadFile($GET_PIP_URL, $GET_PIP_PATH) + Write-Host "Executing:" $python_path $GET_PIP_PATH + & $python_path $GET_PIP_PATH + } else { + Write-Host "pip already installed." 
+ } +} + + +function DownloadMiniconda ($python_version, $platform_suffix) { + if ($python_version -eq "3.4") { + $filename = "Miniconda3-3.5.5-Windows-" + $platform_suffix + ".exe" + } else { + $filename = "Miniconda-3.5.5-Windows-" + $platform_suffix + ".exe" + } + $url = $MINICONDA_URL + $filename + $filepath = Download $filename $url + return $filepath +} + + +function InstallMiniconda ($python_version, $architecture, $python_home) { + Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home + if (Test-Path $python_home) { + Write-Host $python_home "already exists, skipping." + return $false + } + if ($architecture -eq "32") { + $platform_suffix = "x86" + } else { + $platform_suffix = "x86_64" + } + $filepath = DownloadMiniconda $python_version $platform_suffix + Write-Host "Installing" $filepath "to" $python_home + $install_log = $python_home + ".log" + $args = "/S /D=$python_home" + Write-Host $filepath $args + Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru + if (Test-Path $python_home) { + Write-Host "Python $python_version ($architecture) installation complete" + } else { + Write-Host "Failed to install Python in $python_home" + Get-Content -Path $install_log + Exit 1 + } +} + + +function InstallMinicondaPip ($python_home) { + $pip_path = $python_home + "\Scripts\pip.exe" + $conda_path = $python_home + "\Scripts\conda.exe" + if (-not(Test-Path $pip_path)) { + Write-Host "Installing pip..." + $args = "install --yes pip" + Write-Host $conda_path $args + Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru + } else { + Write-Host "pip already installed." 
+ } +} + +function InstallStdintH ($python_version) { + $major, $minor, $micro, $prerelease = ParsePythonVersion $python_version + if ($major -le 2) { + if((Test-Path $STDINT_H_RELATIVE_PATH) -eq 0) + { + mkdir $STDINT_H_RELATIVE_PATH; + } + $stdint_h_fname = $STDINT_H_RELATIVE_PATH + "\stdint.h" + Download $stdint_h_fname $STDINT_H_URL + } else { + Write-Host $python_version " uses C99 compliant Microsoft compiler. stdint.h download is not required." + } +} + + +function main () { + InstallPython $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON + InstallPip $env:PYTHON + InstallStdintH $env:PYTHON_VERSION +} + +main \ No newline at end of file diff --git a/appveyor/run_with_env.cmd b/appveyor/run_with_env.cmd new file mode 100644 index 000000000..597e2ae6e --- /dev/null +++ b/appveyor/run_with_env.cmd @@ -0,0 +1,94 @@ +:: To build extensions for 64 bit Python 3, we need to configure environment +:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) +:: +:: To build extensions for 64 bit Python 2, we need to configure environment +:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) +:: +:: To provide C99 compatibility, the local include directory (%CD%\include) is added to the INCLUDE environment variable +:: in the belief that stdint.h has been downloaded there earlier during the installation +:: +:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific +:: environment configurations. 
+:: +:: Note: this script needs to be run with the /E:ON and /V:ON flags for the +:: cmd interpreter, at least for (SDK v7.0) +:: +:: More details at: +:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows +:: http://stackoverflow.com/a/13751649/163740 +:: +:: Author: Olivier Grisel +:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ +:: +:: Notes about batch files for Python people: +:: +:: Quotes in values are literally part of the values: +:: SET FOO="bar" +:: FOO is now five characters long: " b a r " +:: If you don't want quotes, don't include them on the right-hand side. +:: +:: The CALL lines at the end of this file look redundant, but if you move them +:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y +:: case, I don't know why. +@ECHO OFF + +SET COMMAND_TO_RUN=%* +SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows +SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf + +:: Extract the major and minor versions, and allow for the minor version to be +:: more than 9. This requires the version number to have two dots in it. +SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% +IF "%PYTHON_VERSION:~3,1%" == "." ( + SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% +) ELSE ( + SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% +) + +:: Based on the Python version, determine what SDK version to use, and whether +:: to set the SDK for 64-bit. 
+:: For Python 2.x INCLUDE environment variable is supplemented by local include directory +:: which is supposed to contain C99 compliant stdint.h +IF %MAJOR_PYTHON_VERSION% == 2 ( + SET WINDOWS_SDK_VERSION="v7.0" + SET SET_SDK_64=Y + SET INCLUDE=%CD%\include;%INCLUDE% +) ELSE ( + IF %MAJOR_PYTHON_VERSION% == 3 ( + SET WINDOWS_SDK_VERSION="v7.1" + IF %MINOR_PYTHON_VERSION% LEQ 4 ( + SET SET_SDK_64=Y + ) ELSE ( + SET SET_SDK_64=N + IF EXIST "%WIN_WDK%" ( + :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ + REN "%WIN_WDK%" 0wdf + ) + ) + ) ELSE ( + ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" + EXIT 1 + ) +) + +IF %PYTHON_ARCH% == 64 ( + IF %SET_SDK_64% == Y ( + ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture + SET DISTUTILS_USE_SDK=1 + SET MSSdk=1 + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) ELSE ( + ECHO Using default MSVC build environment for 64 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) +) ELSE ( + ECHO Using default MSVC build environment for 32 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 +) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6a63178a7..dcf2c6dea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ cython cymem == 1.11 pathlib -preshed >= 0.41 +preshed >= 0.42 thinc == 3.3 murmurhash == 0.24 text-unidecode diff --git a/setup.py b/setup.py index 32149044a..21f9c6f53 100644 --- a/setup.py +++ b/setup.py @@ -8,8 +8,44 @@ from os import path from setuptools import Extension from distutils import sysconfig +from distutils.core import setup, Extension +from distutils.command.build_ext import build_ext + import platform +# 
# By subclassing build_ext and overriding build_extensions we get access to
# the compiler that will actually be used, which is really known only after
# finalize_options.
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
compile_options = {'msvc': ['/Ox', '/EHsc'],
                   'other': ['-O3', '-Wno-strict-prototypes',
                             '-Wno-unused-function']}
link_options = {'msvc': [],
                'other': []}


def _select_options(table, compiler_type):
    """Return a fresh copy of the flags in ``table`` for ``compiler_type``.

    Falls back to the ``'other'`` entry when the compiler type has no entry
    of its own, and returns ``None`` when neither key is present.
    """
    if compiler_type in table:
        return list(table[compiler_type])
    if 'other' in table:
        return list(table['other'])
    return None


class build_ext_options:
    """Mixin that applies compiler-specific flags to every extension.

    It reads ``self.compiler.compiler_type`` and ``self.extensions``, so it
    must be combined with a ``build_ext``-style command that provides them.
    """

    def build_options(self):
        """Set ``extra_compile_args`` and ``extra_link_args`` on each extension.

        The flags come from the module-level ``compile_options`` and
        ``link_options`` tables. An attribute is left untouched when its
        table has neither an entry for the compiler nor an ``'other'`` entry.
        """
        compiler_type = self.compiler.compiler_type
        compile_flags = _select_options(compile_options, compiler_type)
        link_flags = _select_options(link_options, compiler_type)
        for e in self.extensions:
            # Each extension gets its own list so that a later in-place
            # change to one extension's flags cannot leak into the others
            # or into the module-level tables.
            if compile_flags is not None:
                e.extra_compile_args = list(compile_flags)
            if link_flags is not None:
                e.extra_link_args = list(link_flags)


class build_ext_subclass(build_ext, build_ext_options):
    """``build_ext`` command that applies the per-compiler flags first."""

    def build_extensions(self):
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)


# PyPy --- NB! PyPy doesn't really work, it segfaults all over the place. But,
# this is necessary to get it compile.
# We have to resort to monkey-patching to set the compiler, because pypy broke @@ -61,24 +97,27 @@ def name_to_path(mod_name, ext): return '%s.%s' % (mod_name.replace('.', '/'), ext) -def c_ext(mod_name, language, includes, compile_args, link_args): +def c_ext(mod_name, language, includes): mod_path = name_to_path(mod_name, language) - return Extension(mod_name, [mod_path], include_dirs=includes, - extra_compile_args=compile_args, extra_link_args=link_args) + return Extension(mod_name, [mod_path], include_dirs=includes) -def cython_setup(mod_names, language, includes, compile_args, link_args): +def cython_setup(mod_names, language, includes): import Cython.Distutils import Cython.Build import distutils.core + class build_ext_cython_subclass( Cython.Distutils.build_ext, build_ext_options ): + def build_extensions(self): + build_ext_options.build_options(self) + Cython.Distutils.build_ext.build_extensions(self) + if language == 'cpp': language = 'c++' exts = [] for mod_name in mod_names: mod_path = mod_name.replace('.', '/') + '.pyx' - e = Extension(mod_name, [mod_path], language=language, include_dirs=includes, - extra_compile_args=compile_args, extra_link_args=link_args) + e = Extension(mod_name, [mod_path], language=language, include_dirs=includes) exts.append(e) distutils.core.setup( name='spacy', @@ -97,7 +136,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args): "data/vocab/strings.txt"], "spacy.syntax": ["*.pxd"]}, ext_modules=exts, - cmdclass={'build_ext': Cython.Distutils.build_ext}, + cmdclass={'build_ext': build_ext_cython_subclass}, license="Dual: Commercial or AGPL", ) @@ -122,10 +161,11 @@ def run_setup(exts): "spacy.syntax": ["*.pxd"]}, ext_modules=exts, license="MIT", - install_requires=['numpy', 'murmurhash', 'cymem >= 1.11', 'preshed >= 0.41', + install_requires=['numpy', 'murmurhash', 'cymem >= 1.11', 'preshed >= 0.42', 'thinc == 3.3', "text_unidecode", 'wget', 'plac', 'six', 'ujson', 'cloudpickle'], 
setup_requires=["headers_workaround"], + cmdclass = {'build_ext': build_ext_subclass }, ) import headers_workaround @@ -139,19 +179,16 @@ VERSION = '0.95' def main(modules, is_pypy): language = "cpp" includes = ['.', path.join(sys.prefix, 'include')] - compile_args = ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function'] - link_args = [] if sys.prefix == 'darwin': - compile_args.append(['-mmacosx-version-min=10.8', '-stdlib=libc++']) - link_args.append('-lc++') + # extend(), not append(): the flags must stay a flat list of strings + compile_options['other'].extend(['-mmacosx-version-min=10.8', '-stdlib=libc++']) + link_options['other'].append('-lc++') if use_cython: - cython_setup(modules, language, includes, compile_args, link_args) + cython_setup(modules, language, includes) else: - exts = [c_ext(mn, language, includes, compile_args, link_args) + exts = [c_ext(mn, language, includes) for mn in modules] run_setup(exts) - MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings', 'spacy.lexeme', 'spacy.vocab', 'spacy.attrs', 'spacy.morphology', 'spacy.tagger',