mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 12:20:20 +03:00
Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
a70b5c186b | ||
|
a5b6d92ea3 | ||
|
fe0bae9159 | ||
|
8e9a84952a | ||
|
ca0cae2074 | ||
|
46234a5221 | ||
|
19f64ee18a | ||
|
07e630cd03 | ||
|
4297ca43cf | ||
|
b32e143326 | ||
|
2e91e07388 | ||
|
dca663a2ef | ||
|
cae72e46dd | ||
|
22287c89c0 | ||
|
2c1de4b9a4 | ||
|
5e7e7cda94 | ||
|
6ce9f0469f | ||
|
6ffb395d68 |
23
.travis.yml
23
.travis.yml
|
@ -1,23 +0,0 @@
|
||||||
language: python
|
|
||||||
sudo: false
|
|
||||||
cache: pip
|
|
||||||
dist: trusty
|
|
||||||
group: edge
|
|
||||||
python:
|
|
||||||
- "2.7"
|
|
||||||
os:
|
|
||||||
- linux
|
|
||||||
install:
|
|
||||||
- "python -m pip install -U pip setuptools"
|
|
||||||
- "pip install -e . --prefer-binary"
|
|
||||||
script:
|
|
||||||
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
|
||||||
- "pip install -r requirements.txt"
|
|
||||||
- "python -m pytest --tb=native spacy"
|
|
||||||
branches:
|
|
||||||
except:
|
|
||||||
- spacy.io
|
|
||||||
notifications:
|
|
||||||
slack:
|
|
||||||
secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
|
|
||||||
email: false
|
|
|
@ -7,3 +7,4 @@ include pyproject.toml
|
||||||
recursive-exclude spacy/lang *.json
|
recursive-exclude spacy/lang *.json
|
||||||
recursive-include spacy/lang *.json.gz
|
recursive-include spacy/lang *.json.gz
|
||||||
recursive-include licenses *
|
recursive-include licenses *
|
||||||
|
recursive-exclude spacy *.cpp
|
||||||
|
|
|
@ -18,7 +18,6 @@ It's commercial open-source software, released under the MIT license.
|
||||||
[Check out the release notes here.](https://github.com/explosion/spaCy/releases/tag/v3.0.0rc1)
|
[Check out the release notes here.](https://github.com/explosion/spaCy/releases/tag/v3.0.0rc1)
|
||||||
|
|
||||||
[>)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
|
[>)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
|
||||||
[>)](https://travis-ci.org/explosion/spaCy)
|
|
||||||
[](https://github.com/explosion/spaCy/releases)
|
[](https://github.com/explosion/spaCy/releases)
|
||||||
[](https://pypi.org/project/spacy/)
|
[](https://pypi.org/project/spacy/)
|
||||||
[](https://anaconda.org/conda-forge/spacy)
|
[](https://anaconda.org/conda-forge/spacy)
|
||||||
|
|
|
@ -21,7 +21,7 @@ jobs:
|
||||||
# defined in .flake8 and overwrites the selected codes.
|
# defined in .flake8 and overwrites the selected codes.
|
||||||
- job: 'Validate'
|
- job: 'Validate'
|
||||||
pool:
|
pool:
|
||||||
vmImage: 'ubuntu-16.04'
|
vmImage: 'ubuntu-latest'
|
||||||
steps:
|
steps:
|
||||||
- task: UsePythonVersion@0
|
- task: UsePythonVersion@0
|
||||||
inputs:
|
inputs:
|
||||||
|
@ -35,50 +35,37 @@ jobs:
|
||||||
dependsOn: 'Validate'
|
dependsOn: 'Validate'
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
Python35Linux:
|
# Test on one OS per python 3.6/3.7/3.8/3.9 to speed up CI
|
||||||
imageName: 'ubuntu-16.04'
|
|
||||||
python.version: '3.5'
|
|
||||||
os: linux
|
|
||||||
Python35Windows:
|
|
||||||
imageName: 'vs2017-win2016'
|
|
||||||
python.version: '3.5'
|
|
||||||
# Test on one OS per python 3.6/3.7/3.8 to speed up CI
|
|
||||||
Python36Linux:
|
Python36Linux:
|
||||||
imageName: 'ubuntu-16.04'
|
imageName: 'ubuntu-20.04'
|
||||||
python.version: '3.6'
|
python.version: '3.6'
|
||||||
# Python36Windows:
|
|
||||||
# imageName: 'vs2017-win2016'
|
|
||||||
# python.version: '3.6'
|
|
||||||
# Python36Mac:
|
|
||||||
# imageName: 'macos-10.14'
|
|
||||||
# python.version: '3.6'
|
|
||||||
# Python37Linux:
|
|
||||||
# imageName: 'ubuntu-16.04'
|
|
||||||
# python.version: '3.7'
|
|
||||||
Python37Windows:
|
Python37Windows:
|
||||||
imageName: 'vs2017-win2016'
|
imageName: 'windows-latest'
|
||||||
python.version: '3.7'
|
python.version: '3.7'
|
||||||
# Python37Mac:
|
|
||||||
# imageName: 'macos-10.14'
|
|
||||||
# python.version: '3.7'
|
|
||||||
# Python38Linux:
|
|
||||||
# imageName: 'ubuntu-16.04'
|
|
||||||
# python.version: '3.8'
|
|
||||||
# Python38Windows:
|
|
||||||
# imageName: 'vs2017-win2016'
|
|
||||||
# python.version: '3.8'
|
|
||||||
Python38Mac:
|
Python38Mac:
|
||||||
imageName: 'macos-10.14'
|
imageName: 'macos-latest'
|
||||||
python.version: '3.8'
|
python.version: '3.8'
|
||||||
Python39Linux:
|
Python39Linux:
|
||||||
imageName: 'ubuntu-16.04'
|
imageName: 'ubuntu-latest'
|
||||||
python.version: '3.9'
|
|
||||||
Python39Windows:
|
|
||||||
imageName: 'vs2017-win2016'
|
|
||||||
python.version: '3.9'
|
|
||||||
Python39Mac:
|
|
||||||
imageName: 'macos-10.14'
|
|
||||||
python.version: '3.9'
|
python.version: '3.9'
|
||||||
|
Python310Linux:
|
||||||
|
imageName: 'ubuntu-latest'
|
||||||
|
python.version: '3.10'
|
||||||
|
Python310Windows:
|
||||||
|
imageName: 'windows-latest'
|
||||||
|
python.version: '3.10'
|
||||||
|
Python310Mac:
|
||||||
|
imageName: 'macos-latest'
|
||||||
|
python.version: '3.10'
|
||||||
|
Python311Linux:
|
||||||
|
imageName: 'ubuntu-latest'
|
||||||
|
python.version: '3.11'
|
||||||
|
Python311Windows:
|
||||||
|
imageName: 'windows-latest'
|
||||||
|
python.version: '3.11'
|
||||||
|
Python311Mac:
|
||||||
|
imageName: 'macos-latest'
|
||||||
|
python.version: '3.11'
|
||||||
maxParallel: 4
|
maxParallel: 4
|
||||||
pool:
|
pool:
|
||||||
vmImage: $(imageName)
|
vmImage: $(imageName)
|
||||||
|
@ -88,17 +75,13 @@ jobs:
|
||||||
inputs:
|
inputs:
|
||||||
versionSpec: '$(python.version)'
|
versionSpec: '$(python.version)'
|
||||||
architecture: 'x64'
|
architecture: 'x64'
|
||||||
|
allowUnstable: true
|
||||||
|
|
||||||
- script: python -m pip install -U pip setuptools
|
- script: python -m pip install -U pip setuptools
|
||||||
displayName: 'Update pip'
|
displayName: 'Update pip'
|
||||||
|
|
||||||
- script: pip install -r requirements.txt --prefer-binary
|
|
||||||
displayName: 'Install dependencies (python 3.5: prefer binary)'
|
|
||||||
condition: eq(variables['python.version'], '3.5')
|
|
||||||
|
|
||||||
- script: pip install -r requirements.txt
|
- script: pip install -r requirements.txt
|
||||||
displayName: 'Install dependencies'
|
displayName: 'Install dependencies'
|
||||||
condition: not(eq(variables['python.version'], '3.5'))
|
|
||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
python setup.py build_ext --inplace -j 2
|
python setup.py build_ext --inplace -j 2
|
||||||
|
@ -115,19 +98,18 @@ jobs:
|
||||||
pip uninstall -y -r installed.txt
|
pip uninstall -y -r installed.txt
|
||||||
displayName: 'Uninstall all packages'
|
displayName: 'Uninstall all packages'
|
||||||
|
|
||||||
- bash: |
|
|
||||||
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
|
||||||
pip install dist/$SDIST --prefer-binary
|
|
||||||
displayName: 'Install from sdist (python 3.5: prefer binary)'
|
|
||||||
condition: eq(variables['python.version'], '3.5')
|
|
||||||
|
|
||||||
- bash: |
|
- bash: |
|
||||||
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
||||||
pip install dist/$SDIST
|
pip install dist/$SDIST
|
||||||
displayName: 'Install from sdist'
|
displayName: 'Install from sdist'
|
||||||
condition: not(eq(variables['python.version'], '3.5'))
|
|
||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
pip install -r requirements.txt --prefer-binary
|
pip install -r requirements.txt
|
||||||
python -m pytest --pyargs spacy
|
python -m pytest --pyargs spacy
|
||||||
displayName: 'Run tests'
|
displayName: 'Run tests'
|
||||||
|
|
||||||
|
- script: |
|
||||||
|
python -m spacy download en_core_web_sm
|
||||||
|
python -c "import spacy; nlp=spacy.load('en_core_web_sm'); doc=nlp('test')"
|
||||||
|
displayName: 'Test download CLI'
|
||||||
|
condition: eq(variables['python.version'], '3.9')
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
# build version constraints for use with wheelwright + multibuild
|
# build version constraints for use with wheelwright + multibuild
|
||||||
numpy==1.15.0; python_version<='3.7'
|
numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
|
||||||
numpy==1.17.3; python_version=='3.8'
|
numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
|
||||||
|
numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
|
||||||
|
numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
|
||||||
numpy==1.19.3; python_version=='3.9'
|
numpy==1.19.3; python_version=='3.9'
|
||||||
numpy; python_version>='3.10'
|
numpy==1.21.3; python_version=='3.10'
|
||||||
|
numpy==1.23.2; python_version=='3.11'
|
||||||
|
numpy; python_version>='3.12'
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = [
|
requires = [
|
||||||
"setuptools",
|
"setuptools",
|
||||||
"cython>=0.25",
|
"cython>=0.25,<3.0",
|
||||||
"cymem>=2.0.2,<2.1.0",
|
"cymem>=2.0.2,<2.1.0",
|
||||||
"preshed>=3.0.2,<3.1.0",
|
"preshed>=3.0.2,<3.1.0",
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
|
|
|
@ -17,8 +17,8 @@ tqdm>=4.38.0,<5.0.0
|
||||||
pyrsistent<0.17.0
|
pyrsistent<0.17.0
|
||||||
jsonschema>=2.6.0,<3.1.0
|
jsonschema>=2.6.0,<3.1.0
|
||||||
# Development dependencies
|
# Development dependencies
|
||||||
cython>=0.25
|
cython>=0.25,<3.0
|
||||||
pytest>=4.6.5
|
pytest>=4.6.5
|
||||||
pytest-timeout>=1.3.0,<2.0.0
|
pytest-timeout>=1.3.0,<2.0.0
|
||||||
mock>=2.0.0,<3.0.0
|
mock>=2.0.0,<3.0.0
|
||||||
flake8>=3.5.0,<3.6.0
|
flake8>=3.5.0,<6.0.0
|
||||||
|
|
|
@ -24,6 +24,8 @@ classifiers =
|
||||||
Programming Language :: Python :: 3.7
|
Programming Language :: Python :: 3.7
|
||||||
Programming Language :: Python :: 3.8
|
Programming Language :: Python :: 3.8
|
||||||
Programming Language :: Python :: 3.9
|
Programming Language :: Python :: 3.9
|
||||||
|
Programming Language :: Python :: 3.10
|
||||||
|
Programming Language :: Python :: 3.11
|
||||||
Topic :: Scientific/Engineering
|
Topic :: Scientific/Engineering
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
|
@ -33,7 +35,7 @@ scripts =
|
||||||
bin/spacy
|
bin/spacy
|
||||||
python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
|
python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
|
||||||
setup_requires =
|
setup_requires =
|
||||||
cython>=0.25
|
cython>=0.25,<3.0
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
# We also need our Cython packages here to compile against
|
# We also need our Cython packages here to compile against
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
|
|
135
setup.py
135
setup.py
|
@ -1,16 +1,17 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import contextlib
|
|
||||||
import numpy
|
import numpy
|
||||||
|
from pathlib import Path
|
||||||
from distutils.command.build_ext import build_ext
|
from distutils.command.build_ext import build_ext
|
||||||
from distutils.sysconfig import get_python_inc
|
from distutils.sysconfig import get_python_inc
|
||||||
import distutils.util
|
import distutils.util
|
||||||
from distutils import ccompiler, msvccompiler
|
from distutils import ccompiler, msvccompiler
|
||||||
from setuptools import Extension, setup, find_packages
|
from setuptools import Extension, setup, find_packages
|
||||||
|
from Cython.Build import cythonize
|
||||||
|
from Cython.Compiler import Options
|
||||||
|
|
||||||
|
|
||||||
def is_new_osx():
|
def is_new_osx():
|
||||||
|
@ -28,6 +29,10 @@ def is_new_osx():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# Preserve `__doc__` on functions and classes
|
||||||
|
# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
|
||||||
|
Options.docstrings = True
|
||||||
|
|
||||||
PACKAGES = find_packages()
|
PACKAGES = find_packages()
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,6 +79,12 @@ COMPILE_OPTIONS = {
|
||||||
|
|
||||||
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
|
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
|
||||||
|
|
||||||
|
COMPILER_DIRECTIVES = {
|
||||||
|
"language_level": -3,
|
||||||
|
"embedsignature": True,
|
||||||
|
"annotation_typing": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if is_new_osx():
|
if is_new_osx():
|
||||||
# On Mac, use libc++ because Apple deprecated use of
|
# On Mac, use libc++ because Apple deprecated use of
|
||||||
|
@ -105,20 +116,6 @@ class build_ext_subclass(build_ext, build_ext_options):
|
||||||
build_ext.build_extensions(self)
|
build_ext.build_extensions(self)
|
||||||
|
|
||||||
|
|
||||||
def generate_cython(root, source):
|
|
||||||
print("Cythonizing sources")
|
|
||||||
p = subprocess.call(
|
|
||||||
[sys.executable, os.path.join(root, "bin", "cythonize.py"), source],
|
|
||||||
env=os.environ,
|
|
||||||
)
|
|
||||||
if p != 0:
|
|
||||||
raise RuntimeError("Running cythonize failed")
|
|
||||||
|
|
||||||
|
|
||||||
def is_source_release(path):
|
|
||||||
return os.path.exists(os.path.join(path, "PKG-INFO"))
|
|
||||||
|
|
||||||
|
|
||||||
# Include the git version in the build (adapted from NumPy)
|
# Include the git version in the build (adapted from NumPy)
|
||||||
# Copyright (c) 2005-2020, NumPy Developers.
|
# Copyright (c) 2005-2020, NumPy Developers.
|
||||||
# BSD 3-Clause license, see licenses/3rd_party_licenses.txt
|
# BSD 3-Clause license, see licenses/3rd_party_licenses.txt
|
||||||
|
@ -142,7 +139,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
|
||||||
try:
|
try:
|
||||||
out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"])
|
out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"])
|
||||||
git_version = out.strip().decode("ascii")
|
git_version = out.strip().decode("ascii")
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
elif os.path.exists(filename):
|
elif os.path.exists(filename):
|
||||||
# must be a source distribution, use existing version file
|
# must be a source distribution, use existing version file
|
||||||
|
@ -150,7 +147,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
|
||||||
a = open(filename, "r")
|
a = open(filename, "r")
|
||||||
lines = a.readlines()
|
lines = a.readlines()
|
||||||
git_version = lines[-1].split('"')[1]
|
git_version = lines[-1].split('"')[1]
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
a.close()
|
a.close()
|
||||||
|
@ -169,83 +166,51 @@ GIT_VERSION = "%(git_version)s"
|
||||||
|
|
||||||
|
|
||||||
def clean(path):
|
def clean(path):
|
||||||
for name in MOD_NAMES:
|
for path in path.glob("**/*"):
|
||||||
name = name.replace(".", "/")
|
if path.is_file() and path.suffix in (".so", ".cpp"):
|
||||||
for ext in [".so", ".html", ".cpp", ".c"]:
|
print("Deleting", path.name)
|
||||||
file_path = os.path.join(path, name + ext)
|
path.unlink()
|
||||||
if os.path.exists(file_path):
|
|
||||||
os.unlink(file_path)
|
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def chdir(new_dir):
|
|
||||||
old_dir = os.getcwd()
|
|
||||||
try:
|
|
||||||
os.chdir(new_dir)
|
|
||||||
sys.path.insert(0, new_dir)
|
|
||||||
yield
|
|
||||||
finally:
|
|
||||||
del sys.path[0]
|
|
||||||
os.chdir(old_dir)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_package():
|
def setup_package():
|
||||||
write_git_info_py()
|
write_git_info_py()
|
||||||
|
|
||||||
root = os.path.abspath(os.path.dirname(__file__))
|
root = Path(__file__).parent
|
||||||
|
|
||||||
if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
|
if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
|
||||||
return clean(root)
|
return clean(root / "spacy")
|
||||||
|
|
||||||
with chdir(root):
|
with (root / "spacy" / "about.py").open("r") as f:
|
||||||
with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
|
about = {}
|
||||||
about = {}
|
exec(f.read(), about)
|
||||||
exec(f.read(), about)
|
|
||||||
|
|
||||||
include_dirs = [
|
include_dirs = [
|
||||||
numpy.get_include(),
|
get_python_inc(plat_specific=True),
|
||||||
get_python_inc(plat_specific=True),
|
numpy.get_include(),
|
||||||
os.path.join(root, "include"),
|
str(root / "include"),
|
||||||
]
|
]
|
||||||
|
if (
|
||||||
|
ccompiler.new_compiler().compiler_type == "msvc"
|
||||||
|
and msvccompiler.get_build_version() == 9
|
||||||
|
):
|
||||||
|
include_dirs.append(str(root / "include" / "msvc9"))
|
||||||
|
ext_modules = []
|
||||||
|
for name in MOD_NAMES:
|
||||||
|
mod_path = name.replace(".", "/") + ".pyx"
|
||||||
|
ext = Extension(name, [mod_path], language="c++")
|
||||||
|
ext_modules.append(ext)
|
||||||
|
print("Cythonizing sources")
|
||||||
|
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
|
||||||
|
|
||||||
if (
|
setup(
|
||||||
ccompiler.new_compiler().compiler_type == "msvc"
|
name="spacy",
|
||||||
and msvccompiler.get_build_version() == 9
|
packages=PACKAGES,
|
||||||
):
|
version=about["__version__"],
|
||||||
include_dirs.append(os.path.join(root, "include", "msvc9"))
|
ext_modules=ext_modules,
|
||||||
|
cmdclass={"build_ext": build_ext_subclass},
|
||||||
ext_modules = []
|
include_dirs=include_dirs,
|
||||||
for mod_name in MOD_NAMES:
|
package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
|
||||||
mod_path = mod_name.replace(".", "/") + ".cpp"
|
)
|
||||||
extra_link_args = []
|
|
||||||
# ???
|
|
||||||
# Imported from patch from @mikepb
|
|
||||||
# See Issue #267. Running blind here...
|
|
||||||
if sys.platform == "darwin":
|
|
||||||
dylib_path = [".." for _ in range(mod_name.count("."))]
|
|
||||||
dylib_path = "/".join(dylib_path)
|
|
||||||
dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path
|
|
||||||
extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
|
|
||||||
ext_modules.append(
|
|
||||||
Extension(
|
|
||||||
mod_name,
|
|
||||||
[mod_path],
|
|
||||||
language="c++",
|
|
||||||
include_dirs=include_dirs,
|
|
||||||
extra_link_args=extra_link_args,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not is_source_release(root):
|
|
||||||
generate_cython(root, "spacy")
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name="spacy",
|
|
||||||
packages=PACKAGES,
|
|
||||||
version=about["__version__"],
|
|
||||||
ext_modules=ext_modules,
|
|
||||||
cmdclass={"build_ext": build_ext_subclass},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# fmt: off
|
# fmt: off
|
||||||
__title__ = "spacy"
|
__title__ = "spacy"
|
||||||
__version__ = "2.3.5"
|
__version__ = "2.3.9"
|
||||||
__release__ = True
|
__release__ = True
|
||||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||||
|
|
|
@ -128,6 +128,6 @@ def get_version(model, comp):
|
||||||
|
|
||||||
def download_model(filename, user_pip_args=None):
|
def download_model(filename, user_pip_args=None):
|
||||||
download_url = about.__download_url__ + "/" + filename
|
download_url = about.__download_url__ + "/" + filename
|
||||||
pip_args = user_pip_args if user_pip_args is not None else []
|
pip_args = list(user_pip_args) if user_pip_args is not None else []
|
||||||
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
|
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
|
||||||
return subprocess.call(cmd, env=os.environ.copy())
|
return subprocess.call(cmd, env=os.environ.copy())
|
||||||
|
|
|
@ -295,7 +295,7 @@ def make_docs(nlp, batch, min_length, max_length):
|
||||||
raise ValueError(Errors.E138.format(text=record))
|
raise ValueError(Errors.E138.format(text=record))
|
||||||
if "heads" in record:
|
if "heads" in record:
|
||||||
heads = record["heads"]
|
heads = record["heads"]
|
||||||
heads = numpy.asarray(heads, dtype="uint64")
|
heads = numpy.asarray([numpy.array(h).astype(numpy.uint64) for h in heads], dtype="uint64")
|
||||||
heads = heads.reshape((len(doc), 1))
|
heads = heads.reshape((len(doc), 1))
|
||||||
doc = doc.from_array([HEAD], heads)
|
doc = doc.from_array([HEAD], heads)
|
||||||
if len(doc) >= min_length and len(doc) < max_length:
|
if len(doc) >= min_length and len(doc) < max_length:
|
||||||
|
|
|
@ -6,7 +6,13 @@ from spacy.util import get_lang_class
|
||||||
|
|
||||||
|
|
||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
parser.addoption("--slow", action="store_true", help="include slow tests")
|
try:
|
||||||
|
parser.addoption("--slow", action="store_true", help="include slow tests")
|
||||||
|
parser.addoption("--issue", action="store", help="test specific issues")
|
||||||
|
# Options are already added, e.g. if conftest is copied in a build pipeline
|
||||||
|
# and runs twice
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def pytest_runtest_setup(item):
|
def pytest_runtest_setup(item):
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import numpy
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.attrs import ORTH, SHAPE, POS, DEP
|
from spacy.attrs import ORTH, SHAPE, POS, DEP
|
||||||
|
@ -91,14 +92,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
|
||||||
|
|
||||||
# head before start
|
# head before start
|
||||||
arr = doc.to_array(["HEAD"])
|
arr = doc.to_array(["HEAD"])
|
||||||
arr[0] = -1
|
arr[0] = numpy.int32(-1).astype(numpy.uint64)
|
||||||
doc_from_array = Doc(en_vocab, words=words)
|
doc_from_array = Doc(en_vocab, words=words)
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
doc_from_array.from_array(["HEAD"], arr)
|
doc_from_array.from_array(["HEAD"], arr)
|
||||||
|
|
||||||
# head after end
|
# head after end
|
||||||
arr = doc.to_array(["HEAD"])
|
arr = doc.to_array(["HEAD"])
|
||||||
arr[0] = 5
|
arr[0] = numpy.int32(5).astype(numpy.uint64)
|
||||||
doc_from_array = Doc(en_vocab, words=words)
|
doc_from_array = Doc(en_vocab, words=words)
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
doc_from_array.from_array(["HEAD"], arr)
|
doc_from_array.from_array(["HEAD"], arr)
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import numpy
|
||||||
|
from numpy.testing import assert_array_equal
|
||||||
from spacy.attrs import ORTH, LENGTH
|
from spacy.attrs import ORTH, LENGTH
|
||||||
from spacy.tokens import Doc, Span
|
from spacy.tokens import Doc, Span
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
@ -118,6 +120,14 @@ def test_spans_lca_matrix(en_tokenizer):
|
||||||
assert lca[1, 0] == 1 # slept & dog -> slept
|
assert lca[1, 0] == 1 # slept & dog -> slept
|
||||||
assert lca[1, 1] == 1 # slept & slept -> slept
|
assert lca[1, 1] == 1 # slept & slept -> slept
|
||||||
|
|
||||||
|
# example from Span API docs
|
||||||
|
tokens = en_tokenizer("I like New York in Autumn")
|
||||||
|
doc = get_doc(
|
||||||
|
tokens.vocab, words=[t.text for t in tokens], heads=[1, 0, 1, -2, -1, -1]
|
||||||
|
)
|
||||||
|
lca = doc[1:4].get_lca_matrix()
|
||||||
|
assert_array_equal(lca, numpy.asarray([[0, 0, 0], [0, 1, 2], [0, 2, 2]]))
|
||||||
|
|
||||||
|
|
||||||
def test_span_similarity_match():
|
def test_span_similarity_match():
|
||||||
doc = Doc(Vocab(), words=["a", "b", "a", "b"])
|
doc = Doc(Vocab(), words=["a", "b", "a", "b"])
|
||||||
|
|
|
@ -37,9 +37,9 @@ def test_en_noun_chunks_not_nested(en_vocab):
|
||||||
[0, root],
|
[0, root],
|
||||||
[4, amod],
|
[4, amod],
|
||||||
[3, nmod],
|
[3, nmod],
|
||||||
[-1, cc],
|
[numpy.int32(-1).astype(numpy.uint64), cc],
|
||||||
[-2, conj],
|
[numpy.int32(-2).astype(numpy.uint64), conj],
|
||||||
[-5, dobj],
|
[numpy.int32(-5).astype(numpy.uint64), dobj],
|
||||||
],
|
],
|
||||||
dtype="uint64",
|
dtype="uint64",
|
||||||
),
|
),
|
||||||
|
|
|
@ -58,11 +58,12 @@ def get_doc(
|
||||||
for annot in annotations:
|
for annot in annotations:
|
||||||
if annot:
|
if annot:
|
||||||
if annot is heads:
|
if annot is heads:
|
||||||
|
annot = numpy.array(heads, dtype=numpy.int32).astype(numpy.uint64)
|
||||||
for i in range(len(words)):
|
for i in range(len(words)):
|
||||||
if attrs.ndim == 1:
|
if attrs.ndim == 1:
|
||||||
attrs[i] = heads[i]
|
attrs[i] = annot[i]
|
||||||
else:
|
else:
|
||||||
attrs[i, j] = heads[i]
|
attrs[i, j] = annot[i]
|
||||||
else:
|
else:
|
||||||
for i in range(len(words)):
|
for i in range(len(words)):
|
||||||
if attrs.ndim == 1:
|
if attrs.ndim == 1:
|
||||||
|
|
|
@ -805,7 +805,7 @@ cdef class Doc:
|
||||||
`(M, N)` array of attributes.
|
`(M, N)` array of attributes.
|
||||||
|
|
||||||
attrs (list) A list of attribute ID ints.
|
attrs (list) A list of attribute ID ints.
|
||||||
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
|
array (numpy.ndarray[ndim=2, dtype='uint64']): The attribute values.
|
||||||
RETURNS (Doc): Itself.
|
RETURNS (Doc): Itself.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_array
|
DOCS: https://spacy.io/api/doc#from_array
|
||||||
|
@ -845,9 +845,9 @@ cdef class Doc:
|
||||||
col = attrs.index(HEAD)
|
col = attrs.index(HEAD)
|
||||||
for i in range(length):
|
for i in range(length):
|
||||||
# cast index to signed int
|
# cast index to signed int
|
||||||
abs_head_index = numpy.int32(array[i, col]) + i
|
abs_head_index = array[i, col].astype(numpy.int32) + i
|
||||||
if abs_head_index < 0 or abs_head_index >= length:
|
if abs_head_index < 0 or abs_head_index >= length:
|
||||||
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=numpy.int32(array[i, col])))
|
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=abs_head_index-i))
|
||||||
# Do TAG first. This lets subsequent loop override stuff like POS, LEMMA
|
# Do TAG first. This lets subsequent loop override stuff like POS, LEMMA
|
||||||
if TAG in attrs:
|
if TAG in attrs:
|
||||||
col = attrs.index(TAG)
|
col = attrs.index(TAG)
|
||||||
|
@ -1351,7 +1351,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
|
||||||
j_idx_in_sent = start + j - sent_start
|
j_idx_in_sent = start + j - sent_start
|
||||||
n_missing_tokens_in_sent = len(sent) - j_idx_in_sent
|
n_missing_tokens_in_sent = len(sent) - j_idx_in_sent
|
||||||
# make sure we do not go past `end`, in cases where `end` < sent.end
|
# make sure we do not go past `end`, in cases where `end` < sent.end
|
||||||
max_range = min(j + n_missing_tokens_in_sent, end)
|
max_range = min(j + n_missing_tokens_in_sent, end - start)
|
||||||
for k in range(j + 1, max_range):
|
for k in range(j + 1, max_range):
|
||||||
lca = _get_tokens_lca(token_j, doc[start + k])
|
lca = _get_tokens_lca(token_j, doc[start + k])
|
||||||
# if lca is outside of span, we set it to -1
|
# if lca is outside of span, we set it to -1
|
||||||
|
|
|
@ -272,7 +272,7 @@ cdef class Span:
|
||||||
for ancestor in ancestors:
|
for ancestor in ancestors:
|
||||||
ancestor_i = ancestor.i - self.start
|
ancestor_i = ancestor.i - self.start
|
||||||
if ancestor_i in range(length):
|
if ancestor_i in range(length):
|
||||||
array[i, head_col] = ancestor_i - i
|
array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
|
||||||
|
|
||||||
# if there is no appropriate ancestor, define a new artificial root
|
# if there is no appropriate ancestor, define a new artificial root
|
||||||
value = array[i, head_col]
|
value = array[i, head_col]
|
||||||
|
@ -280,7 +280,7 @@ cdef class Span:
|
||||||
new_root = old_to_new_root.get(ancestor_i, None)
|
new_root = old_to_new_root.get(ancestor_i, None)
|
||||||
if new_root is not None:
|
if new_root is not None:
|
||||||
# take the same artificial root as a previous token from the same sentence
|
# take the same artificial root as a previous token from the same sentence
|
||||||
array[i, head_col] = new_root - i
|
array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
|
||||||
else:
|
else:
|
||||||
# set this token as the new artificial root
|
# set this token as the new artificial root
|
||||||
array[i, head_col] = 0
|
array[i, head_col] = 0
|
||||||
|
|
|
@ -68,7 +68,7 @@ function isStableVersion(v) {
|
||||||
|
|
||||||
function getLatestVersion(modelId, compatibility) {
|
function getLatestVersion(modelId, compatibility) {
|
||||||
for (let [version, models] of Object.entries(compatibility)) {
|
for (let [version, models] of Object.entries(compatibility)) {
|
||||||
if (isStableVersion(version) && models[modelId]) {
|
if (version.startsWith('2.') && isStableVersion(version) && models[modelId]) {
|
||||||
const modelVersions = models[modelId]
|
const modelVersions = models[modelId]
|
||||||
for (let modelVersion of modelVersions) {
|
for (let modelVersion of modelVersions) {
|
||||||
if (isStableVersion(modelVersion)) {
|
if (isStableVersion(modelVersion)) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user