Compare commits

...

18 Commits

Author SHA1 Message Date
Adriane Boyd
a70b5c186b
Set version to v2.3.9 (#11977) 2022-12-15 11:09:16 +01:00
Adriane Boyd
a5b6d92ea3
Allow conftest.py to run twice for build envs (#11978)
Co-authored-by: Ines Montani <ines@ines.io>
2022-12-15 10:03:19 +01:00
Adriane Boyd
fe0bae9159
Cast to uint64 for all array-based doc representations (#11940)
* Cast to uint64 for all array-based doc representations

* Update images and versions in CI

* Temporarily test with prerelease numpy

* Convert specifically from int32 to uint64

* Use int32 in array tests

* Revert "Temporarily test with prerelease numpy"

This reverts commit 02f2cc7e29.

* Update remaining tests to use int32
2022-12-15 08:16:14 +01:00
Adriane Boyd
8e9a84952a
Update images and versions in CI (#11947) 2022-12-08 16:21:52 +01:00
Adriane Boyd
ca0cae2074
Fix python 2.7 compat in setup.py (#11671)
* Plus minor linting / updating
2022-10-19 08:07:08 +02:00
Adriane Boyd
46234a5221
Merge pull request #11668 from adrianeboyd/chore/v2.3.8
Updates for python 3.10 and 3.11, set version to v2.3.8
2022-10-18 17:18:33 +02:00
Adriane Boyd
19f64ee18a Set version to v2.3.8 2022-10-18 16:06:10 +02:00
Adriane Boyd
07e630cd03 Update dev requirements 2022-10-18 16:06:10 +02:00
Adriane Boyd
4297ca43cf Update CI 2022-10-18 16:06:10 +02:00
Adriane Boyd
b32e143326 Update package for python 3.10 and 3.11 2022-10-18 16:06:10 +02:00
Adriane Boyd
2e91e07388 Update cythonize 2022-10-18 16:06:10 +02:00
Adriane Boyd
dca663a2ef
Update CI for v2.x (#8290)
* Remove Travis CI for python 2.7

* Move download CLI test to separate step

* Switch to ubuntu-18.04

* Remove duplicate CI download tests

* Restrict download test to linux python 3.9
2021-06-07 10:25:36 +02:00
Adriane Boyd
cae72e46dd
Set version to v2.3.7 (#8289)
* Set version to v2.3.7

* Add download test to CI
2021-06-04 19:33:42 +02:00
Adriane Boyd
22287c89c0
Fix pip args in download CLI (#8287) 2021-06-04 19:02:02 +02:00
Adriane Boyd
2c1de4b9a4
Set version to v2.3.6 (#8117) 2021-05-17 17:55:19 +02:00
Adriane Boyd
5e7e7cda94
Fix range in Span.get_lca_matrix (#8115)
Fix the adjusted token index / lca matrix index ranges for
`_get_lca_matrix` for spans.

* The range for `k` should correspond to the adjusted indices in
`lca_matrix` with the `start` indexed at `0`
2021-05-17 16:54:10 +02:00
Ines Montani
6ce9f0469f
Merge pull request #7261 from adrianeboyd/docs/v2-model-details
Limit to v2 models on v2.spacy.io
2021-03-03 23:12:59 +11:00
Adriane Boyd
6ffb395d68 Limit to v2 models on v2.spacy.io 2021-03-03 09:34:45 +01:00
20 changed files with 132 additions and 184 deletions

View File

@ -1,23 +0,0 @@
language: python
sudo: false
cache: pip
dist: trusty
group: edge
python:
- "2.7"
os:
- linux
install:
- "python -m pip install -U pip setuptools"
- "pip install -e . --prefer-binary"
script:
- "cat /proc/cpuinfo | grep flags | head -n 1"
- "pip install -r requirements.txt"
- "python -m pytest --tb=native spacy"
branches:
except:
- spacy.io
notifications:
slack:
secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
email: false

View File

@ -7,3 +7,4 @@ include pyproject.toml
recursive-exclude spacy/lang *.json
recursive-include spacy/lang *.json.gz
recursive-include licenses *
recursive-exclude spacy *.cpp

View File

@ -18,7 +18,6 @@ It's commercial open-source software, released under the MIT license.
[Check out the release notes here.](https://github.com/explosion/spaCy/releases/tag/v3.0.0rc1)
[![Azure Pipelines](<https://img.shields.io/azure-devops/build/explosion-ai/public/8/master.svg?logo=azure-pipelines&style=flat-square&label=build+(3.x)>)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
[![Travis Build Status](<https://img.shields.io/travis/explosion/spaCy/master.svg?style=flat-square&logo=travis-ci&logoColor=white&label=build+(2.7)>)](https://travis-ci.org/explosion/spaCy)
[![Current Release Version](https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square&logo=github)](https://github.com/explosion/spaCy/releases)
[![pypi Version](https://img.shields.io/pypi/v/spacy.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/spacy/)
[![conda Version](https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square&logo=conda-forge&logoColor=white)](https://anaconda.org/conda-forge/spacy)

View File

@ -21,7 +21,7 @@ jobs:
# defined in .flake8 and overwrites the selected codes.
- job: 'Validate'
pool:
vmImage: 'ubuntu-16.04'
vmImage: 'ubuntu-latest'
steps:
- task: UsePythonVersion@0
inputs:
@ -35,50 +35,37 @@ jobs:
dependsOn: 'Validate'
strategy:
matrix:
Python35Linux:
imageName: 'ubuntu-16.04'
python.version: '3.5'
os: linux
Python35Windows:
imageName: 'vs2017-win2016'
python.version: '3.5'
# Test on one OS per python 3.6/3.7/3.8 to speed up CI
# Test on one OS per python 3.6/3.7/3.8/3.9 to speed up CI
Python36Linux:
imageName: 'ubuntu-16.04'
imageName: 'ubuntu-20.04'
python.version: '3.6'
# Python36Windows:
# imageName: 'vs2017-win2016'
# python.version: '3.6'
# Python36Mac:
# imageName: 'macos-10.14'
# python.version: '3.6'
# Python37Linux:
# imageName: 'ubuntu-16.04'
# python.version: '3.7'
Python37Windows:
imageName: 'vs2017-win2016'
imageName: 'windows-latest'
python.version: '3.7'
# Python37Mac:
# imageName: 'macos-10.14'
# python.version: '3.7'
# Python38Linux:
# imageName: 'ubuntu-16.04'
# python.version: '3.8'
# Python38Windows:
# imageName: 'vs2017-win2016'
# python.version: '3.8'
Python38Mac:
imageName: 'macos-10.14'
imageName: 'macos-latest'
python.version: '3.8'
Python39Linux:
imageName: 'ubuntu-16.04'
python.version: '3.9'
Python39Windows:
imageName: 'vs2017-win2016'
python.version: '3.9'
Python39Mac:
imageName: 'macos-10.14'
imageName: 'ubuntu-latest'
python.version: '3.9'
Python310Linux:
imageName: 'ubuntu-latest'
python.version: '3.10'
Python310Windows:
imageName: 'windows-latest'
python.version: '3.10'
Python310Mac:
imageName: 'macos-latest'
python.version: '3.10'
Python311Linux:
imageName: 'ubuntu-latest'
python.version: '3.11'
Python311Windows:
imageName: 'windows-latest'
python.version: '3.11'
Python311Mac:
imageName: 'macos-latest'
python.version: '3.11'
maxParallel: 4
pool:
vmImage: $(imageName)
@ -88,17 +75,13 @@ jobs:
inputs:
versionSpec: '$(python.version)'
architecture: 'x64'
allowUnstable: true
- script: python -m pip install -U pip setuptools
displayName: 'Update pip'
- script: pip install -r requirements.txt --prefer-binary
displayName: 'Install dependencies (python 3.5: prefer binary)'
condition: eq(variables['python.version'], '3.5')
- script: pip install -r requirements.txt
displayName: 'Install dependencies'
condition: not(eq(variables['python.version'], '3.5'))
- script: |
python setup.py build_ext --inplace -j 2
@ -115,19 +98,18 @@ jobs:
pip uninstall -y -r installed.txt
displayName: 'Uninstall all packages'
- bash: |
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
pip install dist/$SDIST --prefer-binary
displayName: 'Install from sdist (python 3.5: prefer binary)'
condition: eq(variables['python.version'], '3.5')
- bash: |
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
pip install dist/$SDIST
displayName: 'Install from sdist'
condition: not(eq(variables['python.version'], '3.5'))
- script: |
pip install -r requirements.txt --prefer-binary
pip install -r requirements.txt
python -m pytest --pyargs spacy
displayName: 'Run tests'
- script: |
python -m spacy download en_core_web_sm
python -c "import spacy; nlp=spacy.load('en_core_web_sm'); doc=nlp('test')"
displayName: 'Test download CLI'
condition: eq(variables['python.version'], '3.9')

View File

@ -1,5 +1,9 @@
# build version constraints for use with wheelwright + multibuild
numpy==1.15.0; python_version<='3.7'
numpy==1.17.3; python_version=='3.8'
numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
numpy==1.19.3; python_version=='3.9'
numpy; python_version>='3.10'
numpy==1.21.3; python_version=='3.10'
numpy==1.23.2; python_version=='3.11'
numpy; python_version>='3.12'

View File

@ -1,7 +1,7 @@
[build-system]
requires = [
"setuptools",
"cython>=0.25",
"cython>=0.25,<3.0",
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",

View File

@ -17,8 +17,8 @@ tqdm>=4.38.0,<5.0.0
pyrsistent<0.17.0
jsonschema>=2.6.0,<3.1.0
# Development dependencies
cython>=0.25
cython>=0.25,<3.0
pytest>=4.6.5
pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
flake8>=3.5.0,<3.6.0
flake8>=3.5.0,<6.0.0

View File

@ -24,6 +24,8 @@ classifiers =
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Topic :: Scientific/Engineering
[options]
@ -33,7 +35,7 @@ scripts =
bin/spacy
python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
setup_requires =
cython>=0.25
cython>=0.25,<3.0
numpy>=1.15.0
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0

135
setup.py
View File

@ -1,16 +1,17 @@
#!/usr/bin/env python
from __future__ import print_function
import io
import os
import subprocess
import sys
import contextlib
import numpy
from pathlib import Path
from distutils.command.build_ext import build_ext
from distutils.sysconfig import get_python_inc
import distutils.util
from distutils import ccompiler, msvccompiler
from setuptools import Extension, setup, find_packages
from Cython.Build import cythonize
from Cython.Compiler import Options
def is_new_osx():
@ -28,6 +29,10 @@ def is_new_osx():
return False
# Preserve `__doc__` on functions and classes
# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
Options.docstrings = True
PACKAGES = find_packages()
@ -74,6 +79,12 @@ COMPILE_OPTIONS = {
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
COMPILER_DIRECTIVES = {
"language_level": -3,
"embedsignature": True,
"annotation_typing": False,
}
if is_new_osx():
# On Mac, use libc++ because Apple deprecated use of
@ -105,20 +116,6 @@ class build_ext_subclass(build_ext, build_ext_options):
build_ext.build_extensions(self)
def generate_cython(root, source):
print("Cythonizing sources")
p = subprocess.call(
[sys.executable, os.path.join(root, "bin", "cythonize.py"), source],
env=os.environ,
)
if p != 0:
raise RuntimeError("Running cythonize failed")
def is_source_release(path):
return os.path.exists(os.path.join(path, "PKG-INFO"))
# Include the git version in the build (adapted from NumPy)
# Copyright (c) 2005-2020, NumPy Developers.
# BSD 3-Clause license, see licenses/3rd_party_licenses.txt
@ -142,7 +139,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
try:
out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"])
git_version = out.strip().decode("ascii")
except:
except Exception:
pass
elif os.path.exists(filename):
# must be a source distribution, use existing version file
@ -150,7 +147,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
a = open(filename, "r")
lines = a.readlines()
git_version = lines[-1].split('"')[1]
except:
except Exception:
pass
finally:
a.close()
@ -169,83 +166,51 @@ GIT_VERSION = "%(git_version)s"
def clean(path):
for name in MOD_NAMES:
name = name.replace(".", "/")
for ext in [".so", ".html", ".cpp", ".c"]:
file_path = os.path.join(path, name + ext)
if os.path.exists(file_path):
os.unlink(file_path)
@contextlib.contextmanager
def chdir(new_dir):
old_dir = os.getcwd()
try:
os.chdir(new_dir)
sys.path.insert(0, new_dir)
yield
finally:
del sys.path[0]
os.chdir(old_dir)
for path in path.glob("**/*"):
if path.is_file() and path.suffix in (".so", ".cpp"):
print("Deleting", path.name)
path.unlink()
def setup_package():
write_git_info_py()
root = os.path.abspath(os.path.dirname(__file__))
root = Path(__file__).parent
if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
return clean(root)
return clean(root / "spacy")
with chdir(root):
with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
about = {}
exec(f.read(), about)
with (root / "spacy" / "about.py").open("r") as f:
about = {}
exec(f.read(), about)
include_dirs = [
numpy.get_include(),
get_python_inc(plat_specific=True),
os.path.join(root, "include"),
]
include_dirs = [
get_python_inc(plat_specific=True),
numpy.get_include(),
str(root / "include"),
]
if (
ccompiler.new_compiler().compiler_type == "msvc"
and msvccompiler.get_build_version() == 9
):
include_dirs.append(str(root / "include" / "msvc9"))
ext_modules = []
for name in MOD_NAMES:
mod_path = name.replace(".", "/") + ".pyx"
ext = Extension(name, [mod_path], language="c++")
ext_modules.append(ext)
print("Cythonizing sources")
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
if (
ccompiler.new_compiler().compiler_type == "msvc"
and msvccompiler.get_build_version() == 9
):
include_dirs.append(os.path.join(root, "include", "msvc9"))
ext_modules = []
for mod_name in MOD_NAMES:
mod_path = mod_name.replace(".", "/") + ".cpp"
extra_link_args = []
# ???
# Imported from patch from @mikepb
# See Issue #267. Running blind here...
if sys.platform == "darwin":
dylib_path = [".." for _ in range(mod_name.count("."))]
dylib_path = "/".join(dylib_path)
dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path
extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
ext_modules.append(
Extension(
mod_name,
[mod_path],
language="c++",
include_dirs=include_dirs,
extra_link_args=extra_link_args,
)
)
if not is_source_release(root):
generate_cython(root, "spacy")
setup(
name="spacy",
packages=PACKAGES,
version=about["__version__"],
ext_modules=ext_modules,
cmdclass={"build_ext": build_ext_subclass},
)
setup(
name="spacy",
packages=PACKAGES,
version=about["__version__"],
ext_modules=ext_modules,
cmdclass={"build_ext": build_ext_subclass},
include_dirs=include_dirs,
package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
)
if __name__ == "__main__":

View File

@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
__version__ = "2.3.5"
__version__ = "2.3.9"
__release__ = True
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@ -128,6 +128,6 @@ def get_version(model, comp):
def download_model(filename, user_pip_args=None):
download_url = about.__download_url__ + "/" + filename
pip_args = user_pip_args if user_pip_args is not None else []
pip_args = list(user_pip_args) if user_pip_args is not None else []
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
return subprocess.call(cmd, env=os.environ.copy())

View File

@ -295,7 +295,7 @@ def make_docs(nlp, batch, min_length, max_length):
raise ValueError(Errors.E138.format(text=record))
if "heads" in record:
heads = record["heads"]
heads = numpy.asarray(heads, dtype="uint64")
heads = numpy.asarray([numpy.array(h).astype(numpy.uint64) for h in heads], dtype="uint64")
heads = heads.reshape((len(doc), 1))
doc = doc.from_array([HEAD], heads)
if len(doc) >= min_length and len(doc) < max_length:

View File

@ -6,7 +6,13 @@ from spacy.util import get_lang_class
def pytest_addoption(parser):
parser.addoption("--slow", action="store_true", help="include slow tests")
try:
parser.addoption("--slow", action="store_true", help="include slow tests")
parser.addoption("--issue", action="store", help="test specific issues")
# Options are already added, e.g. if conftest is copied in a build pipeline
# and runs twice
except ValueError:
pass
def pytest_runtest_setup(item):

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import numpy
import pytest
from spacy.tokens import Doc
from spacy.attrs import ORTH, SHAPE, POS, DEP
@ -91,14 +92,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
# head before start
arr = doc.to_array(["HEAD"])
arr[0] = -1
arr[0] = numpy.int32(-1).astype(numpy.uint64)
doc_from_array = Doc(en_vocab, words=words)
with pytest.raises(ValueError):
doc_from_array.from_array(["HEAD"], arr)
# head after end
arr = doc.to_array(["HEAD"])
arr[0] = 5
arr[0] = numpy.int32(5).astype(numpy.uint64)
doc_from_array = Doc(en_vocab, words=words)
with pytest.raises(ValueError):
doc_from_array.from_array(["HEAD"], arr)

View File

@ -2,6 +2,8 @@
from __future__ import unicode_literals
import pytest
import numpy
from numpy.testing import assert_array_equal
from spacy.attrs import ORTH, LENGTH
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab
@ -118,6 +120,14 @@ def test_spans_lca_matrix(en_tokenizer):
assert lca[1, 0] == 1 # slept & dog -> slept
assert lca[1, 1] == 1 # slept & slept -> slept
# example from Span API docs
tokens = en_tokenizer("I like New York in Autumn")
doc = get_doc(
tokens.vocab, words=[t.text for t in tokens], heads=[1, 0, 1, -2, -1, -1]
)
lca = doc[1:4].get_lca_matrix()
assert_array_equal(lca, numpy.asarray([[0, 0, 0], [0, 1, 2], [0, 2, 2]]))
def test_span_similarity_match():
doc = Doc(Vocab(), words=["a", "b", "a", "b"])

View File

@ -37,9 +37,9 @@ def test_en_noun_chunks_not_nested(en_vocab):
[0, root],
[4, amod],
[3, nmod],
[-1, cc],
[-2, conj],
[-5, dobj],
[numpy.int32(-1).astype(numpy.uint64), cc],
[numpy.int32(-2).astype(numpy.uint64), conj],
[numpy.int32(-5).astype(numpy.uint64), dobj],
],
dtype="uint64",
),

View File

@ -58,11 +58,12 @@ def get_doc(
for annot in annotations:
if annot:
if annot is heads:
annot = numpy.array(heads, dtype=numpy.int32).astype(numpy.uint64)
for i in range(len(words)):
if attrs.ndim == 1:
attrs[i] = heads[i]
attrs[i] = annot[i]
else:
attrs[i, j] = heads[i]
attrs[i, j] = annot[i]
else:
for i in range(len(words)):
if attrs.ndim == 1:

View File

@ -805,7 +805,7 @@ cdef class Doc:
`(M, N)` array of attributes.
attrs (list) A list of attribute ID ints.
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
array (numpy.ndarray[ndim=2, dtype='uint64']): The attribute values.
RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_array
@ -845,9 +845,9 @@ cdef class Doc:
col = attrs.index(HEAD)
for i in range(length):
# cast index to signed int
abs_head_index = numpy.int32(array[i, col]) + i
abs_head_index = array[i, col].astype(numpy.int32) + i
if abs_head_index < 0 or abs_head_index >= length:
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=numpy.int32(array[i, col])))
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=abs_head_index-i))
# Do TAG first. This lets subsequent loop override stuff like POS, LEMMA
if TAG in attrs:
col = attrs.index(TAG)
@ -1351,7 +1351,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
j_idx_in_sent = start + j - sent_start
n_missing_tokens_in_sent = len(sent) - j_idx_in_sent
# make sure we do not go past `end`, in cases where `end` < sent.end
max_range = min(j + n_missing_tokens_in_sent, end)
max_range = min(j + n_missing_tokens_in_sent, end - start)
for k in range(j + 1, max_range):
lca = _get_tokens_lca(token_j, doc[start + k])
# if lca is outside of span, we set it to -1

View File

@ -272,7 +272,7 @@ cdef class Span:
for ancestor in ancestors:
ancestor_i = ancestor.i - self.start
if ancestor_i in range(length):
array[i, head_col] = ancestor_i - i
array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
# if there is no appropriate ancestor, define a new artificial root
value = array[i, head_col]
@ -280,7 +280,7 @@ cdef class Span:
new_root = old_to_new_root.get(ancestor_i, None)
if new_root is not None:
# take the same artificial root as a previous token from the same sentence
array[i, head_col] = new_root - i
array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
else:
# set this token as the new artificial root
array[i, head_col] = 0

View File

@ -68,7 +68,7 @@ function isStableVersion(v) {
function getLatestVersion(modelId, compatibility) {
for (let [version, models] of Object.entries(compatibility)) {
if (isStableVersion(version) && models[modelId]) {
if (version.startsWith('2.') && isStableVersion(version) && models[modelId]) {
const modelVersions = models[modelId]
for (let modelVersion of modelVersions) {
if (isStableVersion(modelVersion)) {