mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 12:20:20 +03:00
Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
a70b5c186b | ||
|
a5b6d92ea3 | ||
|
fe0bae9159 | ||
|
8e9a84952a | ||
|
ca0cae2074 | ||
|
46234a5221 | ||
|
19f64ee18a | ||
|
07e630cd03 | ||
|
4297ca43cf | ||
|
b32e143326 | ||
|
2e91e07388 | ||
|
dca663a2ef | ||
|
cae72e46dd | ||
|
22287c89c0 | ||
|
2c1de4b9a4 | ||
|
5e7e7cda94 | ||
|
6ce9f0469f | ||
|
6ffb395d68 |
23
.travis.yml
23
.travis.yml
|
@ -1,23 +0,0 @@
|
|||
language: python
|
||||
sudo: false
|
||||
cache: pip
|
||||
dist: trusty
|
||||
group: edge
|
||||
python:
|
||||
- "2.7"
|
||||
os:
|
||||
- linux
|
||||
install:
|
||||
- "python -m pip install -U pip setuptools"
|
||||
- "pip install -e . --prefer-binary"
|
||||
script:
|
||||
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
||||
- "pip install -r requirements.txt"
|
||||
- "python -m pytest --tb=native spacy"
|
||||
branches:
|
||||
except:
|
||||
- spacy.io
|
||||
notifications:
|
||||
slack:
|
||||
secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
|
||||
email: false
|
|
@ -7,3 +7,4 @@ include pyproject.toml
|
|||
recursive-exclude spacy/lang *.json
|
||||
recursive-include spacy/lang *.json.gz
|
||||
recursive-include licenses *
|
||||
recursive-exclude spacy *.cpp
|
||||
|
|
|
@ -18,7 +18,6 @@ It's commercial open-source software, released under the MIT license.
|
|||
[Check out the release notes here.](https://github.com/explosion/spaCy/releases/tag/v3.0.0rc1)
|
||||
|
||||
[>)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
|
||||
[>)](https://travis-ci.org/explosion/spaCy)
|
||||
[](https://github.com/explosion/spaCy/releases)
|
||||
[](https://pypi.org/project/spacy/)
|
||||
[](https://anaconda.org/conda-forge/spacy)
|
||||
|
|
|
@ -21,7 +21,7 @@ jobs:
|
|||
# defined in .flake8 and overwrites the selected codes.
|
||||
- job: 'Validate'
|
||||
pool:
|
||||
vmImage: 'ubuntu-16.04'
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
|
@ -35,50 +35,37 @@ jobs:
|
|||
dependsOn: 'Validate'
|
||||
strategy:
|
||||
matrix:
|
||||
Python35Linux:
|
||||
imageName: 'ubuntu-16.04'
|
||||
python.version: '3.5'
|
||||
os: linux
|
||||
Python35Windows:
|
||||
imageName: 'vs2017-win2016'
|
||||
python.version: '3.5'
|
||||
# Test on one OS per python 3.6/3.7/3.8 to speed up CI
|
||||
# Test on one OS per python 3.6/3.7/3.8/3.9 to speed up CI
|
||||
Python36Linux:
|
||||
imageName: 'ubuntu-16.04'
|
||||
imageName: 'ubuntu-20.04'
|
||||
python.version: '3.6'
|
||||
# Python36Windows:
|
||||
# imageName: 'vs2017-win2016'
|
||||
# python.version: '3.6'
|
||||
# Python36Mac:
|
||||
# imageName: 'macos-10.14'
|
||||
# python.version: '3.6'
|
||||
# Python37Linux:
|
||||
# imageName: 'ubuntu-16.04'
|
||||
# python.version: '3.7'
|
||||
Python37Windows:
|
||||
imageName: 'vs2017-win2016'
|
||||
imageName: 'windows-latest'
|
||||
python.version: '3.7'
|
||||
# Python37Mac:
|
||||
# imageName: 'macos-10.14'
|
||||
# python.version: '3.7'
|
||||
# Python38Linux:
|
||||
# imageName: 'ubuntu-16.04'
|
||||
# python.version: '3.8'
|
||||
# Python38Windows:
|
||||
# imageName: 'vs2017-win2016'
|
||||
# python.version: '3.8'
|
||||
Python38Mac:
|
||||
imageName: 'macos-10.14'
|
||||
imageName: 'macos-latest'
|
||||
python.version: '3.8'
|
||||
Python39Linux:
|
||||
imageName: 'ubuntu-16.04'
|
||||
python.version: '3.9'
|
||||
Python39Windows:
|
||||
imageName: 'vs2017-win2016'
|
||||
python.version: '3.9'
|
||||
Python39Mac:
|
||||
imageName: 'macos-10.14'
|
||||
imageName: 'ubuntu-latest'
|
||||
python.version: '3.9'
|
||||
Python310Linux:
|
||||
imageName: 'ubuntu-latest'
|
||||
python.version: '3.10'
|
||||
Python310Windows:
|
||||
imageName: 'windows-latest'
|
||||
python.version: '3.10'
|
||||
Python310Mac:
|
||||
imageName: 'macos-latest'
|
||||
python.version: '3.10'
|
||||
Python311Linux:
|
||||
imageName: 'ubuntu-latest'
|
||||
python.version: '3.11'
|
||||
Python311Windows:
|
||||
imageName: 'windows-latest'
|
||||
python.version: '3.11'
|
||||
Python311Mac:
|
||||
imageName: 'macos-latest'
|
||||
python.version: '3.11'
|
||||
maxParallel: 4
|
||||
pool:
|
||||
vmImage: $(imageName)
|
||||
|
@ -88,17 +75,13 @@ jobs:
|
|||
inputs:
|
||||
versionSpec: '$(python.version)'
|
||||
architecture: 'x64'
|
||||
allowUnstable: true
|
||||
|
||||
- script: python -m pip install -U pip setuptools
|
||||
displayName: 'Update pip'
|
||||
|
||||
- script: pip install -r requirements.txt --prefer-binary
|
||||
displayName: 'Install dependencies (python 3.5: prefer binary)'
|
||||
condition: eq(variables['python.version'], '3.5')
|
||||
|
||||
- script: pip install -r requirements.txt
|
||||
displayName: 'Install dependencies'
|
||||
condition: not(eq(variables['python.version'], '3.5'))
|
||||
|
||||
- script: |
|
||||
python setup.py build_ext --inplace -j 2
|
||||
|
@ -115,19 +98,18 @@ jobs:
|
|||
pip uninstall -y -r installed.txt
|
||||
displayName: 'Uninstall all packages'
|
||||
|
||||
- bash: |
|
||||
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
||||
pip install dist/$SDIST --prefer-binary
|
||||
displayName: 'Install from sdist (python 3.5: prefer binary)'
|
||||
condition: eq(variables['python.version'], '3.5')
|
||||
|
||||
- bash: |
|
||||
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
||||
pip install dist/$SDIST
|
||||
displayName: 'Install from sdist'
|
||||
condition: not(eq(variables['python.version'], '3.5'))
|
||||
|
||||
- script: |
|
||||
pip install -r requirements.txt --prefer-binary
|
||||
pip install -r requirements.txt
|
||||
python -m pytest --pyargs spacy
|
||||
displayName: 'Run tests'
|
||||
|
||||
- script: |
|
||||
python -m spacy download en_core_web_sm
|
||||
python -c "import spacy; nlp=spacy.load('en_core_web_sm'); doc=nlp('test')"
|
||||
displayName: 'Test download CLI'
|
||||
condition: eq(variables['python.version'], '3.9')
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
# build version constraints for use with wheelwright + multibuild
|
||||
numpy==1.15.0; python_version<='3.7'
|
||||
numpy==1.17.3; python_version=='3.8'
|
||||
numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
|
||||
numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
|
||||
numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
|
||||
numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
|
||||
numpy==1.19.3; python_version=='3.9'
|
||||
numpy; python_version>='3.10'
|
||||
numpy==1.21.3; python_version=='3.10'
|
||||
numpy==1.23.2; python_version=='3.11'
|
||||
numpy; python_version>='3.12'
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
[build-system]
|
||||
requires = [
|
||||
"setuptools",
|
||||
"cython>=0.25",
|
||||
"cython>=0.25,<3.0",
|
||||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
|
|
|
@ -17,8 +17,8 @@ tqdm>=4.38.0,<5.0.0
|
|||
pyrsistent<0.17.0
|
||||
jsonschema>=2.6.0,<3.1.0
|
||||
# Development dependencies
|
||||
cython>=0.25
|
||||
cython>=0.25,<3.0
|
||||
pytest>=4.6.5
|
||||
pytest-timeout>=1.3.0,<2.0.0
|
||||
mock>=2.0.0,<3.0.0
|
||||
flake8>=3.5.0,<3.6.0
|
||||
flake8>=3.5.0,<6.0.0
|
||||
|
|
|
@ -24,6 +24,8 @@ classifiers =
|
|||
Programming Language :: Python :: 3.7
|
||||
Programming Language :: Python :: 3.8
|
||||
Programming Language :: Python :: 3.9
|
||||
Programming Language :: Python :: 3.10
|
||||
Programming Language :: Python :: 3.11
|
||||
Topic :: Scientific/Engineering
|
||||
|
||||
[options]
|
||||
|
@ -33,7 +35,7 @@ scripts =
|
|||
bin/spacy
|
||||
python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
|
||||
setup_requires =
|
||||
cython>=0.25
|
||||
cython>=0.25,<3.0
|
||||
numpy>=1.15.0
|
||||
# We also need our Cython packages here to compile against
|
||||
cymem>=2.0.2,<2.1.0
|
||||
|
|
135
setup.py
135
setup.py
|
@ -1,16 +1,17 @@
|
|||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import contextlib
|
||||
import numpy
|
||||
from pathlib import Path
|
||||
from distutils.command.build_ext import build_ext
|
||||
from distutils.sysconfig import get_python_inc
|
||||
import distutils.util
|
||||
from distutils import ccompiler, msvccompiler
|
||||
from setuptools import Extension, setup, find_packages
|
||||
from Cython.Build import cythonize
|
||||
from Cython.Compiler import Options
|
||||
|
||||
|
||||
def is_new_osx():
|
||||
|
@ -28,6 +29,10 @@ def is_new_osx():
|
|||
return False
|
||||
|
||||
|
||||
# Preserve `__doc__` on functions and classes
|
||||
# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
|
||||
Options.docstrings = True
|
||||
|
||||
PACKAGES = find_packages()
|
||||
|
||||
|
||||
|
@ -74,6 +79,12 @@ COMPILE_OPTIONS = {
|
|||
|
||||
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
|
||||
|
||||
COMPILER_DIRECTIVES = {
|
||||
"language_level": -3,
|
||||
"embedsignature": True,
|
||||
"annotation_typing": False,
|
||||
}
|
||||
|
||||
|
||||
if is_new_osx():
|
||||
# On Mac, use libc++ because Apple deprecated use of
|
||||
|
@ -105,20 +116,6 @@ class build_ext_subclass(build_ext, build_ext_options):
|
|||
build_ext.build_extensions(self)
|
||||
|
||||
|
||||
def generate_cython(root, source):
|
||||
print("Cythonizing sources")
|
||||
p = subprocess.call(
|
||||
[sys.executable, os.path.join(root, "bin", "cythonize.py"), source],
|
||||
env=os.environ,
|
||||
)
|
||||
if p != 0:
|
||||
raise RuntimeError("Running cythonize failed")
|
||||
|
||||
|
||||
def is_source_release(path):
|
||||
return os.path.exists(os.path.join(path, "PKG-INFO"))
|
||||
|
||||
|
||||
# Include the git version in the build (adapted from NumPy)
|
||||
# Copyright (c) 2005-2020, NumPy Developers.
|
||||
# BSD 3-Clause license, see licenses/3rd_party_licenses.txt
|
||||
|
@ -142,7 +139,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
|
|||
try:
|
||||
out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"])
|
||||
git_version = out.strip().decode("ascii")
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
elif os.path.exists(filename):
|
||||
# must be a source distribution, use existing version file
|
||||
|
@ -150,7 +147,7 @@ def write_git_info_py(filename="spacy/git_info.py"):
|
|||
a = open(filename, "r")
|
||||
lines = a.readlines()
|
||||
git_version = lines[-1].split('"')[1]
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
a.close()
|
||||
|
@ -169,83 +166,51 @@ GIT_VERSION = "%(git_version)s"
|
|||
|
||||
|
||||
def clean(path):
|
||||
for name in MOD_NAMES:
|
||||
name = name.replace(".", "/")
|
||||
for ext in [".so", ".html", ".cpp", ".c"]:
|
||||
file_path = os.path.join(path, name + ext)
|
||||
if os.path.exists(file_path):
|
||||
os.unlink(file_path)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def chdir(new_dir):
|
||||
old_dir = os.getcwd()
|
||||
try:
|
||||
os.chdir(new_dir)
|
||||
sys.path.insert(0, new_dir)
|
||||
yield
|
||||
finally:
|
||||
del sys.path[0]
|
||||
os.chdir(old_dir)
|
||||
for path in path.glob("**/*"):
|
||||
if path.is_file() and path.suffix in (".so", ".cpp"):
|
||||
print("Deleting", path.name)
|
||||
path.unlink()
|
||||
|
||||
|
||||
def setup_package():
|
||||
write_git_info_py()
|
||||
|
||||
root = os.path.abspath(os.path.dirname(__file__))
|
||||
root = Path(__file__).parent
|
||||
|
||||
if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
|
||||
return clean(root)
|
||||
return clean(root / "spacy")
|
||||
|
||||
with chdir(root):
|
||||
with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
|
||||
about = {}
|
||||
exec(f.read(), about)
|
||||
with (root / "spacy" / "about.py").open("r") as f:
|
||||
about = {}
|
||||
exec(f.read(), about)
|
||||
|
||||
include_dirs = [
|
||||
numpy.get_include(),
|
||||
get_python_inc(plat_specific=True),
|
||||
os.path.join(root, "include"),
|
||||
]
|
||||
include_dirs = [
|
||||
get_python_inc(plat_specific=True),
|
||||
numpy.get_include(),
|
||||
str(root / "include"),
|
||||
]
|
||||
if (
|
||||
ccompiler.new_compiler().compiler_type == "msvc"
|
||||
and msvccompiler.get_build_version() == 9
|
||||
):
|
||||
include_dirs.append(str(root / "include" / "msvc9"))
|
||||
ext_modules = []
|
||||
for name in MOD_NAMES:
|
||||
mod_path = name.replace(".", "/") + ".pyx"
|
||||
ext = Extension(name, [mod_path], language="c++")
|
||||
ext_modules.append(ext)
|
||||
print("Cythonizing sources")
|
||||
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
|
||||
|
||||
if (
|
||||
ccompiler.new_compiler().compiler_type == "msvc"
|
||||
and msvccompiler.get_build_version() == 9
|
||||
):
|
||||
include_dirs.append(os.path.join(root, "include", "msvc9"))
|
||||
|
||||
ext_modules = []
|
||||
for mod_name in MOD_NAMES:
|
||||
mod_path = mod_name.replace(".", "/") + ".cpp"
|
||||
extra_link_args = []
|
||||
# ???
|
||||
# Imported from patch from @mikepb
|
||||
# See Issue #267. Running blind here...
|
||||
if sys.platform == "darwin":
|
||||
dylib_path = [".." for _ in range(mod_name.count("."))]
|
||||
dylib_path = "/".join(dylib_path)
|
||||
dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path
|
||||
extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
|
||||
ext_modules.append(
|
||||
Extension(
|
||||
mod_name,
|
||||
[mod_path],
|
||||
language="c++",
|
||||
include_dirs=include_dirs,
|
||||
extra_link_args=extra_link_args,
|
||||
)
|
||||
)
|
||||
|
||||
if not is_source_release(root):
|
||||
generate_cython(root, "spacy")
|
||||
|
||||
setup(
|
||||
name="spacy",
|
||||
packages=PACKAGES,
|
||||
version=about["__version__"],
|
||||
ext_modules=ext_modules,
|
||||
cmdclass={"build_ext": build_ext_subclass},
|
||||
)
|
||||
setup(
|
||||
name="spacy",
|
||||
packages=PACKAGES,
|
||||
version=about["__version__"],
|
||||
ext_modules=ext_modules,
|
||||
cmdclass={"build_ext": build_ext_subclass},
|
||||
include_dirs=include_dirs,
|
||||
package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy"
|
||||
__version__ = "2.3.5"
|
||||
__version__ = "2.3.9"
|
||||
__release__ = True
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
|
|
|
@ -128,6 +128,6 @@ def get_version(model, comp):
|
|||
|
||||
def download_model(filename, user_pip_args=None):
|
||||
download_url = about.__download_url__ + "/" + filename
|
||||
pip_args = user_pip_args if user_pip_args is not None else []
|
||||
pip_args = list(user_pip_args) if user_pip_args is not None else []
|
||||
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
|
||||
return subprocess.call(cmd, env=os.environ.copy())
|
||||
|
|
|
@ -295,7 +295,7 @@ def make_docs(nlp, batch, min_length, max_length):
|
|||
raise ValueError(Errors.E138.format(text=record))
|
||||
if "heads" in record:
|
||||
heads = record["heads"]
|
||||
heads = numpy.asarray(heads, dtype="uint64")
|
||||
heads = numpy.asarray([numpy.array(h).astype(numpy.uint64) for h in heads], dtype="uint64")
|
||||
heads = heads.reshape((len(doc), 1))
|
||||
doc = doc.from_array([HEAD], heads)
|
||||
if len(doc) >= min_length and len(doc) < max_length:
|
||||
|
|
|
@ -6,7 +6,13 @@ from spacy.util import get_lang_class
|
|||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption("--slow", action="store_true", help="include slow tests")
|
||||
try:
|
||||
parser.addoption("--slow", action="store_true", help="include slow tests")
|
||||
parser.addoption("--issue", action="store", help="test specific issues")
|
||||
# Options are already added, e.g. if conftest is copied in a build pipeline
|
||||
# and runs twice
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
def pytest_runtest_setup(item):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy
|
||||
import pytest
|
||||
from spacy.tokens import Doc
|
||||
from spacy.attrs import ORTH, SHAPE, POS, DEP
|
||||
|
@ -91,14 +92,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
|
|||
|
||||
# head before start
|
||||
arr = doc.to_array(["HEAD"])
|
||||
arr[0] = -1
|
||||
arr[0] = numpy.int32(-1).astype(numpy.uint64)
|
||||
doc_from_array = Doc(en_vocab, words=words)
|
||||
with pytest.raises(ValueError):
|
||||
doc_from_array.from_array(["HEAD"], arr)
|
||||
|
||||
# head after end
|
||||
arr = doc.to_array(["HEAD"])
|
||||
arr[0] = 5
|
||||
arr[0] = numpy.int32(5).astype(numpy.uint64)
|
||||
doc_from_array = Doc(en_vocab, words=words)
|
||||
with pytest.raises(ValueError):
|
||||
doc_from_array.from_array(["HEAD"], arr)
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
import numpy
|
||||
from numpy.testing import assert_array_equal
|
||||
from spacy.attrs import ORTH, LENGTH
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.vocab import Vocab
|
||||
|
@ -118,6 +120,14 @@ def test_spans_lca_matrix(en_tokenizer):
|
|||
assert lca[1, 0] == 1 # slept & dog -> slept
|
||||
assert lca[1, 1] == 1 # slept & slept -> slept
|
||||
|
||||
# example from Span API docs
|
||||
tokens = en_tokenizer("I like New York in Autumn")
|
||||
doc = get_doc(
|
||||
tokens.vocab, words=[t.text for t in tokens], heads=[1, 0, 1, -2, -1, -1]
|
||||
)
|
||||
lca = doc[1:4].get_lca_matrix()
|
||||
assert_array_equal(lca, numpy.asarray([[0, 0, 0], [0, 1, 2], [0, 2, 2]]))
|
||||
|
||||
|
||||
def test_span_similarity_match():
|
||||
doc = Doc(Vocab(), words=["a", "b", "a", "b"])
|
||||
|
|
|
@ -37,9 +37,9 @@ def test_en_noun_chunks_not_nested(en_vocab):
|
|||
[0, root],
|
||||
[4, amod],
|
||||
[3, nmod],
|
||||
[-1, cc],
|
||||
[-2, conj],
|
||||
[-5, dobj],
|
||||
[numpy.int32(-1).astype(numpy.uint64), cc],
|
||||
[numpy.int32(-2).astype(numpy.uint64), conj],
|
||||
[numpy.int32(-5).astype(numpy.uint64), dobj],
|
||||
],
|
||||
dtype="uint64",
|
||||
),
|
||||
|
|
|
@ -58,11 +58,12 @@ def get_doc(
|
|||
for annot in annotations:
|
||||
if annot:
|
||||
if annot is heads:
|
||||
annot = numpy.array(heads, dtype=numpy.int32).astype(numpy.uint64)
|
||||
for i in range(len(words)):
|
||||
if attrs.ndim == 1:
|
||||
attrs[i] = heads[i]
|
||||
attrs[i] = annot[i]
|
||||
else:
|
||||
attrs[i, j] = heads[i]
|
||||
attrs[i, j] = annot[i]
|
||||
else:
|
||||
for i in range(len(words)):
|
||||
if attrs.ndim == 1:
|
||||
|
|
|
@ -805,7 +805,7 @@ cdef class Doc:
|
|||
`(M, N)` array of attributes.
|
||||
|
||||
attrs (list) A list of attribute ID ints.
|
||||
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
|
||||
array (numpy.ndarray[ndim=2, dtype='uint64']): The attribute values.
|
||||
RETURNS (Doc): Itself.
|
||||
|
||||
DOCS: https://spacy.io/api/doc#from_array
|
||||
|
@ -845,9 +845,9 @@ cdef class Doc:
|
|||
col = attrs.index(HEAD)
|
||||
for i in range(length):
|
||||
# cast index to signed int
|
||||
abs_head_index = numpy.int32(array[i, col]) + i
|
||||
abs_head_index = array[i, col].astype(numpy.int32) + i
|
||||
if abs_head_index < 0 or abs_head_index >= length:
|
||||
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=numpy.int32(array[i, col])))
|
||||
raise ValueError(Errors.E190.format(index=i, value=array[i, col], rel_head_index=abs_head_index-i))
|
||||
# Do TAG first. This lets subsequent loop override stuff like POS, LEMMA
|
||||
if TAG in attrs:
|
||||
col = attrs.index(TAG)
|
||||
|
@ -1351,7 +1351,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
|
|||
j_idx_in_sent = start + j - sent_start
|
||||
n_missing_tokens_in_sent = len(sent) - j_idx_in_sent
|
||||
# make sure we do not go past `end`, in cases where `end` < sent.end
|
||||
max_range = min(j + n_missing_tokens_in_sent, end)
|
||||
max_range = min(j + n_missing_tokens_in_sent, end - start)
|
||||
for k in range(j + 1, max_range):
|
||||
lca = _get_tokens_lca(token_j, doc[start + k])
|
||||
# if lca is outside of span, we set it to -1
|
||||
|
|
|
@ -272,7 +272,7 @@ cdef class Span:
|
|||
for ancestor in ancestors:
|
||||
ancestor_i = ancestor.i - self.start
|
||||
if ancestor_i in range(length):
|
||||
array[i, head_col] = ancestor_i - i
|
||||
array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
|
||||
|
||||
# if there is no appropriate ancestor, define a new artificial root
|
||||
value = array[i, head_col]
|
||||
|
@ -280,7 +280,7 @@ cdef class Span:
|
|||
new_root = old_to_new_root.get(ancestor_i, None)
|
||||
if new_root is not None:
|
||||
# take the same artificial root as a previous token from the same sentence
|
||||
array[i, head_col] = new_root - i
|
||||
array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
|
||||
else:
|
||||
# set this token as the new artificial root
|
||||
array[i, head_col] = 0
|
||||
|
|
|
@ -68,7 +68,7 @@ function isStableVersion(v) {
|
|||
|
||||
function getLatestVersion(modelId, compatibility) {
|
||||
for (let [version, models] of Object.entries(compatibility)) {
|
||||
if (isStableVersion(version) && models[modelId]) {
|
||||
if (version.startsWith('2.') && isStableVersion(version) && models[modelId]) {
|
||||
const modelVersions = models[modelId]
|
||||
for (let modelVersion of modelVersions) {
|
||||
if (isStableVersion(modelVersion)) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user