mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-13 01:32:32 +03:00
This PR removes the dependency on langcodes introduced in #9342. While the introduction of langcodes allows a significantly wider range of language codes, there are some unexpected side effects: zh-Hant (Traditional Chinese) should be mapped to zh instead of None, as spaCy's Chinese model is based on pkuseg, which supports tokenization of both Simplified and Traditional Chinese. Since it is possible that spaCy may have a model for Norwegian Nynorsk in the future, mapping no (macrolanguage Norwegian) to nb (Norwegian Bokmål) might be misleading. In that case, the user should be asked to specify nb or nn (Norwegian Nynorsk) explicitly or to consult the docs. The same applies to regional variants of languages such as en_gb and en_us. Overall, IMHO, introducing an extra dependency just for the conversion of language codes is overkill. It is likely that most users just need the conversion between 2/3-letter ISO codes, and a simple dictionary lookup should suffice. With this PR, ISO 639-1 and ISO 639-3 codes are supported. ISO 639-2/B codes (bibliographic codes, which are disfavored and not used in ISO 639-3) and deprecated ISO 639-1/2 codes are also supported to maximize backward compatibility.
153 lines
3.7 KiB
INI
153 lines
3.7 KiB
INI
[metadata]
# Static package metadata. No name or version key is set in this section.
description = Industrial-strength Natural Language Processing (NLP) in Python
url = https://spacy.io
author = Explosion
author_email = contact@explosion.ai
license = MIT
# The long description is read from README.md and rendered as Markdown.
long_description = file: README.md
long_description_content_type = text/markdown
# Trove classifiers, one per continuation line.
classifiers =
    Development Status :: 5 - Production/Stable
    Environment :: Console
    Intended Audience :: Developers
    Intended Audience :: Science/Research
    License :: OSI Approved :: MIT License
    Operating System :: POSIX :: Linux
    Operating System :: MacOS :: MacOS X
    Operating System :: Microsoft :: Windows
    Programming Language :: Cython
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3.11
    Programming Language :: Python :: 3.12
    Programming Language :: Python :: 3.13
    Topic :: Scientific/Engineering
# Extra links, written as "label = URL" on each continuation line.
project_urls =
    Release notes = https://github.com/explosion/spaCy/releases
    Source = https://github.com/explosion/spaCy

[options]
zip_safe = false
include_package_data = true
# Supported interpreters. Because Python < 3.9 is excluded here, the
# requirements below carry no python_version environment markers.
python_requires = >=3.9,<3.14
# NOTE: This section is superseded by pyproject.toml and will be removed in
# spaCy v4
setup_requires =
    cython>=3.0,<4.0
    numpy>=2.0.0,<3.0.0
    # We also need our Cython packages here to compile against
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    murmurhash>=0.28.0,<1.1.0
    thinc>=8.3.4,<8.4.0
# Runtime dependencies, one requirement specifier per continuation line.
install_requires =
    # Our libraries
    spacy-legacy>=3.0.11,<3.1.0
    spacy-loggers>=1.0.0,<2.0.0
    murmurhash>=0.28.0,<1.1.0
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    thinc>=8.3.4,<8.4.0
    wasabi>=0.9.1,<1.2.0
    srsly>=2.4.3,<3.0.0
    catalogue>=2.0.6,<2.1.0
    weasel>=0.1.0,<0.5.0
    # Third-party dependencies
    typer-slim>=0.3.0,<1.0.0
    tqdm>=4.38.0,<5.0.0
    numpy>=1.19.0
    requests>=2.13.0,<3.0.0
    pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
    jinja2
    # Official Python utilities
    setuptools
    packaging>=20.0

[options.entry_points]
# Command-line entry point: the "spacy" command maps to spacy.cli:setup_cli.
console_scripts =
    spacy = spacy.cli:setup_cli

[options.extras_require]
# Optional dependency groups, installed with: pip install spacy[<extra>]
lookups =
    spacy_lookups_data>=1.0.3,<1.1.0
transformers =
    spacy_transformers>=1.1.2,<1.4.0
# CuPy extras. Each extra name mirrors the suffix of the cupy distribution
# it pulls in.
cuda =
    cupy>=5.0.0b4,<13.0.0
cuda80 =
    cupy-cuda80>=5.0.0b4,<13.0.0
cuda90 =
    cupy-cuda90>=5.0.0b4,<13.0.0
cuda91 =
    cupy-cuda91>=5.0.0b4,<13.0.0
cuda92 =
    cupy-cuda92>=5.0.0b4,<13.0.0
cuda100 =
    cupy-cuda100>=5.0.0b4,<13.0.0
cuda101 =
    cupy-cuda101>=5.0.0b4,<13.0.0
cuda102 =
    cupy-cuda102>=5.0.0b4,<13.0.0
cuda110 =
    cupy-cuda110>=5.0.0b4,<13.0.0
cuda111 =
    cupy-cuda111>=5.0.0b4,<13.0.0
cuda112 =
    cupy-cuda112>=5.0.0b4,<13.0.0
cuda113 =
    cupy-cuda113>=5.0.0b4,<13.0.0
cuda114 =
    cupy-cuda114>=5.0.0b4,<13.0.0
cuda115 =
    cupy-cuda115>=5.0.0b4,<13.0.0
cuda116 =
    cupy-cuda116>=5.0.0b4,<13.0.0
cuda117 =
    cupy-cuda117>=5.0.0b4,<13.0.0
cuda11x =
    cupy-cuda11x>=11.0.0,<13.0.0
cuda12x =
    cupy-cuda12x>=11.5.0,<13.0.0
cuda-autodetect =
    cupy-wheel>=11.0.0,<13.0.0
apple =
    thinc-apple-ops>=1.0.0,<2.0.0
# Language tokenizers with external dependencies
ja =
    sudachipy>=0.5.2,!=0.6.1
    sudachidict_core>=20211220
ko =
    natto-py>=0.9.0
th =
    pythainlp>=2.0

[bdist_wheel]
# Do not tag built wheels as universal.
universal = false

[sdist]
# Archive format used for source distributions.
formats = gztar

[flake8]
# Linter configuration: check codes that are switched off.
ignore = E203, E266, E501, E731, W503, E741, F541
max-line-length = 80
# Check families that are switched on.
select = B,C,E,F,W,T4,B9
# Paths and files that are not linted.
exclude =
    .env,
    .git,
    __pycache__,
    _tokenizer_exceptions_list.py,

[tool:pytest]
# Custom test markers, written as "name: description" on each line.
markers =
    slow: mark a test as slow
    issue: reference specific issue

[mypy]
# Static type-checker settings; the pydantic and thinc plugins are enabled.
ignore_missing_imports = True
no_implicit_optional = True
plugins = pydantic.mypy, thinc.mypy
allow_redefinition = True