mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-05 01:34:14 +03:00
Merge branch 'master' into docs/memory-management
This commit is contained in:
commit
c0a6696cba
20
.github/workflows/tests.yml
vendored
20
.github/workflows/tests.yml
vendored
|
@ -12,7 +12,6 @@ on:
|
|||
- "*.md"
|
||||
- "*.mdx"
|
||||
- "website/**"
|
||||
- ".github/workflows/**"
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, edited]
|
||||
paths-ignore:
|
||||
|
@ -32,7 +31,7 @@ jobs:
|
|||
- name: Configure Python version
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.7"
|
||||
python-version: "3.10"
|
||||
|
||||
- name: black
|
||||
run: |
|
||||
|
@ -59,18 +58,7 @@ jobs:
|
|||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python_version: ["3.12"]
|
||||
include:
|
||||
- os: windows-latest
|
||||
python_version: "3.7"
|
||||
- os: macos-latest
|
||||
python_version: "3.8"
|
||||
- os: ubuntu-latest
|
||||
python_version: "3.9"
|
||||
- os: windows-latest
|
||||
python_version: "3.10"
|
||||
- os: macos-latest
|
||||
python_version: "3.11"
|
||||
python_version: ["3.9", "3.11", "3.12"]
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
|
@ -159,7 +147,9 @@ jobs:
|
|||
- name: "Test assemble CLI"
|
||||
run: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
|
||||
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
env:
|
||||
PYTHONWARNINGS: "error,ignore::DeprecationWarning"
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test assemble CLI vectors warning"
|
||||
|
|
|
@ -35,7 +35,7 @@ so that more people can benefit from it.
|
|||
|
||||
When opening an issue, use a **descriptive title** and include your
|
||||
**environment** (operating system, Python version, spaCy version). Our
|
||||
[issue template](https://github.com/explosion/spaCy/issues/new) helps you
|
||||
[issue templates](https://github.com/explosion/spaCy/issues/new/choose) help you
|
||||
remember the most important details to include. If you've discovered a bug, you
|
||||
can also submit a [regression test](#fixing-bugs) straight away. When you're
|
||||
opening an issue to report the bug, simply refer to your pull request in the
|
||||
|
|
|
@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0
|
|||
spacy-loggers>=1.0.0,<2.0.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.2.2,<8.3.0
|
||||
thinc>=8.3.0,<8.4.0
|
||||
ml_datasets>=0.2.0,<0.3.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
wasabi>=0.9.1,<1.2.0
|
||||
|
|
|
@ -17,8 +17,6 @@ classifiers =
|
|||
Operating System :: Microsoft :: Windows
|
||||
Programming Language :: Cython
|
||||
Programming Language :: Python :: 3
|
||||
Programming Language :: Python :: 3.7
|
||||
Programming Language :: Python :: 3.8
|
||||
Programming Language :: Python :: 3.9
|
||||
Programming Language :: Python :: 3.10
|
||||
Programming Language :: Python :: 3.11
|
||||
|
@ -31,7 +29,7 @@ project_urls =
|
|||
[options]
|
||||
zip_safe = false
|
||||
include_package_data = true
|
||||
python_requires = >=3.7
|
||||
python_requires = >=3.9
|
||||
# NOTE: This section is superseded by pyproject.toml and will be removed in
|
||||
# spaCy v4
|
||||
setup_requires =
|
||||
|
@ -116,7 +114,7 @@ cuda12x =
|
|||
cuda-autodetect =
|
||||
cupy-wheel>=11.0.0,<13.0.0
|
||||
apple =
|
||||
thinc-apple-ops>=0.1.0.dev0,<1.0.0
|
||||
thinc-apple-ops>=1.0.0,<2.0.0
|
||||
# Language tokenizers with external dependencies
|
||||
ja =
|
||||
sudachipy>=0.5.2,!=0.6.1
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy"
|
||||
__version__ = "3.8.0"
|
||||
__version__ = "3.8.2"
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
The list of Croatian lemmas was extracted from the reldi-tagger repository (https://github.com/clarinsi/reldi-tagger).
|
||||
Reldi-tagger is licesned under the Apache 2.0 licence.
|
||||
Reldi-tagger is licensed under the Apache 2.0 licence.
|
||||
|
||||
@InProceedings{ljubesic16-new,
|
||||
author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec},
|
||||
|
@ -12,4 +12,4 @@ Reldi-tagger is licesned under the Apache 2.0 licence.
|
|||
publisher = {European Language Resources Association (ELRA)},
|
||||
address = {Paris, France},
|
||||
isbn = {978-2-9517408-9-1}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,6 @@ from contextlib import ExitStack, contextmanager
|
|||
from copy import deepcopy
|
||||
from dataclasses import dataclass
|
||||
from itertools import chain, cycle
|
||||
import numpy
|
||||
from pathlib import Path
|
||||
from timeit import default_timer as timer
|
||||
from typing import (
|
||||
|
@ -31,6 +30,7 @@ from typing import (
|
|||
overload,
|
||||
)
|
||||
|
||||
import numpy
|
||||
import srsly
|
||||
from cymem.cymem import Pool
|
||||
from thinc.api import Config, CupyOps, Optimizer, get_current_ops
|
||||
|
@ -2143,7 +2143,9 @@ class Language:
|
|||
serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr]
|
||||
p, exclude=["vocab"]
|
||||
)
|
||||
serializers["meta.json"] = lambda p: srsly.write_json(p, _replace_numpy_floats(self.meta))
|
||||
serializers["meta.json"] = lambda p: srsly.write_json(
|
||||
p, _replace_numpy_floats(self.meta)
|
||||
)
|
||||
serializers["config.cfg"] = lambda p: self.config.to_disk(p)
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
|
@ -2257,7 +2259,9 @@ class Language:
|
|||
serializers: Dict[str, Callable[[], bytes]] = {}
|
||||
serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
|
||||
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr]
|
||||
serializers["meta.json"] = lambda: srsly.json_dumps(_replace_numpy_floats(self.meta))
|
||||
serializers["meta.json"] = lambda: srsly.json_dumps(
|
||||
_replace_numpy_floats(self.meta)
|
||||
)
|
||||
serializers["config.cfg"] = lambda: self.config.to_bytes()
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
|
@ -2309,7 +2313,9 @@ class Language:
|
|||
|
||||
|
||||
def _replace_numpy_floats(meta_dict: dict) -> dict:
|
||||
return convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict))
|
||||
return convert_recursive(
|
||||
lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
@ -1597,7 +1597,7 @@ The name of the model to be used has to be passed in via the `name` attribute.
|
|||
|
||||
| Argument | Description |
|
||||
| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `name` | The name of a mdodel supported by LangChain for this API. ~~str~~ |
|
||||
| `name` | The name of a model supported by LangChain for this API. ~~str~~ |
|
||||
| `config` | Configuration passed on to the LangChain model. Defaults to `{}`. ~~Dict[Any, Any]~~ |
|
||||
| `query` | Function that executes the prompts. If `None`, defaults to `spacy.CallLangChain.v1`. ~~Optional[Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]]~~ |
|
||||
|
||||
|
|
|
@ -720,7 +720,7 @@ matches = matcher(doc)
|
|||
|
||||
# Serve visualization of sentences containing match with displaCy
|
||||
# set manual=True to make displaCy render straight from a dictionary
|
||||
# (if you're not running the code within a Jupyer environment, you can
|
||||
# (if you're not running the code within a Jupyter environment, you can
|
||||
# use displacy.serve instead)
|
||||
displacy.render(matched_sents, style="ent", manual=True)
|
||||
```
|
||||
|
|
|
@ -276,6 +276,47 @@
|
|||
"ancient Greek"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "solipcysme",
|
||||
"title": "solipCysme",
|
||||
"slogan": "spaCy pipeline for French fiction and first-person point-of-view texts.",
|
||||
"description": "__solipCysme__ is a pipeline for the French language, designed for the analysis of fiction and first-person point-of-view texts, with a focus on personal pronouns.",
|
||||
"github": "thjbdvlt/solipCysme",
|
||||
"code_example": [
|
||||
"pip install https://huggingface.co/thjbdvlt/fr_solipcysme/resolve/main/fr_solipcysme-any-py3-none-any.whl",
|
||||
"",
|
||||
"import spacy",
|
||||
"",
|
||||
"nlp = spacy.load('fr_solipcysme')",
|
||||
"for i in nlp(",
|
||||
"'la MACHINE à (b)rouiller le temps s'est peut-être déraillée..?'",
|
||||
"):",
|
||||
" print(",
|
||||
" i, ",
|
||||
" i.norm_, ",
|
||||
" i.pos_, ",
|
||||
" i.morph, ",
|
||||
" i.lemma_, ",
|
||||
" i.dep_, ",
|
||||
" i._.tokentype,",
|
||||
" i._.vv_pos,",
|
||||
" i._.vv_morph",
|
||||
" )"
|
||||
],
|
||||
"code_language": "python",
|
||||
"author": "thjbdvlt",
|
||||
"author_links": {
|
||||
"github": "thjbdvlt"
|
||||
},
|
||||
"category": [
|
||||
"pipeline",
|
||||
"research",
|
||||
"models"
|
||||
],
|
||||
"tags": [
|
||||
"french"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "spacy-cleaner",
|
||||
"title": "spacy-cleaner",
|
||||
|
@ -2587,6 +2628,20 @@
|
|||
"courses"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "education",
|
||||
"id": "spacy-quickstart",
|
||||
"title": "spaCy Quickstart",
|
||||
"slogan": "Learn spaCy basics quickly by visualizing various Doc objects",
|
||||
"description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.",
|
||||
"url": "https://learnspacy.com/courses/spacy-quickstart/",
|
||||
"image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png",
|
||||
"thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png",
|
||||
"author": "Aravind Mohanoor",
|
||||
"category": [
|
||||
"courses"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "education",
|
||||
"id": "video-spacys-ner-model",
|
||||
|
|
|
@ -87,6 +87,9 @@
|
|||
margin-bottom: 0
|
||||
height: 100%
|
||||
|
||||
a, a:hover
|
||||
color: inherit
|
||||
|
||||
.banner-content-small
|
||||
display: block
|
||||
margin-bottom: 0 !important
|
||||
|
|
Loading…
Reference in New Issue
Block a user