Merge pull request #13107 from adrianeboyd/chore/update-develop-from-master-v3.8-1

Update develop from master for v3.8
This commit is contained in:
Adriane Boyd 2023-11-05 16:12:08 +01:00 committed by GitHub
commit 7174588155
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
60 changed files with 884 additions and 810 deletions

View File

@ -58,7 +58,7 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest, windows-latest, macos-latest] os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.11", "3.12.0-rc.2"] python_version: ["3.12"]
include: include:
- os: windows-latest - os: windows-latest
python_version: "3.7" python_version: "3.7"
@ -68,6 +68,8 @@ jobs:
python_version: "3.9" python_version: "3.9"
- os: windows-latest - os: windows-latest
python_version: "3.10" python_version: "3.10"
- os: macos-latest
python_version: "3.11"
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
@ -115,22 +117,22 @@ jobs:
- name: Test import - name: Test import
run: python -W error -c "import spacy" run: python -W error -c "import spacy"
# - name: "Test download CLI" - name: "Test download CLI"
# run: | run: |
# python -m spacy download ca_core_news_sm python -m spacy download ca_core_news_sm
# python -m spacy download ca_core_news_md python -m spacy download ca_core_news_md
# python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')" python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
# if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
#
# - name: "Test download_url in info CLI" - name: "Test download_url in info CLI"
# run: | run: |
# python -W error -m spacy info ca_core_news_sm | grep -q download_url python -W error -m spacy info ca_core_news_sm | grep -q download_url
# if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
#
# - name: "Test no warnings on load (#11713)" - name: "Test no warnings on load (#11713)"
# run: | run: |
# python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')" python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
# if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
- name: "Test convert CLI" - name: "Test convert CLI"
run: | run: |
@ -154,17 +156,17 @@ jobs:
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1 python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
# - name: "Test assemble CLI" - name: "Test assemble CLI"
# run: | run: |
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')" python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
# PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
# if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
#
# - name: "Test assemble CLI vectors warning" - name: "Test assemble CLI vectors warning"
# run: | run: |
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')" python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
# python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113 python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
# if: matrix.python_version == '3.9' if: matrix.python_version == '3.9'
- name: "Install test requirements" - name: "Install test requirements"
run: | run: |

View File

@ -1,6 +1,6 @@
The MIT License (MIT) The MIT License (MIT)
Copyright (C) 2016-2022 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal Copyright (C) 2016-2023 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@ -16,7 +16,7 @@ model packaging, deployment and workflow management. spaCy is commercial
open-source software, released under the open-source software, released under the
[MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE). [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
💫 **Version 3.6 out now!** 💫 **Version 3.7 out now!**
[Check out the release notes here.](https://github.com/explosion/spaCy/releases) [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
[![tests](https://github.com/explosion/spaCy/actions/workflows/tests.yml/badge.svg)](https://github.com/explosion/spaCy/actions/workflows/tests.yml) [![tests](https://github.com/explosion/spaCy/actions/workflows/tests.yml/badge.svg)](https://github.com/explosion/spaCy/actions/workflows/tests.yml)

View File

@ -10,7 +10,6 @@ wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0 srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0 catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.10.0 typer>=0.3.0,<0.10.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0 smart-open>=5.2.1,<7.0.0
weasel>=0.1.0,<0.4.0 weasel>=0.1.0,<0.4.0
# Third party dependencies # Third party dependencies

View File

@ -56,7 +56,6 @@ install_requires =
weasel>=0.1.0,<0.4.0 weasel>=0.1.0,<0.4.0
# Third-party dependencies # Third-party dependencies
typer>=0.3.0,<0.10.0 typer>=0.3.0,<0.10.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0 smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0 tqdm>=4.38.0,<5.0.0
numpy>=1.15.0; python_version < "3.9" numpy>=1.15.0; python_version < "3.9"

View File

@ -13,6 +13,7 @@ from thinc.api import Config, prefer_gpu, require_cpu, require_gpu # noqa: F401
from . import pipeline # noqa: F401 from . import pipeline # noqa: F401
from . import util from . import util
from .about import __version__ # noqa: F401 from .about import __version__ # noqa: F401
from .cli.info import info # noqa: F401
from .errors import Errors from .errors import Errors
from .glossary import explain # noqa: F401 from .glossary import explain # noqa: F401
from .language import Language from .language import Language
@ -76,9 +77,3 @@ def blank(
# We should accept both dot notation and nested dict here for consistency # We should accept both dot notation and nested dict here for consistency
config = util.dot_to_dict(config) config = util.dot_to_dict(config)
return LangClass.from_config(config, vocab=vocab, meta=meta) return LangClass.from_config(config, vocab=vocab, meta=meta)
def info(*args, **kwargs):
from .cli.info import info as cli_info
return cli_info(*args, **kwargs)

View File

@ -1,5 +1,5 @@
# fmt: off # fmt: off
__title__ = "spacy" __title__ = "spacy"
__version__ = "3.7.0" __version__ = "3.7.2"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download" __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@ -22,8 +22,17 @@ from .init_pipeline import init_pipeline_cli # noqa: F401
from .package import package # noqa: F401 from .package import package # noqa: F401
from .pretrain import pretrain # noqa: F401 from .pretrain import pretrain # noqa: F401
from .profile import profile # noqa: F401 from .profile import profile # noqa: F401
from .train import train_cli # noqa: F401 from .project.assets import project_assets # type: ignore[attr-defined] # noqa: F401
from .validate import validate # noqa: F401 from .project.clone import project_clone # type: ignore[attr-defined] # noqa: F401
from .project.document import ( # type: ignore[attr-defined] # noqa: F401
project_document,
)
from .project.dvc import project_update_dvc # type: ignore[attr-defined] # noqa: F401
from .project.pull import project_pull # type: ignore[attr-defined] # noqa: F401
from .project.push import project_push # type: ignore[attr-defined] # noqa: F401
from .project.run import project_run # type: ignore[attr-defined] # noqa: F401
from .train import train_cli # type: ignore[attr-defined] # noqa: F401
from .validate import validate # type: ignore[attr-defined] # noqa: F401
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True) @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)

View File

@ -41,10 +41,6 @@ from ..util import (
run_command, run_command,
) )
if TYPE_CHECKING:
from pathy import FluidPath # noqa: F401
SDIST_SUFFIX = ".tar.gz" SDIST_SUFFIX = ".tar.gz"
WHEEL_SUFFIX = "-py3-none-any.whl" WHEEL_SUFFIX = "-py3-none-any.whl"

View File

View File

@ -0,0 +1 @@
from weasel.cli.assets import *

View File

@ -0,0 +1 @@
from weasel.cli.clone import *

View File

@ -0,0 +1 @@
from weasel.cli.document import *

1
spacy/cli/project/dvc.py Normal file
View File

@ -0,0 +1 @@
from weasel.cli.dvc import *

View File

@ -0,0 +1 @@
from weasel.cli.pull import *

View File

@ -0,0 +1 @@
from weasel.cli.push import *

View File

@ -0,0 +1 @@
from weasel.cli.remote_storage import *

1
spacy/cli/project/run.py Normal file
View File

@ -0,0 +1 @@
from weasel.cli.run import *

View File

@ -142,7 +142,25 @@ class SpanRenderer:
spans (list): Individual entity spans and their start, end, label, kb_id and kb_url. spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
title (str / None): Document title set in Doc.user_data['title']. title (str / None): Document title set in Doc.user_data['title'].
""" """
per_token_info = [] per_token_info = self._assemble_per_token_info(tokens, spans)
markup = self._render_markup(per_token_info)
markup = TPL_SPANS.format(content=markup, dir=self.direction)
if title:
markup = TPL_TITLE.format(title=title) + markup
return markup
@staticmethod
def _assemble_per_token_info(
tokens: List[str], spans: List[Dict[str, Any]]
) -> List[Dict[str, List[Dict[str, Any]]]]:
"""Assembles token info used to generate markup in render_spans().
tokens (List[str]): Tokens in text.
spans (List[Dict[str, Any]]): Spans in text.
RETURNS (List[Dict[str, List[Dict[str, Any]]]]): Per token info needed to render HTML markup for given tokens
and spans.
"""
per_token_info: List[Dict[str, List[Dict[str, Any]]]] = []
# we must sort so that we can correctly describe when spans need to "stack" # we must sort so that we can correctly describe when spans need to "stack"
# which is determined by their start token, then span length (longer spans on top), # which is determined by their start token, then span length (longer spans on top),
# then break any remaining ties with the span label # then break any remaining ties with the span label
@ -154,21 +172,22 @@ class SpanRenderer:
s["label"], s["label"],
), ),
) )
for s in spans: for s in spans:
# this is the vertical 'slot' that the span will be rendered in # this is the vertical 'slot' that the span will be rendered in
# vertical_position = span_label_offset + (offset_step * (slot - 1)) # vertical_position = span_label_offset + (offset_step * (slot - 1))
s["render_slot"] = 0 s["render_slot"] = 0
for idx, token in enumerate(tokens): for idx, token in enumerate(tokens):
# Identify if a token belongs to a Span (and which) and if it's a # Identify if a token belongs to a Span (and which) and if it's a
# start token of said Span. We'll use this for the final HTML render # start token of said Span. We'll use this for the final HTML render
token_markup: Dict[str, Any] = {} token_markup: Dict[str, Any] = {}
token_markup["text"] = token token_markup["text"] = token
concurrent_spans = 0 intersecting_spans: List[Dict[str, Any]] = []
entities = [] entities = []
for span in spans: for span in spans:
ent = {} ent = {}
if span["start_token"] <= idx < span["end_token"]: if span["start_token"] <= idx < span["end_token"]:
concurrent_spans += 1
span_start = idx == span["start_token"] span_start = idx == span["start_token"]
ent["label"] = span["label"] ent["label"] = span["label"]
ent["is_start"] = span_start ent["is_start"] = span_start
@ -176,7 +195,12 @@ class SpanRenderer:
# When the span starts, we need to know how many other # When the span starts, we need to know how many other
# spans are on the 'span stack' and will be rendered. # spans are on the 'span stack' and will be rendered.
# This value becomes the vertical render slot for this entire span # This value becomes the vertical render slot for this entire span
span["render_slot"] = concurrent_spans span["render_slot"] = (
intersecting_spans[-1]["render_slot"]
if len(intersecting_spans)
else 0
) + 1
intersecting_spans.append(span)
ent["render_slot"] = span["render_slot"] ent["render_slot"] = span["render_slot"]
kb_id = span.get("kb_id", "") kb_id = span.get("kb_id", "")
kb_url = span.get("kb_url", "#") kb_url = span.get("kb_url", "#")
@ -193,11 +217,8 @@ class SpanRenderer:
span["render_slot"] = 0 span["render_slot"] = 0
token_markup["entities"] = entities token_markup["entities"] = entities
per_token_info.append(token_markup) per_token_info.append(token_markup)
markup = self._render_markup(per_token_info)
markup = TPL_SPANS.format(content=markup, dir=self.direction) return per_token_info
if title:
markup = TPL_TITLE.format(title=title) + markup
return markup
def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str: def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
"""Render the markup from per-token information""" """Render the markup from per-token information"""

View File

@ -1,3 +1,11 @@
from .candidate import Candidate, get_candidates, get_candidates_batch from .candidate import Candidate, get_candidates, get_candidates_batch
from .kb import KnowledgeBase from .kb import KnowledgeBase
from .kb_in_memory import InMemoryLookupKB from .kb_in_memory import InMemoryLookupKB
__all__ = [
"Candidate",
"KnowledgeBase",
"InMemoryLookupKB",
"get_candidates",
"get_candidates_batch",
]

View File

@ -3,4 +3,4 @@ from .levenshtein import levenshtein
from .matcher import Matcher from .matcher import Matcher
from .phrasematcher import PhraseMatcher from .phrasematcher import PhraseMatcher
__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"] __all__ = ["DependencyMatcher", "Matcher", "PhraseMatcher", "levenshtein"]

View File

@ -22,6 +22,7 @@ from .trainable_pipe import TrainablePipe
__all__ = [ __all__ = [
"AttributeRuler", "AttributeRuler",
"DependencyParser", "DependencyParser",
"EditTreeLemmatizer",
"EntityLinker", "EntityLinker",
"EntityRecognizer", "EntityRecognizer",
"EntityRuler", "EntityRuler",

View File

@ -731,3 +731,12 @@ def test_for_no_ent_sents():
sents = list(doc.ents[0].sents) sents = list(doc.ents[0].sents)
assert len(sents) == 1 assert len(sents) == 1
assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY" assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
def test_span_api_richcmp_other(en_tokenizer):
doc1 = en_tokenizer("a b")
doc2 = en_tokenizer("b c")
assert not doc1[1:2] == doc1[1]
assert not doc1[1:2] == doc2[0]
assert not doc1[1:2] == doc2[0:1]
assert not doc1[0:1] == doc2

View File

@ -294,3 +294,12 @@ def test_missing_head_dep(en_vocab):
assert aligned_heads[0] == ref_heads[0] assert aligned_heads[0] == ref_heads[0]
assert aligned_deps[5] == ref_deps[5] assert aligned_deps[5] == ref_deps[5]
assert aligned_heads[5] == ref_heads[5] assert aligned_heads[5] == ref_heads[5]
def test_token_api_richcmp_other(en_tokenizer):
doc1 = en_tokenizer("a b")
doc2 = en_tokenizer("b c")
assert not doc1[1] == doc1[0:1]
assert not doc1[1] == doc2[1:2]
assert not doc1[1] == doc2[0]
assert not doc1[0] == doc2

View File

@ -12,7 +12,6 @@ from thinc.api import Config
import spacy import spacy
from spacy import about from spacy import about
from spacy import info as spacy_info
from spacy.cli import info from spacy.cli import info
from spacy.cli._util import parse_config_overrides, string_to_list, walk_directory from spacy.cli._util import parse_config_overrides, string_to_list, walk_directory
from spacy.cli.apply import apply from spacy.cli.apply import apply
@ -193,9 +192,6 @@ def test_cli_info():
raw_data = info(tmp_dir, exclude=[""]) raw_data = info(tmp_dir, exclude=[""])
assert raw_data["lang"] == "nl" assert raw_data["lang"] == "nl"
assert raw_data["components"] == ["textcat"] assert raw_data["components"] == ["textcat"]
raw_data = spacy_info(tmp_dir, exclude=[""])
assert raw_data["lang"] == "nl"
assert raw_data["components"] == ["textcat"]
def test_cli_converters_conllu_to_docs(): def test_cli_converters_conllu_to_docs():
@ -538,7 +534,6 @@ def test_string_to_list_intify(value):
assert string_to_list(value, intify=True) == [1, 2, 3] assert string_to_list(value, intify=True) == [1, 2, 3]
@pytest.mark.skip(reason="Temporarily skip before 3.7 models are published")
def test_download_compatibility(): def test_download_compatibility():
spec = SpecifierSet("==" + about.__version__) spec = SpecifierSet("==" + about.__version__)
spec.prereleases = False spec.prereleases = False
@ -549,7 +544,6 @@ def test_download_compatibility():
assert get_minor_version(about.__version__) == get_minor_version(version) assert get_minor_version(about.__version__) == get_minor_version(version)
@pytest.mark.skip(reason="Temporarily skip before 3.7 models are published")
def test_validate_compatibility_table(): def test_validate_compatibility_table():
spec = SpecifierSet("==" + about.__version__) spec = SpecifierSet("==" + about.__version__)
spec.prereleases = False spec.prereleases = False
@ -1067,3 +1061,8 @@ def test_debug_data_trainable_lemmatizer_not_annotated():
data = _compile_gold(train_examples, ["trainable_lemmatizer"], nlp, True) data = _compile_gold(train_examples, ["trainable_lemmatizer"], nlp, True)
assert data["no_lemma_annotations"] == 2 assert data["no_lemma_annotations"] == 2
def test_project_api_imports():
from spacy.cli import project_run
from spacy.cli.project.run import project_run # noqa: F401, F811

View File

@ -2,7 +2,7 @@ import numpy
import pytest import pytest
from spacy import displacy from spacy import displacy
from spacy.displacy.render import DependencyRenderer, EntityRenderer from spacy.displacy.render import DependencyRenderer, EntityRenderer, SpanRenderer
from spacy.lang.en import English from spacy.lang.en import English
from spacy.lang.fa import Persian from spacy.lang.fa import Persian
from spacy.tokens import Doc, Span from spacy.tokens import Doc, Span
@ -468,3 +468,23 @@ def test_issue12816(en_vocab) -> None:
# Verify that the HTML tag is still escaped # Verify that the HTML tag is still escaped
html = displacy.render(doc, style="span") html = displacy.render(doc, style="span")
assert "&lt;TEST&gt;" in html assert "&lt;TEST&gt;" in html
@pytest.mark.issue(13056)
def test_displacy_span_stacking():
"""Test whether span stacking works properly for multiple overlapping spans."""
spans = [
{"start_token": 2, "end_token": 5, "label": "SkillNC"},
{"start_token": 0, "end_token": 2, "label": "Skill"},
{"start_token": 1, "end_token": 3, "label": "Skill"},
]
tokens = ["Welcome", "to", "the", "Bank", "of", "China", "."]
per_token_info = SpanRenderer._assemble_per_token_info(spans=spans, tokens=tokens)
assert len(per_token_info) == len(tokens)
assert all([len(per_token_info[i]["entities"]) == 1 for i in (0, 3, 4)])
assert all([len(per_token_info[i]["entities"]) == 2 for i in (1, 2)])
assert per_token_info[1]["entities"][0]["render_slot"] == 1
assert per_token_info[1]["entities"][1]["render_slot"] == 2
assert per_token_info[2]["entities"][0]["render_slot"] == 2
assert per_token_info[2]["entities"][1]["render_slot"] == 3

View File

@ -5,4 +5,4 @@ from .span import Span
from .span_group import SpanGroup from .span_group import SpanGroup
from .token import Token from .token import Token
__all__ = ["Doc", "Token", "Span", "SpanGroup", "DocBin", "MorphAnalysis"] __all__ = ["Doc", "DocBin", "MorphAnalysis", "Span", "SpanGroup", "Token"]

View File

@ -127,14 +127,17 @@ cdef class Span:
self._vector = vector self._vector = vector
self._vector_norm = vector_norm self._vector_norm = vector_norm
def __richcmp__(self, Span other, int op): def __richcmp__(self, object other, int op):
if other is None: if other is None:
if op == 0 or op == 1 or op == 2: if op == 0 or op == 1 or op == 2:
return False return False
else: else:
return True return True
if not isinstance(other, Span):
return False
cdef Span other_span = other
self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc) self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc)
other_tuple = (other.c.start_char, other.c.end_char, other.c.label, other.c.kb_id, other.id, other.doc) other_tuple = (other_span.c.start_char, other_span.c.end_char, other_span.c.label, other_span.c.kb_id, other_span.id, other_span.doc)
# < # <
if op == 0: if op == 0:
return self_tuple < other_tuple return self_tuple < other_tuple

View File

@ -53,7 +53,12 @@ class Token:
def __bytes__(self) -> bytes: ... def __bytes__(self) -> bytes: ...
def __str__(self) -> str: ... def __str__(self) -> str: ...
def __repr__(self) -> str: ... def __repr__(self) -> str: ...
def __richcmp__(self, other: Token, op: int) -> bool: ... def __lt__(self, other: Any) -> bool: ...
def __le__(self, other: Any) -> bool: ...
def __eq__(self, other: Any) -> bool: ...
def __ne__(self, other: Any) -> bool: ...
def __gt__(self, other: Any) -> bool: ...
def __ge__(self, other: Any) -> bool: ...
@property @property
def _(self) -> Underscore: ... def _(self) -> Underscore: ...
def nbor(self, i: int = ...) -> Token: ... def nbor(self, i: int = ...) -> Token: ...

View File

@ -139,17 +139,20 @@ cdef class Token:
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
def __richcmp__(self, Token other, int op): def __richcmp__(self, object other, int op):
# http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html # http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
if other is None: if other is None:
if op in (0, 1, 2): if op in (0, 1, 2):
return False return False
else: else:
return True return True
if not isinstance(other, Token):
return False
cdef Token other_token = other
cdef Doc my_doc = self.doc cdef Doc my_doc = self.doc
cdef Doc other_doc = other.doc cdef Doc other_doc = other_token.doc
my = self.idx my = self.idx
their = other.idx their = other_token.idx
if op == 0: if op == 0:
return my < their return my < their
elif op == 2: elif op == 2:

View File

@ -16,3 +16,28 @@ from .iob_utils import ( # noqa: F401
tags_to_entities, tags_to_entities,
) )
from .loggers import console_logger # noqa: F401 from .loggers import console_logger # noqa: F401
__all__ = [
"Alignment",
"Corpus",
"Example",
"JsonlCorpus",
"PlainTextCorpus",
"biluo_tags_to_offsets",
"biluo_tags_to_spans",
"biluo_to_iob",
"create_copy_from_base_model",
"docs_to_json",
"dont_augment",
"iob_to_biluo",
"minibatch_by_padded_size",
"minibatch_by_words",
"offsets_to_biluo_tags",
"orth_variants_augmenter",
"read_json_file",
"remove_bilu_prefix",
"split_bilu_label",
"tags_to_entities",
"validate_get_examples",
"validate_examples",
]

View File

@ -1544,9 +1544,9 @@ obsolete files is left up to you.
Remotes can be defined in the `remotes` section of the Remotes can be defined in the `remotes` section of the
[`project.yml`](/usage/projects#project-yml). Under the hood, spaCy uses [`project.yml`](/usage/projects#project-yml). Under the hood, spaCy uses
[`Pathy`](https://github.com/justindujardin/pathy) to communicate with the [`cloudpathlib`](https://cloudpathlib.drivendata.org) to communicate with the
remote storages, so you can use any protocol that `Pathy` supports, including remote storages, so you can use any protocol that `cloudpathlib` supports,
[S3](https://aws.amazon.com/s3/), including [S3](https://aws.amazon.com/s3/),
[Google Cloud Storage](https://cloud.google.com/storage), and the local [Google Cloud Storage](https://cloud.google.com/storage), and the local
filesystem, although you may need to install extra dependencies to use certain filesystem, although you may need to install extra dependencies to use certain
protocols. protocols.

View File

@ -16,14 +16,6 @@ prototyping** and **prompting**, and turning unstructured responses into
## Config and implementation {id="config"} ## Config and implementation {id="config"}
An LLM component is implemented through the `LLMWrapper` class. It is accessible
through a generic `llm`
[component factory](https://spacy.io/usage/processing-pipelines#custom-components-factories)
as well as through task-specific component factories: `llm_ner`, `llm_spancat`, `llm_rel`,
`llm_textcat`, `llm_sentiment` and `llm_summarization`.
### LLMWrapper.\_\_init\_\_ {id="init",tag="method"}
> #### Example > #### Example
> >
> ```python > ```python
@ -32,13 +24,26 @@ as well as through task-specific component factories: `llm_ner`, `llm_spancat`,
> llm = nlp.add_pipe("llm", config=config) > llm = nlp.add_pipe("llm", config=config)
> >
> # Construction via add_pipe with a task-specific factory and default GPT3.5 model > # Construction via add_pipe with a task-specific factory and default GPT3.5 model
> llm = nlp.add_pipe("llm-ner") > llm = nlp.add_pipe("llm_ner")
>
> # Construction via add_pipe with a task-specific factory and custom model
> llm = nlp.add_pipe("llm_ner", config={"model": {"@llm_models": "spacy.Dolly.v1", "name": "dolly-v2-12b"}})
> >
> # Construction from class > # Construction from class
> from spacy_llm.pipeline import LLMWrapper > from spacy_llm.pipeline import LLMWrapper
> llm = LLMWrapper(vocab=nlp.vocab, task=task, model=model, cache=cache, save_io=True) > llm = LLMWrapper(vocab=nlp.vocab, task=task, model=model, cache=cache, save_io=True)
> ``` > ```
An LLM component is implemented through the `LLMWrapper` class. It is accessible
through a generic `llm`
[component factory](https://spacy.io/usage/processing-pipelines#custom-components-factories)
as well as through task-specific component factories: `llm_ner`, `llm_spancat`,
`llm_rel`, `llm_textcat`, `llm_sentiment` and `llm_summarization`. For these
factories, the GPT-3.5 model from OpenAI is used by default, but this can be
customized.
### LLMWrapper.\_\_init\_\_ {id="init",tag="method"}
Create a new pipeline instance. In your application, you would normally use a Create a new pipeline instance. In your application, you would normally use a
shortcut for this and instantiate the component using its string name and shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#add_pipe). [`nlp.add_pipe`](/api/language#add_pipe).
@ -255,9 +260,11 @@ prompting.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `template` | Custom prompt template to send to LLM model. Defaults to [summarization.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/summarization.v1.jinja). ~~str~~ | | `template` | Custom prompt template to send to LLM model. Defaults to [summarization.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/summarization.v1.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SummarizationTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SummarizationExample`. ~~Optional[Type[FewshotExample]]~~ |
| `max_n_words` | Maximum number of words to be used in summary. Note that this should not be expected to work exactly. Defaults to `None`. ~~Optional[int]~~ | | `max_n_words` | Maximum number of words to be used in summary. Note that this should not be expected to work exactly. Defaults to `None`. ~~Optional[int]~~ |
| `field` | Name of extension attribute to store summary in (i.e. the summary will be available in `doc._.{field}`). Defaults to `summary`. ~~str~~ | | `field` | Name of extension attribute to store summary in (i.e. the summary will be available in `doc._.{field}`). Defaults to `summary`. ~~str~~ |
@ -326,12 +333,15 @@ the v3 implementation will use a dummy example in the prompt. Technically this
means that the task will always perform few-shot prompting under the hood. means that the task will always perform few-shot prompting under the hood.
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `template` | Custom prompt template to send to LLM model. Defaults to [ner.v3.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v3.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ | | `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ | | `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `template` | Custom prompt template to send to LLM model. Defaults to [ner.v3.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v3.jinja). ~~str~~ |
| `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ | | `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
@ -416,11 +426,14 @@ v1.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [ner.v2.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v2.jinja). ~~str~~ | | `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [ner.v2.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v2.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ |
@ -468,9 +481,12 @@ few-shot prompting.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `labels` | Comma-separated list of labels. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | Comma-separated list of labels. ~~str~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ |
@ -540,13 +556,16 @@ support overlapping entities and store its annotations in `doc.spans`.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `template` | Custom prompt template to send to LLM model. Defaults to [`spancat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v3.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ | | `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ | | `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `template` | Custom prompt template to send to LLM model. Defaults to [`spancat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v3.jinja). ~~str~~ |
| `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ | | `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ |
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ |
@ -569,12 +588,15 @@ support overlapping entities and store its annotations in `doc.spans`.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`spancat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v2.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ | | `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ | | `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`spancat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v2.jinja). ~~str~~ |
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ |
@ -600,10 +622,13 @@ v1 NER task to support overlapping entities and store its annotations in
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | Comma-separated list of labels. ~~str~~ | | `labels` | Comma-separated list of labels. ~~str~~ |
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ | | `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
| `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ | | `case_sensitive_matching` | Whether to search with case sensitivity. Defaults to `False`. ~~bool~~ |
@ -637,11 +662,14 @@ prompt.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` (NEW) | Dictionary of label definitions. Included in the prompt, if set. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `template` | Custom prompt template to send to LLM model. Defaults to [`textcat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v3.jinja). ~~str~~ | | `template` | Custom prompt template to send to LLM model. Defaults to [`textcat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v3.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` (NEW) | Dictionary of label definitions. Included in the prompt, if set. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ | | `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ | | `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
@ -664,10 +692,13 @@ V2 includes all v1 functionality, with an improved prompt template.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`textcat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v2.jinja). ~~str~~ | | `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`textcat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v2.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ | | `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ | | `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
@ -691,13 +722,16 @@ prompting.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `labels` | Comma-separated list of labels. ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | Comma-separated list of labels. ~~str~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Deafults to `False`. ~~bool~~ | | `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Deafults to `True`. ~~bool~~ | | `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
| `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Deafults to `False`. ~~bool~~ | | `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Defaults to `False`. ~~bool~~ |
To perform [few-shot learning](/usage/large-language-models#few-shot-prompts), To perform [few-shot learning](/usage/large-language-models#few-shot-prompts),
you can write down a few examples in a separate file, and provide these to be you can write down a few examples in a separate file, and provide these to be
@ -723,6 +757,25 @@ supports `.yml`, `.yaml`, `.json` and `.jsonl`.
path = "textcat_examples.json" path = "textcat_examples.json"
``` ```
If you want to perform few-shot learning with a binary classifier (i. e. a text
either should or should not be assigned to a given class), you can provide
positive and negative examples with answers of "POS" or "NEG". "POS" means that
this example should be assigned the class label defined in the configuration,
"NEG" means it shouldn't. E. g. for spam classification:
```json
[
{
"text": "You won the lottery! Wire a fee of 200$ to be able to withdraw your winnings.",
"answer": "POS"
},
{
"text": "Your order #123456789 has arrived",
"answer": "NEG"
}
]
```
### REL {id="rel"} ### REL {id="rel"}
The REL task extracts relations between named entities. The REL task extracts relations between named entities.
@ -741,11 +794,14 @@ on an upstream NER component for entities extraction.
> ``` > ```
| Argument | Description | | Argument | Description |
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `template` | Custom prompt template to send to LLM model. Defaults to [`rel.v1.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/rel.v1.jinja). ~~str~~ | | `template` | Custom prompt template to send to LLM model. Defaults to [`rel.v1.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/rel.v1.jinja). ~~str~~ |
| `label_definitions` | Dictionary providing a description for each relation label. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[RELTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `RELExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
| `label_definitions` | Dictionary providing a description for each relation label. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ | | `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
| `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Defaults to `False`. ~~bool~~ | | `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Defaults to `False`. ~~bool~~ |
@ -794,9 +850,12 @@ This task supports both zero-shot and few-shot prompting.
> ``` > ```
| Argument | Description | | Argument | Description |
| ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `template` | Custom prompt template to send to LLM model. Defaults to [lemma.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/lemma.v1.jinja). ~~str~~ | | `template` | Custom prompt template to send to LLM model. Defaults to [lemma.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/lemma.v1.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[LemmaTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `LemmaExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
The task prompts the LLM to lemmatize the passed text and return the lemmatized The task prompts the LLM to lemmatize the passed text and return the lemmatized
version as a list of tokens and their corresponding lemma. E. g. the text version as a list of tokens and their corresponding lemma. E. g. the text
@ -871,9 +930,12 @@ This task supports both zero-shot and few-shot prompting.
> ``` > ```
| Argument | Description | | Argument | Description |
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ | | --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `template` | Custom prompt template to send to LLM model. Defaults to [sentiment.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/sentiment.v1.jinja). ~~str~~ | | `template` | Custom prompt template to send to LLM model. Defaults to [sentiment.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/sentiment.v1.jinja). ~~str~~ |
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ | | `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SentimentTask]]~~ |
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SentimentExample`. ~~Optional[Type[FewshotExample]]~~ |
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
| `field` | Name of extension attribute to store sentiment score in (i. e. the sentiment score will be available in `doc._.{field}`). Defaults to `sentiment`. ~~str~~ | | `field` | Name of extension attribute to store sentiment score in (i. e. the sentiment score will be available in `doc._.{field}`). Defaults to `sentiment`. ~~str~~ |
To perform [few-shot learning](/usage/large-language-models#few-shot-prompts), To perform [few-shot learning](/usage/large-language-models#few-shot-prompts),
@ -953,11 +1015,11 @@ provider's API.
Currently, these models are provided as part of the core library: Currently, these models are provided as part of the core library:
| Model | Provider | Supported names | Default name | Default config | | Model | Provider | Supported names | Default name | Default config |
| ----------------------------- | --------- | ---------------------------------------------------------------------------------------- | ---------------------- | ------------------------------------ | | ----------------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------ | ---------------------- | ------------------------------------ |
| `spacy.GPT-4.v1` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{}` | | `spacy.GPT-4.v1` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{}` |
| `spacy.GPT-4.v2` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{temperature=0.0}` | | `spacy.GPT-4.v2` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{temperature=0.0}` |
| `spacy.GPT-3-5.v1` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k"]` | `"gpt-3.5-turbo"` | `{}` | | `spacy.GPT-3-5.v1` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k", "gpt-3.5-turbo-instruct"]` | `"gpt-3.5-turbo"` | `{}` |
| `spacy.GPT-3-5.v2` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k"]` | `"gpt-3.5-turbo"` | `{temperature=0.0}` | | `spacy.GPT-3-5.v2` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k", "gpt-3.5-turbo-instruct"]` | `"gpt-3.5-turbo"` | `{temperature=0.0}` |
| `spacy.Davinci.v1` | OpenAI | `["davinci"]` | `"davinci"` | `{}` | | `spacy.Davinci.v1` | OpenAI | `["davinci"]` | `"davinci"` | `{}` |
| `spacy.Davinci.v2` | OpenAI | `["davinci"]` | `"davinci"` | `{temperature=0.0, max_tokens=500}` | | `spacy.Davinci.v2` | OpenAI | `["davinci"]` | `"davinci"` | `{temperature=0.0, max_tokens=500}` |
| `spacy.Text-Davinci.v1` | OpenAI | `["text-davinci-003", "text-davinci-002"]` | `"text-davinci-003"` | `{}` | | `spacy.Text-Davinci.v1` | OpenAI | `["text-davinci-003", "text-davinci-002"]` | `"text-davinci-003"` | `{}` |
@ -976,6 +1038,7 @@ Currently, these models are provided as part of the core library:
| `spacy.Ada.v2` | OpenAI | `["ada"]` | `"ada"` | `{temperature=0.0, max_tokens=500}` | | `spacy.Ada.v2` | OpenAI | `["ada"]` | `"ada"` | `{temperature=0.0, max_tokens=500}` |
| `spacy.Text-Ada.v1` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{}` | | `spacy.Text-Ada.v1` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{}` |
| `spacy.Text-Ada.v2` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{temperature=0.0, max_tokens=500}` | | `spacy.Text-Ada.v2` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{temperature=0.0, max_tokens=500}` |
| `spacy.Azure.v1` | Microsoft, OpenAI | Arbitrary values | No default | `{temperature=0.0}` |
| `spacy.Command.v1` | Cohere | `["command", "command-light", "command-light-nightly", "command-nightly"]` | `"command"` | `{}` | | `spacy.Command.v1` | Cohere | `["command", "command-light", "command-light-nightly", "command-nightly"]` | `"command"` | `{}` |
| `spacy.Claude-2.v1` | Anthropic | `["claude-2", "claude-2-100k"]` | `"claude-2"` | `{}` | | `spacy.Claude-2.v1` | Anthropic | `["claude-2", "claude-2-100k"]` | `"claude-2"` | `{}` |
| `spacy.Claude-1.v1` | Anthropic | `["claude-1", "claude-1-100k"]` | `"claude-1"` | `{}` | | `spacy.Claude-1.v1` | Anthropic | `["claude-1", "claude-1-100k"]` | `"claude-1"` | `{}` |
@ -984,10 +1047,29 @@ Currently, these models are provided as part of the core library:
| `spacy.Claude-1-3.v1` | Anthropic | `["claude-1.3", "claude-1.3-100k"]` | `"claude-1.3"` | `{}` | | `spacy.Claude-1-3.v1` | Anthropic | `["claude-1.3", "claude-1.3-100k"]` | `"claude-1.3"` | `{}` |
| `spacy.Claude-instant-1.v1` | Anthropic | `["claude-instant-1", "claude-instant-1-100k"]` | `"claude-instant-1"` | `{}` | | `spacy.Claude-instant-1.v1` | Anthropic | `["claude-instant-1", "claude-instant-1-100k"]` | `"claude-instant-1"` | `{}` |
| `spacy.Claude-instant-1-1.v1` | Anthropic | `["claude-instant-1.1", "claude-instant-1.1-100k"]` | `"claude-instant-1.1"` | `{}` | | `spacy.Claude-instant-1-1.v1` | Anthropic | `["claude-instant-1.1", "claude-instant-1.1-100k"]` | `"claude-instant-1.1"` | `{}` |
| `spacy.PaLM.v1` | Google | `["chat-bison-001", "text-bison-001"]` | `"text-bison-001"` | `{temperature=0.0}` |
To use these models, make sure that you've [set the relevant API](#api-keys) To use these models, make sure that you've [set the relevant API](#api-keys)
keys as environment variables. keys as environment variables.
**⚠️ A note on `spacy.Azure.v1`.** Working with Azure OpenAI is slightly
different than working with models from other providers:
- In Azure, LLMs have to be made available by creating a _deployment_ of a given
model (e. g. GPT-3.5). This deployment can have an arbitrary name. The `name`
argument, which everywhere else denotes the model name (e. g. `claude-1.0`,
`gpt-3.5`), here refers to the _deployment name_.
- Deployed Azure OpenAI models are reachable via a resource-specific base URL,
usually of the form `https://{resource}.openai.azure.com`. Hence the URL has
to be specified via the `base_url` argument.
- Azure further expects the _API version_ to be specified. The default value for
this, via the `api_version` argument, is currently `2023-05-15` but may be
updated in the future.
- Finally, since we can't infer information about the model from the deployment
name, `spacy-llm` requires the `model_type` to be set to either
`"completions"` or `"chat"`, depending on whether the deployed model is a
completion or chat model.
#### API Keys {id="api-keys"} #### API Keys {id="api-keys"}
Note that when using hosted services, you have to ensure that the proper API Note that when using hosted services, you have to ensure that the proper API
@ -1014,6 +1096,12 @@ For Anthropic:
export ANTHROPIC_API_KEY="..." export ANTHROPIC_API_KEY="..."
``` ```
For PaLM:
```shell
export PALM_API_KEY="..."
```
### Models via HuggingFace {id="models-hf"} ### Models via HuggingFace {id="models-hf"}
These models all take the same parameters: These models all take the same parameters:
@ -1037,11 +1125,27 @@ Currently, these models are provided as part of the core library:
| Model | Provider | Supported names | HF directory | | Model | Provider | Supported names | HF directory |
| -------------------- | --------------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------- | | -------------------- | --------------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
| `spacy.Dolly.v1` | Databricks | `["dolly-v2-3b", "dolly-v2-7b", "dolly-v2-12b"]` | https://huggingface.co/databricks | | `spacy.Dolly.v1` | Databricks | `["dolly-v2-3b", "dolly-v2-7b", "dolly-v2-12b"]` | https://huggingface.co/databricks |
| `spacy.Llama2.v1` | Meta AI | `["Llama-2-7b-hf", "Llama-2-13b-hf", "Llama-2-70b-hf"]` | https://huggingface.co/meta-llama |
| `spacy.Falcon.v1` | TII | `["falcon-rw-1b", "falcon-7b", "falcon-7b-instruct", "falcon-40b-instruct"]` | https://huggingface.co/tiiuae | | `spacy.Falcon.v1` | TII | `["falcon-rw-1b", "falcon-7b", "falcon-7b-instruct", "falcon-40b-instruct"]` | https://huggingface.co/tiiuae |
| `spacy.Llama2.v1` | Meta AI | `["Llama-2-7b-hf", "Llama-2-13b-hf", "Llama-2-70b-hf"]` | https://huggingface.co/meta-llama |
| `spacy.Mistral.v1` | Mistral AI | `["Mistral-7B-v0.1", "Mistral-7B-Instruct-v0.1"]` | https://huggingface.co/mistralai |
| `spacy.StableLM.v1` | Stability AI | `["stablelm-base-alpha-3b", "stablelm-base-alpha-7b", "stablelm-tuned-alpha-3b", "stablelm-tuned-alpha-7b"]` | https://huggingface.co/stabilityai | | `spacy.StableLM.v1` | Stability AI | `["stablelm-base-alpha-3b", "stablelm-base-alpha-7b", "stablelm-tuned-alpha-3b", "stablelm-tuned-alpha-7b"]` | https://huggingface.co/stabilityai |
| `spacy.OpenLLaMA.v1` | OpenLM Research | `["open_llama_3b", "open_llama_7b", "open_llama_7b_v2", "open_llama_13b"]` | https://huggingface.co/openlm-research | | `spacy.OpenLLaMA.v1` | OpenLM Research | `["open_llama_3b", "open_llama_7b", "open_llama_7b_v2", "open_llama_13b"]` | https://huggingface.co/openlm-research |
<Infobox variant="warning" title="Gated models on Hugging Face" id="hf_licensing">
Some models available on Hugging Face (HF), such as Llama 2, are _gated models_.
That means that users have to fulfill certain requirements to be allowed access
to these models. In the case of Llama 2 you'll need to agree to Meta's
Terms of Service while logged in with your HF account. After Meta grants you
permission to use Llama 2, you'll be able to download and use the model.
This requires that you are logged in with your HF account on your local
machine - check out the HF quick start documentation. In a nutshell, you'll need
to create an access token on HF and log in to HF using your access token, e. g.
with `huggingface-cli login`.
</Infobox>
Note that Hugging Face will download the model the first time you use it - you Note that Hugging Face will download the model the first time you use it - you
can can
[define the cached directory](https://huggingface.co/docs/huggingface_hub/main/en/guides/manage-cache) [define the cached directory](https://huggingface.co/docs/huggingface_hub/main/en/guides/manage-cache)

View File

@ -89,6 +89,21 @@ architectures and their arguments and hyperparameters.
| `negative_weight` <Tag variant="new">3.5.1</Tag> | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~ | | `negative_weight` <Tag variant="new">3.5.1</Tag> | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~ |
| `allow_overlap` <Tag variant="new">3.5.1</Tag> | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~ | | `allow_overlap` <Tag variant="new">3.5.1</Tag> | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~ |
<Infobox variant="warning">
If you set a non-default value for `spans_key`, you'll have to update
`[training.score_weights]` as well so that weights are computed properly. E. g.
for `spans_key == "myspankey"`, include this in your config:
```ini
[training.score_weights]
spans_myspankey_f = 1.0
spans_myspankey_p = 0.0
spans_myspankey_r = 0.0
```
</Infobox>
```python ```python
%%GITHUB_SPACY/spacy/pipeline/spancat.py %%GITHUB_SPACY/spacy/pipeline/spancat.py
``` ```

View File

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -31,8 +31,6 @@ for ent in doc.ents:
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
our example sentence and its named entities look like: our example sentence and its named entities look like:
<Iframe <Standalone height={120}>
title="displaCy visualization of entities" <div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}><mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Apple <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> is looking at buying <mark style={{ background: '#feca74', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>U.K. <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>GPE</span></mark> startup for <mark style={{ background: '#e4e7d2', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>$1 billion <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>MONEY</span></mark></div>
src="/images/displacy-ent1.html" </Standalone>
height={100}
/>

View File

@ -56,8 +56,7 @@ for token in doc:
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
our example sentence and its dependencies look like: our example sentence and its dependencies look like:
<Iframe <ImageScrollable
title="displaCy visualization of dependencies and entities" src="/images/displacy-long.svg"
src="/images/displacy-long.html" width={1975}
height={450}
/> />

View File

@ -170,8 +170,8 @@ to be `"databricks/dolly-v2-12b"` for better performance.
### Example 3: Create the component directly in Python {id="example-3"} ### Example 3: Create the component directly in Python {id="example-3"}
The `llm` component behaves as any other component does, and there are The `llm` component behaves as any other component does, and there are
[task-specific components](/api/large-language-models#config) defined to [task-specific components](/api/large-language-models#config) defined to help
help you hit the ground running with a reasonable built-in task implementation. you hit the ground running with a reasonable built-in task implementation.
```python ```python
import spacy import spacy
@ -436,7 +436,7 @@ respectively. Alternatively you can use LangChain to access hosted or local
models by specifying one of the models registered with the `langchain.` prefix. models by specifying one of the models registered with the `langchain.` prefix.
<Infobox> <Infobox>
_Why LangChain if there are also are a native REST and a HuggingFace interface? When should I use what?_ _Why LangChain if there are also native REST and HuggingFace interfaces? When should I use what?_
Third-party libraries like `langchain` focus on prompt management, integration Third-party libraries like `langchain` focus on prompt management, integration
of many different LLM APIs, and other related features such as conversational of many different LLM APIs, and other related features such as conversational
@ -476,6 +476,7 @@ provider's documentation.
| [`spacy.Curie.v2`](/api/large-language-models#models-rest) | OpenAIs `curie` model family. | | [`spacy.Curie.v2`](/api/large-language-models#models-rest) | OpenAIs `curie` model family. |
| [`spacy.Babbage.v2`](/api/large-language-models#models-rest) | OpenAIs `babbage` model family. | | [`spacy.Babbage.v2`](/api/large-language-models#models-rest) | OpenAIs `babbage` model family. |
| [`spacy.Ada.v2`](/api/large-language-models#models-rest) | OpenAIs `ada` model family. | | [`spacy.Ada.v2`](/api/large-language-models#models-rest) | OpenAIs `ada` model family. |
| [`spacy.Azure.v1`](/api/large-language-models#models-rest) | Azure's OpenAI models. |
| [`spacy.Command.v1`](/api/large-language-models#models-rest) | Coheres `command` model family. | | [`spacy.Command.v1`](/api/large-language-models#models-rest) | Coheres `command` model family. |
| [`spacy.Claude-2.v1`](/api/large-language-models#models-rest) | Anthropics `claude-2` model family. | | [`spacy.Claude-2.v1`](/api/large-language-models#models-rest) | Anthropics `claude-2` model family. |
| [`spacy.Claude-1.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1` model family. | | [`spacy.Claude-1.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1` model family. |
@ -484,8 +485,10 @@ provider's documentation.
| [`spacy.Claude-1-0.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.0` model family. | | [`spacy.Claude-1-0.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.0` model family. |
| [`spacy.Claude-1-2.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.2` model family. | | [`spacy.Claude-1-2.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.2` model family. |
| [`spacy.Claude-1-3.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.3` model family. | | [`spacy.Claude-1-3.v1`](/api/large-language-models#models-rest) | Anthropics `claude-1.3` model family. |
| [`spacy.PaLM.v1`](/api/large-language-models#models-rest) | Google's `PaLM` model family. |
| [`spacy.Dolly.v1`](/api/large-language-models#models-hf) | Dolly models through HuggingFace. | | [`spacy.Dolly.v1`](/api/large-language-models#models-hf) | Dolly models through HuggingFace. |
| [`spacy.Falcon.v1`](/api/large-language-models#models-hf) | Falcon models through HuggingFace. | | [`spacy.Falcon.v1`](/api/large-language-models#models-hf) | Falcon models through HuggingFace. |
| [`spacy.Mistral.v1`](/api/large-language-models#models-hf) | Mistral models through HuggingFace. |
| [`spacy.Llama2.v1`](/api/large-language-models#models-hf) | Llama2 models through HuggingFace. | | [`spacy.Llama2.v1`](/api/large-language-models#models-hf) | Llama2 models through HuggingFace. |
| [`spacy.StableLM.v1`](/api/large-language-models#models-hf) | StableLM models through HuggingFace. | | [`spacy.StableLM.v1`](/api/large-language-models#models-hf) | StableLM models through HuggingFace. |
| [`spacy.OpenLLaMA.v1`](/api/large-language-models#models-hf) | OpenLLaMA models through HuggingFace. | | [`spacy.OpenLLaMA.v1`](/api/large-language-models#models-hf) | OpenLLaMA models through HuggingFace. |

View File

@ -290,11 +290,7 @@ for token in doc:
| toward | `prep` | shift | `NOUN` | manufacturers | | toward | `prep` | shift | `NOUN` | manufacturers |
| manufacturers | `pobj` | toward | `ADP` | | | manufacturers | `pobj` | toward | `ADP` | |
<Iframe <ImageScrollable src="/images/displacy-long2.svg" width={1275} />
title="displaCy visualization of dependencies and entities 2"
src="/images/displacy-long2.html"
height={450}
/>
Because the syntactic relations form a tree, every word has **exactly one Because the syntactic relations form a tree, every word has **exactly one
head**. You can therefore iterate over the arcs in the tree by iterating over head**. You can therefore iterate over the arcs in the tree by iterating over
@ -709,11 +705,9 @@ doc = nlp(text)
displacy.serve(doc, style="ent") displacy.serve(doc, style="ent")
``` ```
<Iframe <Standalone height={180}>
title="displaCy visualizer for entities" <div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>When <mark style={{ background: '#aa9cfc', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Sebastian Thrun <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>PERSON</span></mark> started working on self-driving cars at <mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> in <mark style={{ background: '#bfe1d9', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>2007 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>DATE</span></mark>, few people outside of the company took him seriously.</div>
src="/images/displacy-ent2.html" </Standalone>
height={180}
/>
## Entity Linking {id="entity-linking"} ## Entity Linking {id="entity-linking"}
@ -723,6 +717,10 @@ identifier from a knowledge base (KB). You can create your own
[`KnowledgeBase`](/api/kb) and [train](/usage/training) a new [`KnowledgeBase`](/api/kb) and [train](/usage/training) a new
[`EntityLinker`](/api/entitylinker) using that custom knowledge base. [`EntityLinker`](/api/entitylinker) using that custom knowledge base.
As an example of how to define a KnowledgeBase and train an entity linker model,
see [`this tutorial`](https://github.com/explosion/projects/blob/v3/tutorials/nel_emerson)
using [spaCy projects](/usage/projects).
### Accessing entity identifiers {id="entity-linking-accessing",model="entity linking"} ### Accessing entity identifiers {id="entity-linking-accessing",model="entity linking"}
The annotated KB identifier is accessible as either a hash value or as a string, The annotated KB identifier is accessible as either a hash value or as a string,
@ -733,6 +731,7 @@ object, or the `ent_kb_id` and `ent_kb_id_` attributes of a
```python ```python
import spacy import spacy
# "my_custom_el_pipeline" is assumed to be a custom NLP pipeline that was trained and serialized to disk
nlp = spacy.load("my_custom_el_pipeline") nlp = spacy.load("my_custom_el_pipeline")
doc = nlp("Ada Lovelace was born in London") doc = nlp("Ada Lovelace was born in London")

View File

@ -656,9 +656,9 @@ locally.
You can list one or more remotes in the `remotes` section of your You can list one or more remotes in the `remotes` section of your
[`project.yml`](#project-yml) by mapping a string name to the URL of the [`project.yml`](#project-yml) by mapping a string name to the URL of the
storage. Under the hood, spaCy uses storage. Under the hood, spaCy uses
[`Pathy`](https://github.com/justindujardin/pathy) to communicate with the [`cloudpathlib`](https://cloudpathlib.drivendata.org) to communicate with the
remote storages, so you can use any protocol that `Pathy` supports, including remote storages, so you can use any protocol that `cloudpathlib` supports,
[S3](https://aws.amazon.com/s3/), including [S3](https://aws.amazon.com/s3/),
[Google Cloud Storage](https://cloud.google.com/storage), and the local [Google Cloud Storage](https://cloud.google.com/storage), and the local
filesystem, although you may need to install extra dependencies to use certain filesystem, although you may need to install extra dependencies to use certain
protocols. protocols.

View File

@ -1144,10 +1144,9 @@ relations and tokens we want to match:
> displacy.serve(doc) > displacy.serve(doc)
> ``` > ```
<Iframe <ImageScrollable
title="displaCy visualization of dependencies" src="/images/displacy-dep-founded.svg"
src="/images/displacy-dep-founded.html" width={925}
height={450}
/> />
The relations we're interested in are: The relations we're interested in are:

View File

@ -586,11 +586,9 @@ After installing the package, the custom colors will be used when visualizing
text with `displacy`. Whenever the label `SNEK` is assigned, it will be text with `displacy`. Whenever the label `SNEK` is assigned, it will be
displayed in `#3dff74`. displayed in `#3dff74`.
<Iframe <Standalone height={100}>
title="displaCy visualization of entities" <div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>🌱🌿 <mark style={{ background: '#3dff74', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>🐍 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>SNEK</span></mark> ____ 🌳🌲 ____ <mark style={{ background: '#cfc5ff', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>👨‍🌾 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>HUMAN</span></mark> 🏘️</div>
src="/images/displacy-ent-snek.html" </Standalone>
height={100}
/>
## Saving, loading and distributing trained pipelines {id="models"} ## Saving, loading and distributing trained pipelines {id="models"}

View File

@ -77,11 +77,9 @@ doc.spans["custom"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
displacy.serve(doc, style="span", options={"spans_key": "custom"}) displacy.serve(doc, style="span", options={"spans_key": "custom"})
``` ```
<Iframe <Standalone height={100}>
title="displaCy visualizer for overlapping spans" <div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#7aecec', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#7aecec', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>ORG</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#feca74', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>GPE</span></span></span>.</div>
src="/images/displacy-span.html" </Standalone>
height={180}
/>
## Additional features and improvements ## Additional features and improvements

140
website/docs/usage/v3-7.mdx Normal file
View File

@ -0,0 +1,140 @@
---
title: What's New in v3.7
teaser: New features and how to upgrade
menu:
- ['New Features', 'features']
- ['Upgrading Notes', 'upgrading']
---
## New features {id="features",hidden="true"}
spaCy v3.7 adds support for Python 3.12, introduces the new standalone library
[Weasel](https://github.com/explosion/weasel) for project workflows, and updates
the transformer-based trained pipelines to use our new
[Curated Transformers](https://github.com/explosion/curated-transformers)
library.
This release drops support for Python 3.6.
### Weasel {id="weasel"}
The [spaCy projects](/usage/projects) functionality has been moved into a new
standalone library [Weasel](https://github.com/explosion/weasel). This brings
minor changes to spaCy-specific settings in spaCy projects (see
[upgrading](#upgrading) below), but also makes it possible to use the same
workflow functionality outside of spaCy.
All `spacy project` commands should run as before, just now they're using Weasel
under the hood.
<Infobox title="Remote storage for Python 3.12" variant="warning">
Remote storage for spaCy projects is not yet supported for Python 3.12. Use
Python 3.11 or earlier for remote storage.
</Infobox>
### Registered vectors {id="custom-vectors"}
You can specify a custom registered vectors class under `[nlp.vectors]` in order
to use static vectors in formats other than the ones supported by
[`Vectors`](/api/vectors). To implement your custom vectors, extend the abstract
class [`BaseVectors`](/api/basevectors). See an example using
[BPEmb subword embeddings](/usage/embeddings-transformers#custom-vectors).
### Additional features and improvements {id="additional-features-and-improvements"}
- Add support for Python 3.12.
- Extend to Thinc v8.2.
- Extend `transformers` extra to `spacy-transformers` v1.3.
- Add `--spans-key` option for CLI evaluation with `spacy benchmark accuracy`.
- Load the CLI module lazily for `spacy.info`.
- Add type stubs for `spacy.training.example`.
- Warn for unsupported pattern keys in dependency matcher.
- `Language.replace_listeners`: Pass the replaced listener and the `tok2vec`
pipe to the callback in order to support `spacy-curated-transformers`.
- Always use `tqdm` with `disable=None` in order to disable output in
non-interactive environments.
- Language updates:
- Add left and right pointing angle brackets as punctuation to ancient Greek.
- Update example sentences for Turkish.
- Package setup updates:
- Update NumPy build constraints for NumPy 1.25+. For Python 3.9+, it is no
longer necessary to set build constraints while building binary wheels.
- Refactor Cython profiling in order to disable profiling for Python 3.12 in
the package setup, since Cython does not currently support profiling for
Python 3.12.
## Trained pipelines {id="pipelines"}
### Pipeline updates {id="pipeline-updates"}
The transformer-based `trf` pipelines have been updated to use our new
[Curated Transformers](https://github.com/explosion/curated-transformers)
library using the Thinc model wrappers and pipeline component from
[spaCy Curated Transformers](https://github.com/explosion/spacy-curated-transformers).
## Notes about upgrading from v3.6 {id="upgrading"}
This release drops support for Python 3.6, drops mypy checks for Python 3.7 and
removes the `ray` extra. In addition there are several minor changes for spaCy
projects described in the following section.
### Backwards incompatibilities for spaCy Projects {id="upgrading-projects"}
`spacy project` has a few backwards incompatibilities due to the transition to
the standalone library [Weasel](https://github.com/explosion/weasel), which is
not as tightly coupled to spaCy. Weasel produces warnings when it detects older
spaCy-specific settings in your environment or project config.
- Support for the `spacy_version` configuration key has been dropped.
- Support for the `check_requirements` configuration key has been dropped due to
the deprecation of `pkg_resources`.
- The `SPACY_CONFIG_OVERRIDES` environment variable is no longer checked. You
can set configuration overrides using `WEASEL_CONFIG_OVERRIDES`.
- Support for the `SPACY_PROJECT_USE_GIT_VERSION` environment variable has been
dropped.
- Error codes are now Weasel-specific and do not follow spaCy error codes.
### Pipeline package version compatibility {id="version-compat"}
> #### Using legacy implementations
>
> In spaCy v3, you'll still be able to load and reference legacy implementations
> via [`spacy-legacy`](https://github.com/explosion/spacy-legacy), even if the
> components or architectures change and newer versions are available in the
> core library.
When you're loading a pipeline package trained with an earlier version of spaCy
v3, you will see a warning telling you that the pipeline may be incompatible.
This doesn't necessarily have to be true, but we recommend running your
pipelines against your test suite or evaluation data to make sure there are no
unexpected results.
If you're using one of the [trained pipelines](/models) we provide, you should
run [`spacy download`](/api/cli#download) to update to the latest version. To
see an overview of all installed packages and their compatibility, you can run
[`spacy validate`](/api/cli#validate).
If you've trained your own custom pipeline and you've confirmed that it's still
working as expected, you can update the spaCy version requirements in the
[`meta.json`](/api/data-formats#meta):
```diff
- "spacy_version": ">=3.6.0,<3.7.0",
+ "spacy_version": ">=3.6.0,<3.8.0",
```
### Updating v3.6 configs
To update a config from spaCy v3.6 with the new v3.7 settings, run
[`init fill-config`](/api/cli#init-fill-config):
```cli
$ python -m spacy init fill-config config-v3.6.cfg config-v3.7.cfg
```
In many cases ([`spacy train`](/api/cli#train),
[`spacy.load`](/api/top-level#spacy.load)), the new defaults will be filled in
automatically, but you'll need to fill in the new settings to run
[`debug config`](/api/cli#debug) and [`debug data`](/api/cli#debug-data).

View File

@ -119,11 +119,9 @@ doc = nlp(text)
displacy.serve(doc, style="ent") displacy.serve(doc, style="ent")
``` ```
<Iframe <Standalone height={180}>
title="displaCy visualizer for entities" <div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>When <mark style={{ background: '#aa9cfc', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Sebastian Thrun <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>PERSON</span></mark> started working on self-driving cars at <mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> in <mark style={{ background: '#bfe1d9', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>2007 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>DATE</span></mark>, few people outside of the company took him seriously.</div>
src="/images/displacy-ent2.html" </Standalone>
height={180}
/>
The entity visualizer lets you customize the following `options`: The entity visualizer lets you customize the following `options`:
@ -148,11 +146,9 @@ use the `colors` setting to add your own colors for them.
> displacy.serve(doc, style="ent", options=options) > displacy.serve(doc, style="ent", options=options)
> ``` > ```
<Iframe <Standalone height={225}>
title="displaCy visualizer for entities (custom styling)" <div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>But <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> is starting from behind. The company made a late push into hardware, and <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Apple <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark>s Siri, available on iPhones, and <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Amazon <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark>s Alexa software, which runs on its Echo and Dot devices, have clear leads in consumer adoption.</div>
src="/images/displacy-ent-custom.html" </Standalone>
height={225}
/>
The above example uses a little trick: Since the background color values are The above example uses a little trick: Since the background color values are
added as the `background` style attribute, you can use any added as the `background` style attribute, you can use any
@ -197,11 +193,9 @@ doc.spans["sc"] = [
displacy.serve(doc, style="span") displacy.serve(doc, style="span")
``` ```
<Iframe <Standalone height={100}>
title="displaCy visualizer for overlapping spans" <div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#7aecec', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#7aecec', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>ORG</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#feca74', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>GPE</span></span></span>.</div>
src="/images/displacy-span.html" </Standalone>
height={180}
/>
The span visualizer lets you customize the following `options`: The span visualizer lets you customize the following `options`:
@ -223,11 +217,9 @@ specify which one displaCy should use with `spans_key` (`sc` is the default).
> displacy.serve(doc, style="span", options=options) > displacy.serve(doc, style="span", options=options)
> ``` > ```
<Iframe <Standalone height={100}>
title="displaCy visualizer for spans (custom spans_key)" <div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#ddd', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#ddd', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>BANK</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span>.</div>
src="/images/displacy-span-custom.html" </Standalone>
height={225}
/>
## Using displaCy in Jupyter notebooks {id="jupyter"} ## Using displaCy in Jupyter notebooks {id="jupyter"}

View File

@ -9,13 +9,9 @@
{ "text": "Models & Languages", "url": "/usage/models" }, { "text": "Models & Languages", "url": "/usage/models" },
{ "text": "Facts & Figures", "url": "/usage/facts-figures" }, { "text": "Facts & Figures", "url": "/usage/facts-figures" },
{ "text": "spaCy 101", "url": "/usage/spacy-101" }, { "text": "spaCy 101", "url": "/usage/spacy-101" },
{ "text": "New in v3.0", "url": "/usage/v3" }, { "text": "New in v3.7", "url": "/usage/v3-7" },
{ "text": "New in v3.1", "url": "/usage/v3-1" }, { "text": "New in v3.6", "url": "/usage/v3-6" },
{ "text": "New in v3.2", "url": "/usage/v3-2" }, { "text": "New in v3.5", "url": "/usage/v3-5" }
{ "text": "New in v3.3", "url": "/usage/v3-3" },
{ "text": "New in v3.4", "url": "/usage/v3-4" },
{ "text": "New in v3.5", "url": "/usage/v3-5" },
{ "text": "New in v3.6", "url": "/usage/v3-6" }
] ]
}, },
{ {

View File

@ -27,7 +27,7 @@
"indexName": "spacy" "indexName": "spacy"
}, },
"binderUrl": "explosion/spacy-io-binder", "binderUrl": "explosion/spacy-io-binder",
"binderVersion": "3.6", "binderVersion": "3.7",
"sections": [ "sections": [
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" }, { "id": "usage", "title": "Usage Documentation", "theme": "blue" },
{ "id": "models", "title": "Models Documentation", "theme": "blue" }, { "id": "models", "title": "Models Documentation", "theme": "blue" },

View File

Before

Width:  |  Height:  |  Size: 5.1 KiB

After

Width:  |  Height:  |  Size: 5.1 KiB

View File

@ -1,80 +0,0 @@
<div
class="entities"
style="
line-height: 2.5;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 18px;
"
>But
<mark
class="entity"
style="
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>Google
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>ORG</span
></mark
>is starting from behind. The company made a late push into hardware, and
<mark
class="entity"
style="
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>Apple
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>ORG</span
></mark
>s Siri, available on iPhones, and
<mark
class="entity"
style="
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>Amazon
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>ORG</span
></mark
>s Alexa software, which runs on its Echo and Dot devices, have clear leads in consumer
adoption.</div
>

View File

@ -1,59 +0,0 @@
<div
class="entities"
style="
line-height: 2.5;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 16px;
"
>
🌱🌿
<mark
class="entity"
style="
background: #3dff74;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>🐍
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>SNEK</span
></mark
>
____ 🌳🌲 ____
<mark
class="entity"
style="
background: #cfc5ff;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>👨‍🌾
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>HUMAN</span
></mark
>
🏘️
</div>

View File

@ -1,84 +0,0 @@
<div
class="entities"
style="
line-height: 2.5;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 16px;
"
>
<mark
class="entity"
style="
background: #7aecec;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
Apple
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>ORG</span
>
</mark>
is looking at buying
<mark
class="entity"
style="
background: #feca74;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
U.K.
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>GPE</span
>
</mark>
startup for
<mark
class="entity"
style="
background: #e4e7d2;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
$1 billion
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>MONEY</span
>
</mark>
</div>

View File

@ -1,86 +0,0 @@
<div
class="entities"
style="
line-height: 2.5;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 18px;
"
>
When
<mark
class="entity"
style="
background: #aa9cfc;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
Sebastian Thrun
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>PERSON</span
>
</mark>
started working on self-driving cars at
<mark
class="entity"
style="
background: #7aecec;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
Google
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>ORG</span
>
</mark>
in
<mark
class="entity"
style="
background: #bfe1d9;
padding: 0.45em 0.6em;
margin: 0 0.25em;
line-height: 1;
border-radius: 0.35em;
"
>
2007
<span
style="
font-size: 0.8em;
font-weight: bold;
line-height: 1;
border-radius: 0.35em;
text-transform: uppercase;
vertical-align: middle;
margin-left: 0.5rem;
"
>DATE</span
>
</mark>
, few people outside of the company took him seriously.
</div>

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,212 @@
<svg
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
id="0"
class="displacy"
width="1275"
height="399.5"
style="
max-width: none;
height: 399.5px;
color: #000000;
background: #ffffff;
font-family: Arial;
"
>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="225">cars</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="400">shift</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="575">insurance</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="750">liability</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="925">toward</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">ADP</tspan>
</text>
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
<tspan class="displacy-word" fill="currentColor" x="1100">manufacturers</tspan>
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">NOUN</tspan>
</text>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-0"
stroke-width="2px"
d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-0"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
amod
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M70,266.5 L62,254.5 78,254.5"
fill="currentColor"
></path>
</g>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-1"
stroke-width="2px"
d="M245,264.5 C245,177.0 390.0,177.0 390.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-1"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
nsubj
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M245,266.5 L237,254.5 253,254.5"
fill="currentColor"
></path>
</g>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-2"
stroke-width="2px"
d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-2"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
compound
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M595,266.5 L587,254.5 603,254.5"
fill="currentColor"
></path>
</g>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-3"
stroke-width="2px"
d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-3"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
dobj
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M745.0,266.5 L753.0,254.5 737.0,254.5"
fill="currentColor"
></path>
</g>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-4"
stroke-width="2px"
d="M420,264.5 C420,2.0 925.0,2.0 925.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-4"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
prep
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M925.0,266.5 L933.0,254.5 917.0,254.5"
fill="currentColor"
></path>
</g>
<g class="displacy-arrow">
<path
class="displacy-arc"
id="arrow-0-5"
stroke-width="2px"
d="M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5"
fill="none"
stroke="currentColor"
></path>
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
<textPath
xlink:href="#arrow-0-5"
class="displacy-label"
startOffset="50%"
fill="currentColor"
text-anchor="middle"
>
pobj
</textPath>
</text>
<path
class="displacy-arrowhead"
d="M1090.0,266.5 L1098.0,254.5 1082.0,254.5"
fill="currentColor"
></path>
</g>
</svg>

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -1,84 +0,0 @@
<div
class="spans"
style="
line-height: 2.5;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 18px;
direction: ltr;
"
>
Welcome to the
<span style="font-weight: bold; display: inline-block; position: relative">
Bank
<span
style="
background: #ddd;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
<span
style="
background: #ddd;
top: 40px;
height: 4px;
border-top-left-radius: 3px;
border-bottom-left-radius: 3px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
<span
style="
background: #ddd;
color: #000;
top: -0.5em;
padding: 2px 3px;
position: absolute;
font-size: 0.6em;
font-weight: bold;
line-height: 1;
border-radius: 3px;
"
>
BANK
</span>
</span>
</span>
<span style="font-weight: bold; display: inline-block; position: relative">
of
<span
style="
background: #ddd;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
</span>
<span style="font-weight: bold; display: inline-block; position: relative">
China
<span
style="
background: #ddd;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
</span>
.
</div>

View File

@ -1,123 +0,0 @@
<div
class="spans"
style="
line-height: 2.5;
direction: ltr;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 18px;
"
>
Welcome to the
<span style="font-weight: bold; display: inline-block; position: relative">
Bank
<span
style="
background: #7aecec;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
<span
style="
background: #7aecec;
top: 40px;
height: 4px;
border-top-left-radius: 3px;
border-bottom-left-radius: 3px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
<span
style="
background: #7aecec;
color: #000;
top: -0.5em;
padding: 2px 3px;
position: absolute;
font-size: 0.6em;
font-weight: bold;
line-height: 1;
border-radius: 3px;
"
>
ORG
</span>
</span>
</span>
<span style="font-weight: bold; display: inline-block; position: relative">
of
<span
style="
background: #7aecec;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
</span>
<span style="font-weight: bold; display: inline-block; position: relative">
China
<span
style="
background: #7aecec;
top: 40px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
<span
style="
background: #feca74;
top: 57px;
height: 4px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
</span>
<span
style="
background: #feca74;
top: 57px;
height: 4px;
border-top-left-radius: 3px;
border-bottom-left-radius: 3px;
left: -1px;
width: calc(100% + 2px);
position: absolute;
"
>
<span
style="
background: #feca74;
color: #000;
top: -0.5em;
padding: 2px 3px;
position: absolute;
font-size: 0.6em;
font-weight: bold;
line-height: 1;
border-radius: 3px;
"
>
GPE
</span>
</span>
</span>
.
</div>

View File

@ -107,6 +107,22 @@ const Image = ({ src, alt, title, href, ...props }) => {
) )
} }
const ImageScrollable = ({ src, alt, width, ...props }) => {
return (
<figure className={classNames(classes.standalone, classes.scrollable)}>
<img className={classes['image-scrollable']} src={src} alt={alt} width={width} height="auto" />
</figure>
)
}
const Standalone = ({ height, children, ...props }) => {
return (
<figure className={classes.standalone} style={{ height }}>
{children}
</figure>
)
}
const ImageFill = ({ image, ...props }) => { const ImageFill = ({ image, ...props }) => {
return ( return (
<span <span
@ -137,4 +153,4 @@ const GoogleSheet = ({ id, link, height, button = 'View full table' }) => {
) )
} }
export { YouTube, SoundCloud, Iframe, Image, ImageFill, GoogleSheet } export { YouTube, SoundCloud, Iframe, Image, ImageFill, ImageScrollable, GoogleSheet, Standalone }

View File

@ -13,7 +13,7 @@ import Aside from './components/aside'
import Button from './components/button' import Button from './components/button'
import Tag from './components/tag' import Tag from './components/tag'
import Grid from './components/grid' import Grid from './components/grid'
import { YouTube, SoundCloud, Iframe, Image, GoogleSheet } from './components/embed' import { YouTube, SoundCloud, Iframe, Image, ImageScrollable, GoogleSheet, Standalone } from './components/embed'
import Project from './widgets/project' import Project from './widgets/project'
import { Integration, IntegrationLogo } from './widgets/integration.js' import { Integration, IntegrationLogo } from './widgets/integration.js'
import { Logos, Colors, Patterns } from './widgets/styleguide' import { Logos, Colors, Patterns } from './widgets/styleguide'
@ -90,6 +90,8 @@ export const remarkComponents = {
* For regular img elements it is not possible to pass properties * For regular img elements it is not possible to pass properties
*/ */
Image, Image,
ImageScrollable,
Standalone,
Label, Label,
Logos, Logos,

View File

@ -26,12 +26,20 @@
padding: var(--spacing-xs) padding: var(--spacing-xs)
margin-bottom: var(--spacing-md) margin-bottom: var(--spacing-md)
.scrollable
max-width: 100%
overflow: auto
.image .image
position: relative position: relative
display: block display: block
max-width: 100% max-width: 100%
margin: 0 auto margin: 0 auto
.image-scrollable
display: block
max-width: fit-content
.figure-fill .figure-fill
display: block display: block
position: relative position: relative

View File

@ -58,8 +58,8 @@ const AlertSpace = ({ nightly, legacy }) => {
} }
const navAlert = ( const navAlert = (
<Link to="/usage/v3-6" noLinkLayout> <Link to="https://form.typeform.com/to/WlflqP1b" noLinkLayout>
<strong>💥 Out now:</strong> spaCy v3.6 💥 Interested in <strong>Premium spaCy Models</strong>?
</Link> </Link>
) )