Merge pull request #13107 from adrianeboyd/chore/update-develop-from-master-v3.8-1
Update develop from master for v3.8
58
.github/workflows/tests.yml
vendored
|
@ -58,7 +58,7 @@ jobs:
|
|||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python_version: ["3.11", "3.12.0-rc.2"]
|
||||
python_version: ["3.12"]
|
||||
include:
|
||||
- os: windows-latest
|
||||
python_version: "3.7"
|
||||
|
@ -68,6 +68,8 @@ jobs:
|
|||
python_version: "3.9"
|
||||
- os: windows-latest
|
||||
python_version: "3.10"
|
||||
- os: macos-latest
|
||||
python_version: "3.11"
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
|
@ -115,22 +117,22 @@ jobs:
|
|||
- name: Test import
|
||||
run: python -W error -c "import spacy"
|
||||
|
||||
# - name: "Test download CLI"
|
||||
# run: |
|
||||
# python -m spacy download ca_core_news_sm
|
||||
# python -m spacy download ca_core_news_md
|
||||
# python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
|
||||
# if: matrix.python_version == '3.9'
|
||||
#
|
||||
# - name: "Test download_url in info CLI"
|
||||
# run: |
|
||||
# python -W error -m spacy info ca_core_news_sm | grep -q download_url
|
||||
# if: matrix.python_version == '3.9'
|
||||
#
|
||||
# - name: "Test no warnings on load (#11713)"
|
||||
# run: |
|
||||
# python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
|
||||
# if: matrix.python_version == '3.9'
|
||||
- name: "Test download CLI"
|
||||
run: |
|
||||
python -m spacy download ca_core_news_sm
|
||||
python -m spacy download ca_core_news_md
|
||||
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test download_url in info CLI"
|
||||
run: |
|
||||
python -W error -m spacy info ca_core_news_sm | grep -q download_url
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test no warnings on load (#11713)"
|
||||
run: |
|
||||
python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test convert CLI"
|
||||
run: |
|
||||
|
@ -154,17 +156,17 @@ jobs:
|
|||
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
# - name: "Test assemble CLI"
|
||||
# run: |
|
||||
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
|
||||
# PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
# if: matrix.python_version == '3.9'
|
||||
#
|
||||
# - name: "Test assemble CLI vectors warning"
|
||||
# run: |
|
||||
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
|
||||
# python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
|
||||
# if: matrix.python_version == '3.9'
|
||||
- name: "Test assemble CLI"
|
||||
run: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
|
||||
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test assemble CLI vectors warning"
|
||||
run: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
|
||||
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Install test requirements"
|
||||
run: |
|
||||
|
|
2
LICENSE
|
@ -1,6 +1,6 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (C) 2016-2022 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
|
||||
Copyright (C) 2016-2023 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
@ -16,7 +16,7 @@ model packaging, deployment and workflow management. spaCy is commercial
|
|||
open-source software, released under the
|
||||
[MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
|
||||
|
||||
💫 **Version 3.6 out now!**
|
||||
💫 **Version 3.7 out now!**
|
||||
[Check out the release notes here.](https://github.com/explosion/spaCy/releases)
|
||||
|
||||
[](https://github.com/explosion/spaCy/actions/workflows/tests.yml)
|
||||
|
|
|
@ -10,7 +10,6 @@ wasabi>=0.9.1,<1.2.0
|
|||
srsly>=2.4.3,<3.0.0
|
||||
catalogue>=2.0.6,<2.1.0
|
||||
typer>=0.3.0,<0.10.0
|
||||
pathy>=0.10.0
|
||||
smart-open>=5.2.1,<7.0.0
|
||||
weasel>=0.1.0,<0.4.0
|
||||
# Third party dependencies
|
||||
|
|
|
@ -56,7 +56,6 @@ install_requires =
|
|||
weasel>=0.1.0,<0.4.0
|
||||
# Third-party dependencies
|
||||
typer>=0.3.0,<0.10.0
|
||||
pathy>=0.10.0
|
||||
smart-open>=5.2.1,<7.0.0
|
||||
tqdm>=4.38.0,<5.0.0
|
||||
numpy>=1.15.0; python_version < "3.9"
|
||||
|
|
|
@ -13,6 +13,7 @@ from thinc.api import Config, prefer_gpu, require_cpu, require_gpu # noqa: F401
|
|||
from . import pipeline # noqa: F401
|
||||
from . import util
|
||||
from .about import __version__ # noqa: F401
|
||||
from .cli.info import info # noqa: F401
|
||||
from .errors import Errors
|
||||
from .glossary import explain # noqa: F401
|
||||
from .language import Language
|
||||
|
@ -76,9 +77,3 @@ def blank(
|
|||
# We should accept both dot notation and nested dict here for consistency
|
||||
config = util.dot_to_dict(config)
|
||||
return LangClass.from_config(config, vocab=vocab, meta=meta)
|
||||
|
||||
|
||||
def info(*args, **kwargs):
|
||||
from .cli.info import info as cli_info
|
||||
|
||||
return cli_info(*args, **kwargs)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy"
|
||||
__version__ = "3.7.0"
|
||||
__version__ = "3.7.2"
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
|
|
|
@ -22,8 +22,17 @@ from .init_pipeline import init_pipeline_cli # noqa: F401
|
|||
from .package import package # noqa: F401
|
||||
from .pretrain import pretrain # noqa: F401
|
||||
from .profile import profile # noqa: F401
|
||||
from .train import train_cli # noqa: F401
|
||||
from .validate import validate # noqa: F401
|
||||
from .project.assets import project_assets # type: ignore[attr-defined] # noqa: F401
|
||||
from .project.clone import project_clone # type: ignore[attr-defined] # noqa: F401
|
||||
from .project.document import ( # type: ignore[attr-defined] # noqa: F401
|
||||
project_document,
|
||||
)
|
||||
from .project.dvc import project_update_dvc # type: ignore[attr-defined] # noqa: F401
|
||||
from .project.pull import project_pull # type: ignore[attr-defined] # noqa: F401
|
||||
from .project.push import project_push # type: ignore[attr-defined] # noqa: F401
|
||||
from .project.run import project_run # type: ignore[attr-defined] # noqa: F401
|
||||
from .train import train_cli # type: ignore[attr-defined] # noqa: F401
|
||||
from .validate import validate # type: ignore[attr-defined] # noqa: F401
|
||||
|
||||
|
||||
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
|
||||
|
|
|
@ -41,10 +41,6 @@ from ..util import (
|
|||
run_command,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathy import FluidPath # noqa: F401
|
||||
|
||||
|
||||
SDIST_SUFFIX = ".tar.gz"
|
||||
WHEEL_SUFFIX = "-py3-none-any.whl"
|
||||
|
||||
|
|
0
spacy/cli/project/__init__.py
Normal file
1
spacy/cli/project/assets.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.assets import *
|
1
spacy/cli/project/clone.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.clone import *
|
1
spacy/cli/project/document.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.document import *
|
1
spacy/cli/project/dvc.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.dvc import *
|
1
spacy/cli/project/pull.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.pull import *
|
1
spacy/cli/project/push.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.push import *
|
1
spacy/cli/project/remote_storage.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.remote_storage import *
|
1
spacy/cli/project/run.py
Normal file
|
@ -0,0 +1 @@
|
|||
from weasel.cli.run import *
|
|
@ -142,7 +142,25 @@ class SpanRenderer:
|
|||
spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
|
||||
title (str / None): Document title set in Doc.user_data['title'].
|
||||
"""
|
||||
per_token_info = []
|
||||
per_token_info = self._assemble_per_token_info(tokens, spans)
|
||||
markup = self._render_markup(per_token_info)
|
||||
markup = TPL_SPANS.format(content=markup, dir=self.direction)
|
||||
if title:
|
||||
markup = TPL_TITLE.format(title=title) + markup
|
||||
return markup
|
||||
|
||||
@staticmethod
|
||||
def _assemble_per_token_info(
|
||||
tokens: List[str], spans: List[Dict[str, Any]]
|
||||
) -> List[Dict[str, List[Dict[str, Any]]]]:
|
||||
"""Assembles token info used to generate markup in render_spans().
|
||||
tokens (List[str]): Tokens in text.
|
||||
spans (List[Dict[str, Any]]): Spans in text.
|
||||
RETURNS (List[Dict[str, List[Dict[str, Any]]]]): Per token info needed to render HTML markup for given tokens
|
||||
and spans.
|
||||
"""
|
||||
per_token_info: List[Dict[str, List[Dict[str, Any]]]] = []
|
||||
|
||||
# we must sort so that we can correctly describe when spans need to "stack"
|
||||
# which is determined by their start token, then span length (longer spans on top),
|
||||
# then break any remaining ties with the span label
|
||||
|
@ -154,21 +172,22 @@ class SpanRenderer:
|
|||
s["label"],
|
||||
),
|
||||
)
|
||||
|
||||
for s in spans:
|
||||
# this is the vertical 'slot' that the span will be rendered in
|
||||
# vertical_position = span_label_offset + (offset_step * (slot - 1))
|
||||
s["render_slot"] = 0
|
||||
|
||||
for idx, token in enumerate(tokens):
|
||||
# Identify if a token belongs to a Span (and which) and if it's a
|
||||
# start token of said Span. We'll use this for the final HTML render
|
||||
token_markup: Dict[str, Any] = {}
|
||||
token_markup["text"] = token
|
||||
concurrent_spans = 0
|
||||
intersecting_spans: List[Dict[str, Any]] = []
|
||||
entities = []
|
||||
for span in spans:
|
||||
ent = {}
|
||||
if span["start_token"] <= idx < span["end_token"]:
|
||||
concurrent_spans += 1
|
||||
span_start = idx == span["start_token"]
|
||||
ent["label"] = span["label"]
|
||||
ent["is_start"] = span_start
|
||||
|
@ -176,7 +195,12 @@ class SpanRenderer:
|
|||
# When the span starts, we need to know how many other
|
||||
# spans are on the 'span stack' and will be rendered.
|
||||
# This value becomes the vertical render slot for this entire span
|
||||
span["render_slot"] = concurrent_spans
|
||||
span["render_slot"] = (
|
||||
intersecting_spans[-1]["render_slot"]
|
||||
if len(intersecting_spans)
|
||||
else 0
|
||||
) + 1
|
||||
intersecting_spans.append(span)
|
||||
ent["render_slot"] = span["render_slot"]
|
||||
kb_id = span.get("kb_id", "")
|
||||
kb_url = span.get("kb_url", "#")
|
||||
|
@ -193,11 +217,8 @@ class SpanRenderer:
|
|||
span["render_slot"] = 0
|
||||
token_markup["entities"] = entities
|
||||
per_token_info.append(token_markup)
|
||||
markup = self._render_markup(per_token_info)
|
||||
markup = TPL_SPANS.format(content=markup, dir=self.direction)
|
||||
if title:
|
||||
markup = TPL_TITLE.format(title=title) + markup
|
||||
return markup
|
||||
|
||||
return per_token_info
|
||||
|
||||
def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
|
||||
"""Render the markup from per-token information"""
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
from .candidate import Candidate, get_candidates, get_candidates_batch
|
||||
from .kb import KnowledgeBase
|
||||
from .kb_in_memory import InMemoryLookupKB
|
||||
|
||||
__all__ = [
|
||||
"Candidate",
|
||||
"KnowledgeBase",
|
||||
"InMemoryLookupKB",
|
||||
"get_candidates",
|
||||
"get_candidates_batch",
|
||||
]
|
||||
|
|
|
@ -3,4 +3,4 @@ from .levenshtein import levenshtein
|
|||
from .matcher import Matcher
|
||||
from .phrasematcher import PhraseMatcher
|
||||
|
||||
__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"]
|
||||
__all__ = ["DependencyMatcher", "Matcher", "PhraseMatcher", "levenshtein"]
|
||||
|
|
|
@ -22,6 +22,7 @@ from .trainable_pipe import TrainablePipe
|
|||
__all__ = [
|
||||
"AttributeRuler",
|
||||
"DependencyParser",
|
||||
"EditTreeLemmatizer",
|
||||
"EntityLinker",
|
||||
"EntityRecognizer",
|
||||
"EntityRuler",
|
||||
|
|
|
@ -731,3 +731,12 @@ def test_for_no_ent_sents():
|
|||
sents = list(doc.ents[0].sents)
|
||||
assert len(sents) == 1
|
||||
assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
|
||||
|
||||
|
||||
def test_span_api_richcmp_other(en_tokenizer):
|
||||
doc1 = en_tokenizer("a b")
|
||||
doc2 = en_tokenizer("b c")
|
||||
assert not doc1[1:2] == doc1[1]
|
||||
assert not doc1[1:2] == doc2[0]
|
||||
assert not doc1[1:2] == doc2[0:1]
|
||||
assert not doc1[0:1] == doc2
|
||||
|
|
|
@ -294,3 +294,12 @@ def test_missing_head_dep(en_vocab):
|
|||
assert aligned_heads[0] == ref_heads[0]
|
||||
assert aligned_deps[5] == ref_deps[5]
|
||||
assert aligned_heads[5] == ref_heads[5]
|
||||
|
||||
|
||||
def test_token_api_richcmp_other(en_tokenizer):
|
||||
doc1 = en_tokenizer("a b")
|
||||
doc2 = en_tokenizer("b c")
|
||||
assert not doc1[1] == doc1[0:1]
|
||||
assert not doc1[1] == doc2[1:2]
|
||||
assert not doc1[1] == doc2[0]
|
||||
assert not doc1[0] == doc2
|
||||
|
|
|
@ -12,7 +12,6 @@ from thinc.api import Config
|
|||
|
||||
import spacy
|
||||
from spacy import about
|
||||
from spacy import info as spacy_info
|
||||
from spacy.cli import info
|
||||
from spacy.cli._util import parse_config_overrides, string_to_list, walk_directory
|
||||
from spacy.cli.apply import apply
|
||||
|
@ -193,9 +192,6 @@ def test_cli_info():
|
|||
raw_data = info(tmp_dir, exclude=[""])
|
||||
assert raw_data["lang"] == "nl"
|
||||
assert raw_data["components"] == ["textcat"]
|
||||
raw_data = spacy_info(tmp_dir, exclude=[""])
|
||||
assert raw_data["lang"] == "nl"
|
||||
assert raw_data["components"] == ["textcat"]
|
||||
|
||||
|
||||
def test_cli_converters_conllu_to_docs():
|
||||
|
@ -538,7 +534,6 @@ def test_string_to_list_intify(value):
|
|||
assert string_to_list(value, intify=True) == [1, 2, 3]
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporarily skip before 3.7 models are published")
|
||||
def test_download_compatibility():
|
||||
spec = SpecifierSet("==" + about.__version__)
|
||||
spec.prereleases = False
|
||||
|
@ -549,7 +544,6 @@ def test_download_compatibility():
|
|||
assert get_minor_version(about.__version__) == get_minor_version(version)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporarily skip before 3.7 models are published")
|
||||
def test_validate_compatibility_table():
|
||||
spec = SpecifierSet("==" + about.__version__)
|
||||
spec.prereleases = False
|
||||
|
@ -1067,3 +1061,8 @@ def test_debug_data_trainable_lemmatizer_not_annotated():
|
|||
|
||||
data = _compile_gold(train_examples, ["trainable_lemmatizer"], nlp, True)
|
||||
assert data["no_lemma_annotations"] == 2
|
||||
|
||||
|
||||
def test_project_api_imports():
|
||||
from spacy.cli import project_run
|
||||
from spacy.cli.project.run import project_run # noqa: F401, F811
|
||||
|
|
|
@ -2,7 +2,7 @@ import numpy
|
|||
import pytest
|
||||
|
||||
from spacy import displacy
|
||||
from spacy.displacy.render import DependencyRenderer, EntityRenderer
|
||||
from spacy.displacy.render import DependencyRenderer, EntityRenderer, SpanRenderer
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.fa import Persian
|
||||
from spacy.tokens import Doc, Span
|
||||
|
@ -468,3 +468,23 @@ def test_issue12816(en_vocab) -> None:
|
|||
# Verify that the HTML tag is still escaped
|
||||
html = displacy.render(doc, style="span")
|
||||
assert "<TEST>" in html
|
||||
|
||||
|
||||
@pytest.mark.issue(13056)
|
||||
def test_displacy_span_stacking():
|
||||
"""Test whether span stacking works properly for multiple overlapping spans."""
|
||||
spans = [
|
||||
{"start_token": 2, "end_token": 5, "label": "SkillNC"},
|
||||
{"start_token": 0, "end_token": 2, "label": "Skill"},
|
||||
{"start_token": 1, "end_token": 3, "label": "Skill"},
|
||||
]
|
||||
tokens = ["Welcome", "to", "the", "Bank", "of", "China", "."]
|
||||
per_token_info = SpanRenderer._assemble_per_token_info(spans=spans, tokens=tokens)
|
||||
|
||||
assert len(per_token_info) == len(tokens)
|
||||
assert all([len(per_token_info[i]["entities"]) == 1 for i in (0, 3, 4)])
|
||||
assert all([len(per_token_info[i]["entities"]) == 2 for i in (1, 2)])
|
||||
assert per_token_info[1]["entities"][0]["render_slot"] == 1
|
||||
assert per_token_info[1]["entities"][1]["render_slot"] == 2
|
||||
assert per_token_info[2]["entities"][0]["render_slot"] == 2
|
||||
assert per_token_info[2]["entities"][1]["render_slot"] == 3
|
||||
|
|
|
@ -5,4 +5,4 @@ from .span import Span
|
|||
from .span_group import SpanGroup
|
||||
from .token import Token
|
||||
|
||||
__all__ = ["Doc", "Token", "Span", "SpanGroup", "DocBin", "MorphAnalysis"]
|
||||
__all__ = ["Doc", "DocBin", "MorphAnalysis", "Span", "SpanGroup", "Token"]
|
||||
|
|
|
@ -127,14 +127,17 @@ cdef class Span:
|
|||
self._vector = vector
|
||||
self._vector_norm = vector_norm
|
||||
|
||||
def __richcmp__(self, Span other, int op):
|
||||
def __richcmp__(self, object other, int op):
|
||||
if other is None:
|
||||
if op == 0 or op == 1 or op == 2:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
if not isinstance(other, Span):
|
||||
return False
|
||||
cdef Span other_span = other
|
||||
self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc)
|
||||
other_tuple = (other.c.start_char, other.c.end_char, other.c.label, other.c.kb_id, other.id, other.doc)
|
||||
other_tuple = (other_span.c.start_char, other_span.c.end_char, other_span.c.label, other_span.c.kb_id, other_span.id, other_span.doc)
|
||||
# <
|
||||
if op == 0:
|
||||
return self_tuple < other_tuple
|
||||
|
|
|
@ -53,7 +53,12 @@ class Token:
|
|||
def __bytes__(self) -> bytes: ...
|
||||
def __str__(self) -> str: ...
|
||||
def __repr__(self) -> str: ...
|
||||
def __richcmp__(self, other: Token, op: int) -> bool: ...
|
||||
def __lt__(self, other: Any) -> bool: ...
|
||||
def __le__(self, other: Any) -> bool: ...
|
||||
def __eq__(self, other: Any) -> bool: ...
|
||||
def __ne__(self, other: Any) -> bool: ...
|
||||
def __gt__(self, other: Any) -> bool: ...
|
||||
def __ge__(self, other: Any) -> bool: ...
|
||||
@property
|
||||
def _(self) -> Underscore: ...
|
||||
def nbor(self, i: int = ...) -> Token: ...
|
||||
|
|
|
@ -139,17 +139,20 @@ cdef class Token:
|
|||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
def __richcmp__(self, Token other, int op):
|
||||
def __richcmp__(self, object other, int op):
|
||||
# http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
|
||||
if other is None:
|
||||
if op in (0, 1, 2):
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
if not isinstance(other, Token):
|
||||
return False
|
||||
cdef Token other_token = other
|
||||
cdef Doc my_doc = self.doc
|
||||
cdef Doc other_doc = other.doc
|
||||
cdef Doc other_doc = other_token.doc
|
||||
my = self.idx
|
||||
their = other.idx
|
||||
their = other_token.idx
|
||||
if op == 0:
|
||||
return my < their
|
||||
elif op == 2:
|
||||
|
|
|
@ -16,3 +16,28 @@ from .iob_utils import ( # noqa: F401
|
|||
tags_to_entities,
|
||||
)
|
||||
from .loggers import console_logger # noqa: F401
|
||||
|
||||
__all__ = [
|
||||
"Alignment",
|
||||
"Corpus",
|
||||
"Example",
|
||||
"JsonlCorpus",
|
||||
"PlainTextCorpus",
|
||||
"biluo_tags_to_offsets",
|
||||
"biluo_tags_to_spans",
|
||||
"biluo_to_iob",
|
||||
"create_copy_from_base_model",
|
||||
"docs_to_json",
|
||||
"dont_augment",
|
||||
"iob_to_biluo",
|
||||
"minibatch_by_padded_size",
|
||||
"minibatch_by_words",
|
||||
"offsets_to_biluo_tags",
|
||||
"orth_variants_augmenter",
|
||||
"read_json_file",
|
||||
"remove_bilu_prefix",
|
||||
"split_bilu_label",
|
||||
"tags_to_entities",
|
||||
"validate_get_examples",
|
||||
"validate_examples",
|
||||
]
|
||||
|
|
|
@ -1544,9 +1544,9 @@ obsolete files is left up to you.
|
|||
|
||||
Remotes can be defined in the `remotes` section of the
|
||||
[`project.yml`](/usage/projects#project-yml). Under the hood, spaCy uses
|
||||
[`Pathy`](https://github.com/justindujardin/pathy) to communicate with the
|
||||
remote storages, so you can use any protocol that `Pathy` supports, including
|
||||
[S3](https://aws.amazon.com/s3/),
|
||||
[`cloudpathlib`](https://cloudpathlib.drivendata.org) to communicate with the
|
||||
remote storages, so you can use any protocol that `cloudpathlib` supports,
|
||||
including [S3](https://aws.amazon.com/s3/),
|
||||
[Google Cloud Storage](https://cloud.google.com/storage), and the local
|
||||
filesystem, although you may need to install extra dependencies to use certain
|
||||
protocols.
|
||||
|
|
|
@ -16,14 +16,6 @@ prototyping** and **prompting**, and turning unstructured responses into
|
|||
|
||||
## Config and implementation {id="config"}
|
||||
|
||||
An LLM component is implemented through the `LLMWrapper` class. It is accessible
|
||||
through a generic `llm`
|
||||
[component factory](https://spacy.io/usage/processing-pipelines#custom-components-factories)
|
||||
as well as through task-specific component factories: `llm_ner`, `llm_spancat`, `llm_rel`,
|
||||
`llm_textcat`, `llm_sentiment` and `llm_summarization`.
|
||||
|
||||
### LLMWrapper.\_\_init\_\_ {id="init",tag="method"}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
|
@ -32,13 +24,26 @@ as well as through task-specific component factories: `llm_ner`, `llm_spancat`,
|
|||
> llm = nlp.add_pipe("llm", config=config)
|
||||
>
|
||||
> # Construction via add_pipe with a task-specific factory and default GPT3.5 model
|
||||
> llm = nlp.add_pipe("llm-ner")
|
||||
> llm = nlp.add_pipe("llm_ner")
|
||||
>
|
||||
> # Construction via add_pipe with a task-specific factory and custom model
|
||||
> llm = nlp.add_pipe("llm_ner", config={"model": {"@llm_models": "spacy.Dolly.v1", "name": "dolly-v2-12b"}})
|
||||
>
|
||||
> # Construction from class
|
||||
> from spacy_llm.pipeline import LLMWrapper
|
||||
> llm = LLMWrapper(vocab=nlp.vocab, task=task, model=model, cache=cache, save_io=True)
|
||||
> ```
|
||||
|
||||
An LLM component is implemented through the `LLMWrapper` class. It is accessible
|
||||
through a generic `llm`
|
||||
[component factory](https://spacy.io/usage/processing-pipelines#custom-components-factories)
|
||||
as well as through task-specific component factories: `llm_ner`, `llm_spancat`,
|
||||
`llm_rel`, `llm_textcat`, `llm_sentiment` and `llm_summarization`. For these
|
||||
factories, the GPT-3.5 model from OpenAI is used by default, but this can be
|
||||
customized.
|
||||
|
||||
### LLMWrapper.\_\_init\_\_ {id="init",tag="method"}
|
||||
|
||||
Create a new pipeline instance. In your application, you would normally use a
|
||||
shortcut for this and instantiate the component using its string name and
|
||||
[`nlp.add_pipe`](/api/language#add_pipe).
|
||||
|
@ -255,9 +260,11 @@ prompting.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [summarization.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/summarization.v1.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SummarizationTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SummarizationExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `max_n_words` | Maximum number of words to be used in summary. Note that this should not be expected to work exactly. Defaults to `None`. ~~Optional[int]~~ |
|
||||
| `field` | Name of extension attribute to store summary in (i.e. the summary will be available in `doc._.{field}`). Defaults to `summary`. ~~str~~ |
|
||||
|
||||
|
@ -326,12 +333,15 @@ the v3 implementation will use a dummy example in the prompt. Technically this
|
|||
means that the task will always perform few-shot prompting under the hood.
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [ner.v3.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v3.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [ner.v3.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v3.jinja). ~~str~~ |
|
||||
| `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -416,11 +426,14 @@ v1.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [ner.v2.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/ner.v2.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -468,9 +481,12 @@ few-shot prompting.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `labels` | Comma-separated list of labels. ~~str~~ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[NERTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `NERExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | Comma-separated list of labels. ~~str~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -540,13 +556,16 @@ support overlapping entities and store its annotations in `doc.spans`.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [`spancat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v3.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [`spancat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v3.jinja). ~~str~~ |
|
||||
| `description` (NEW) | A description of what to recognize or not recognize as entities. ~~str~~ |
|
||||
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -569,12 +588,15 @@ support overlapping entities and store its annotations in `doc.spans`.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`spancat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v2.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` (NEW) | Optional dict mapping a label to a description of that label. These descriptions are added to the prompt to help instruct the LLM on what to extract. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`spancat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/spancat.v2.jinja). ~~str~~ |
|
||||
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -600,10 +622,13 @@ v1 NER task to support overlapping entities and store its annotations in
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SpanCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SpanCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | Comma-separated list of labels. ~~str~~ |
|
||||
| `spans_key` | Key of the `Doc.spans` dict to save the spans under. Defaults to `"sc"`. ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, defaults to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `alignment_mode` | Alignment mode in case the LLM returns entities that do not align with token boundaries. Options are `"strict"`, `"contract"` or `"expand"`. Defaults to `"contract"`. ~~str~~ |
|
||||
| `case_sensitive_matching` | Whether to search without case sensitivity. Defaults to `False`. ~~bool~~ |
|
||||
|
@ -637,11 +662,14 @@ prompt.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` (NEW) | Dictionary of label definitions. Included in the prompt, if set. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [`textcat.v3.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v3.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` (NEW) | Dictionary of label definitions. Included in the prompt, if set. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
|
||||
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
|
||||
|
@ -664,10 +692,13 @@ V2 includes all v1 functionality, with an improved prompt template.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` (NEW) | Custom prompt template to send to LLM model. Defaults to [`textcat.v2.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/textcat.v2.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
|
||||
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
|
||||
|
@ -691,13 +722,16 @@ prompting.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `labels` | Comma-separated list of labels. ~~str~~ |
|
||||
| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[TextCatTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `TextCatExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | Comma-separated list of labels. ~~str~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Deafults to `False`. ~~bool~~ |
|
||||
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Deafults to `True`. ~~bool~~ |
|
||||
| `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Deafults to `False`. ~~bool~~ |
|
||||
| `exclusive_classes` | If set to `True`, only one label per document should be valid. If set to `False`, one document can have multiple labels. Defaults to `False`. ~~bool~~ |
|
||||
| `allow_none` | When set to `True`, allows the LLM to not return any of the given label. The resulting dict in `doc.cats` will have `0.0` scores for all labels. Defaults to `True`. ~~bool~~ |
|
||||
| `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Defaults to `False`. ~~bool~~ |
|
||||
|
||||
To perform [few-shot learning](/usage/large-language-models#few-shot-prompts),
|
||||
you can write down a few examples in a separate file, and provide these to be
|
||||
|
@ -723,6 +757,25 @@ supports `.yml`, `.yaml`, `.json` and `.jsonl`.
|
|||
path = "textcat_examples.json"
|
||||
```
|
||||
|
||||
If you want to perform few-shot learning with a binary classifier (i. e. a text
|
||||
either should or should not be assigned to a given class), you can provide
|
||||
positive and negative examples with answers of "POS" or "NEG". "POS" means that
|
||||
this example should be assigned the class label defined in the configuration,
|
||||
"NEG" means it shouldn't. E. g. for spam classification:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"text": "You won the lottery! Wire a fee of 200$ to be able to withdraw your winnings.",
|
||||
"answer": "POS"
|
||||
},
|
||||
{
|
||||
"text": "Your order #123456789 has arrived",
|
||||
"answer": "NEG"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### REL {id="rel"}
|
||||
|
||||
The REL task extracts relations between named entities.
|
||||
|
@ -741,11 +794,14 @@ on an upstream NER component for entities extraction.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [`rel.v1.jinja`](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/rel.v1.jinja). ~~str~~ |
|
||||
| `label_definitions` | Dictionary providing a description for each relation label. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[RELTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `RELExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `labels` | List of labels or str of comma-separated list of labels. ~~Union[List[str], str]~~ |
|
||||
| `label_definitions` | Dictionary providing a description for each relation label. Defaults to `None`. ~~Optional[Dict[str, str]]~~ |
|
||||
| `normalizer` | Function that normalizes the labels as returned by the LLM. If `None`, falls back to `spacy.LowercaseNormalizer.v1`. Defaults to `None`. ~~Optional[Callable[[str], str]]~~ |
|
||||
| `verbose` | If set to `True`, warnings will be generated when the LLM returns invalid responses. Defaults to `False`. ~~bool~~ |
|
||||
|
||||
|
@ -794,9 +850,12 @@ This task supports both zero-shot and few-shot prompting.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [lemma.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/lemma.v1.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[LemmaTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `LemmaExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
|
||||
The task prompts the LLM to lemmatize the passed text and return the lemmatized
|
||||
version as a list of tokens and their corresponding lemma. E. g. the text
|
||||
|
@ -871,9 +930,12 @@ This task supports both zero-shot and few-shot prompting.
|
|||
> ```
|
||||
|
||||
| Argument | Description |
|
||||
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `template` | Custom prompt template to send to LLM model. Defaults to [sentiment.v1.jinja](https://github.com/explosion/spacy-llm/blob/main/spacy_llm/tasks/templates/sentiment.v1.jinja). ~~str~~ |
|
||||
| `examples` | Optional function that generates examples for few-shot learning. Defaults to `None`. ~~Optional[Callable[[], Iterable[Any]]]~~ |
|
||||
| `parse_responses` (NEW) | Callable for parsing LLM responses for this task. Defaults to the internal parsing method for this task. ~~Optional[TaskResponseParser[SentimentTask]]~~ |
|
||||
| `prompt_example_type` (NEW) | Type to use for fewshot examples. Defaults to `SentimentExample`. ~~Optional[Type[FewshotExample]]~~ |
|
||||
| `scorer` (NEW) | Scorer function that evaluates the task performance on provided examples. Defaults to the metric used by spaCy. ~~Optional[Scorer]~~ |
|
||||
| `field` | Name of extension attribute to store the sentiment score in (i. e. the sentiment score will be available in `doc._.{field}`). Defaults to `sentiment`. ~~str~~ |
|
||||
|
||||
To perform [few-shot learning](/usage/large-language-models#few-shot-prompts),
|
||||
|
@ -953,11 +1015,11 @@ provider's API.
|
|||
Currently, these models are provided as part of the core library:
|
||||
|
||||
| Model | Provider | Supported names | Default name | Default config |
|
||||
| ----------------------------- | --------- | ---------------------------------------------------------------------------------------- | ---------------------- | ------------------------------------ |
|
||||
| ----------------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------ | ---------------------- | ------------------------------------ |
|
||||
| `spacy.GPT-4.v1` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{}` |
|
||||
| `spacy.GPT-4.v2` | OpenAI | `["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]` | `"gpt-4"` | `{temperature=0.0}` |
|
||||
| `spacy.GPT-3-5.v1` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k"]` | `"gpt-3.5-turbo"` | `{}` |
|
||||
| `spacy.GPT-3-5.v2` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k"]` | `"gpt-3.5-turbo"` | `{temperature=0.0}` |
|
||||
| `spacy.GPT-3-5.v1` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k", "gpt-3.5-turbo-instruct"]` | `"gpt-3.5-turbo"` | `{}` |
|
||||
| `spacy.GPT-3-5.v2` | OpenAI | `["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613-16k", "gpt-3.5-turbo-instruct"]` | `"gpt-3.5-turbo"` | `{temperature=0.0}` |
|
||||
| `spacy.Davinci.v1` | OpenAI | `["davinci"]` | `"davinci"` | `{}` |
|
||||
| `spacy.Davinci.v2` | OpenAI | `["davinci"]` | `"davinci"` | `{temperature=0.0, max_tokens=500}` |
|
||||
| `spacy.Text-Davinci.v1` | OpenAI | `["text-davinci-003", "text-davinci-002"]` | `"text-davinci-003"` | `{}` |
|
||||
|
@ -976,6 +1038,7 @@ Currently, these models are provided as part of the core library:
|
|||
| `spacy.Ada.v2` | OpenAI | `["ada"]` | `"ada"` | `{temperature=0.0, max_tokens=500}` |
|
||||
| `spacy.Text-Ada.v1` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{}` |
|
||||
| `spacy.Text-Ada.v2` | OpenAI | `["text-ada-001"]` | `"text-ada-001"` | `{temperature=0.0, max_tokens=500}` |
|
||||
| `spacy.Azure.v1` | Microsoft, OpenAI | Arbitrary values | No default | `{temperature=0.0}` |
|
||||
| `spacy.Command.v1` | Cohere | `["command", "command-light", "command-light-nightly", "command-nightly"]` | `"command"` | `{}` |
|
||||
| `spacy.Claude-2.v1` | Anthropic | `["claude-2", "claude-2-100k"]` | `"claude-2"` | `{}` |
|
||||
| `spacy.Claude-1.v1` | Anthropic | `["claude-1", "claude-1-100k"]` | `"claude-1"` | `{}` |
|
||||
|
@ -984,10 +1047,29 @@ Currently, these models are provided as part of the core library:
|
|||
| `spacy.Claude-1-3.v1` | Anthropic | `["claude-1.3", "claude-1.3-100k"]` | `"claude-1.3"` | `{}` |
|
||||
| `spacy.Claude-instant-1.v1` | Anthropic | `["claude-instant-1", "claude-instant-1-100k"]` | `"claude-instant-1"` | `{}` |
|
||||
| `spacy.Claude-instant-1-1.v1` | Anthropic | `["claude-instant-1.1", "claude-instant-1.1-100k"]` | `"claude-instant-1.1"` | `{}` |
|
||||
| `spacy.PaLM.v1` | Google | `["chat-bison-001", "text-bison-001"]` | `"text-bison-001"` | `{temperature=0.0}` |
|
||||
|
||||
To use these models, make sure that you've [set the relevant API](#api-keys)
|
||||
keys as environment variables.
|
||||
|
||||
**⚠️ A note on `spacy.Azure.v1`.** Working with Azure OpenAI is slightly
|
||||
different than working with models from other providers:
|
||||
|
||||
- In Azure, LLMs have to be made available by creating a _deployment_ of a given
|
||||
model (e. g. GPT-3.5). This deployment can have an arbitrary name. The `name`
|
||||
argument, which everywhere else denotes the model name (e. g. `claude-1.0`,
|
||||
`gpt-3.5`), here refers to the _deployment name_.
|
||||
- Deployed Azure OpenAI models are reachable via a resource-specific base URL,
|
||||
usually of the form `https://{resource}.openai.azure.com`. Hence the URL has
|
||||
to be specified via the `base_url` argument.
|
||||
- Azure further expects the _API version_ to be specified. The default value for
|
||||
this, via the `api_version` argument, is currently `2023-05-15` but may be
|
||||
updated in the future.
|
||||
- Finally, since we can't infer information about the model from the deployment
|
||||
name, `spacy-llm` requires the `model_type` to be set to either
|
||||
`"completions"` or `"chat"`, depending on whether the deployed model is a
|
||||
completion or chat model.
|
||||
|
||||
#### API Keys {id="api-keys"}
|
||||
|
||||
Note that when using hosted services, you have to ensure that the proper API
|
||||
|
@ -1014,6 +1096,12 @@ For Anthropic:
|
|||
export ANTHROPIC_API_KEY="..."
|
||||
```
|
||||
|
||||
For PaLM:
|
||||
|
||||
```shell
|
||||
export PALM_API_KEY="..."
|
||||
```
|
||||
|
||||
### Models via HuggingFace {id="models-hf"}
|
||||
|
||||
These models all take the same parameters:
|
||||
|
@ -1037,11 +1125,27 @@ Currently, these models are provided as part of the core library:
|
|||
| Model | Provider | Supported names | HF directory |
|
||||
| -------------------- | --------------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
|
||||
| `spacy.Dolly.v1` | Databricks | `["dolly-v2-3b", "dolly-v2-7b", "dolly-v2-12b"]` | https://huggingface.co/databricks |
|
||||
| `spacy.Llama2.v1` | Meta AI | `["Llama-2-7b-hf", "Llama-2-13b-hf", "Llama-2-70b-hf"]` | https://huggingface.co/meta-llama |
|
||||
| `spacy.Falcon.v1` | TII | `["falcon-rw-1b", "falcon-7b", "falcon-7b-instruct", "falcon-40b-instruct"]` | https://huggingface.co/tiiuae |
|
||||
| `spacy.Llama2.v1` | Meta AI | `["Llama-2-7b-hf", "Llama-2-13b-hf", "Llama-2-70b-hf"]` | https://huggingface.co/meta-llama |
|
||||
| `spacy.Mistral.v1` | Mistral AI | `["Mistral-7B-v0.1", "Mistral-7B-Instruct-v0.1"]` | https://huggingface.co/mistralai |
|
||||
| `spacy.StableLM.v1` | Stability AI | `["stablelm-base-alpha-3b", "stablelm-base-alpha-7b", "stablelm-tuned-alpha-3b", "stablelm-tuned-alpha-7b"]` | https://huggingface.co/stabilityai |
|
||||
| `spacy.OpenLLaMA.v1` | OpenLM Research | `["open_llama_3b", "open_llama_7b", "open_llama_7b_v2", "open_llama_13b"]` | https://huggingface.co/openlm-research |
|
||||
|
||||
<Infobox variant="warning" title="Gated models on Hugging Face" id="hf_licensing">
|
||||
|
||||
Some models available on Hugging Face (HF), such as Llama 2, are _gated models_.
|
||||
That means that users have to fulfill certain requirements to be allowed access
|
||||
to these models. In the case of Llama 2 you'll need to agree to Meta's
|
||||
Terms of Service while logged in with your HF account. After Meta grants you
|
||||
permission to use Llama 2, you'll be able to download and use the model.
|
||||
|
||||
This requires that you are logged in with your HF account on your local
|
||||
machine - check out the HF quick start documentation. In a nutshell, you'll need
|
||||
to create an access token on HF and log in to HF using your access token, e. g.
|
||||
with `huggingface-cli login`.
|
||||
|
||||
</Infobox>
|
||||
|
||||
Note that Hugging Face will download the model the first time you use it - you
|
||||
can
|
||||
[define the cached directory](https://huggingface.co/docs/huggingface_hub/main/en/guides/manage-cache)
|
||||
|
|
|
@ -89,6 +89,21 @@ architectures and their arguments and hyperparameters.
|
|||
| `negative_weight` <Tag variant="new">3.5.1</Tag> | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~ |
|
||||
| `allow_overlap` <Tag variant="new">3.5.1</Tag> | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~ |
|
||||
|
||||
<Infobox variant="warning">
|
||||
|
||||
If you set a non-default value for `spans_key`, you'll have to update
|
||||
`[training.score_weights]` as well so that weights are computed properly. E. g.
|
||||
for `spans_key == "myspankey"`, include this in your config:
|
||||
|
||||
```ini
|
||||
[training.score_weights]
|
||||
spans_myspankey_f = 1.0
|
||||
spans_myspankey_p = 0.0
|
||||
spans_myspankey_r = 0.0
|
||||
```
|
||||
|
||||
</Infobox>
|
||||
|
||||
```python
|
||||
%%GITHUB_SPACY/spacy/pipeline/spancat.py
|
||||
```
|
||||
|
|
Before Width: | Height: | Size: 6.8 KiB After Width: | Height: | Size: 6.8 KiB |
|
@ -31,8 +31,6 @@ for ent in doc.ents:
|
|||
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
|
||||
our example sentence and its named entities look like:
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualization of entities"
|
||||
src="/images/displacy-ent1.html"
|
||||
height={100}
|
||||
/>
|
||||
<Standalone height={120}>
|
||||
<div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}><mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Apple <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> is looking at buying <mark style={{ background: '#feca74', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>U.K. <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>GPE</span></mark> startup for <mark style={{ background: '#e4e7d2', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>$1 billion <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>MONEY</span></mark></div>
|
||||
</Standalone>
|
||||
|
|
|
@ -56,8 +56,7 @@ for token in doc:
|
|||
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
|
||||
our example sentence and its dependencies look like:
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualization of dependencies and entities"
|
||||
src="/images/displacy-long.html"
|
||||
height={450}
|
||||
<ImageScrollable
|
||||
src="/images/displacy-long.svg"
|
||||
width={1975}
|
||||
/>
|
||||
|
|
|
@ -170,8 +170,8 @@ to be `"databricks/dolly-v2-12b"` for better performance.
|
|||
### Example 3: Create the component directly in Python {id="example-3"}
|
||||
|
||||
The `llm` component behaves as any other component does, and there are
|
||||
[task-specific components](/api/large-language-models#config) defined to
|
||||
help you hit the ground running with a reasonable built-in task implementation.
|
||||
[task-specific components](/api/large-language-models#config) defined to help
|
||||
you hit the ground running with a reasonable built-in task implementation.
|
||||
|
||||
```python
|
||||
import spacy
|
||||
|
@ -436,7 +436,7 @@ respectively. Alternatively you can use LangChain to access hosted or local
|
|||
models by specifying one of the models registered with the `langchain.` prefix.
|
||||
|
||||
<Infobox>
|
||||
_Why LangChain if there are also are a native REST and a HuggingFace interface? When should I use what?_
|
||||
_Why LangChain if there are also native REST and HuggingFace interfaces? When should I use what?_
|
||||
|
||||
Third-party libraries like `langchain` focus on prompt management, integration
|
||||
of many different LLM APIs, and other related features such as conversational
|
||||
|
@ -476,6 +476,7 @@ provider's documentation.
|
|||
| [`spacy.Curie.v2`](/api/large-language-models#models-rest) | OpenAI’s `curie` model family. |
|
||||
| [`spacy.Babbage.v2`](/api/large-language-models#models-rest) | OpenAI’s `babbage` model family. |
|
||||
| [`spacy.Ada.v2`](/api/large-language-models#models-rest) | OpenAI’s `ada` model family. |
|
||||
| [`spacy.Azure.v1`](/api/large-language-models#models-rest) | Azure's OpenAI models. |
|
||||
| [`spacy.Command.v1`](/api/large-language-models#models-rest) | Cohere’s `command` model family. |
|
||||
| [`spacy.Claude-2.v1`](/api/large-language-models#models-rest) | Anthropic’s `claude-2` model family. |
|
||||
| [`spacy.Claude-1.v1`](/api/large-language-models#models-rest) | Anthropic’s `claude-1` model family. |
|
||||
|
@ -484,8 +485,10 @@ provider's documentation.
|
|||
| [`spacy.Claude-1-0.v1`](/api/large-language-models#models-rest) | Anthropic’s `claude-1.0` model family. |
|
||||
| [`spacy.Claude-1-2.v1`](/api/large-language-models#models-rest) | Anthropic’s `claude-1.2` model family. |
|
||||
| [`spacy.Claude-1-3.v1`](/api/large-language-models#models-rest) | Anthropic’s `claude-1.3` model family. |
|
||||
| [`spacy.PaLM.v1`](/api/large-language-models#models-rest) | Google’s `PaLM` model family. |
|
||||
| [`spacy.Dolly.v1`](/api/large-language-models#models-hf) | Dolly models through HuggingFace. |
|
||||
| [`spacy.Falcon.v1`](/api/large-language-models#models-hf) | Falcon models through HuggingFace. |
|
||||
| [`spacy.Mistral.v1`](/api/large-language-models#models-hf) | Mistral models through HuggingFace. |
|
||||
| [`spacy.Llama2.v1`](/api/large-language-models#models-hf) | Llama2 models through HuggingFace. |
|
||||
| [`spacy.StableLM.v1`](/api/large-language-models#models-hf) | StableLM models through HuggingFace. |
|
||||
| [`spacy.OpenLLaMA.v1`](/api/large-language-models#models-hf) | OpenLLaMA models through HuggingFace. |
|
||||
|
|
|
@ -290,11 +290,7 @@ for token in doc:
|
|||
| toward | `prep` | shift | `NOUN` | manufacturers |
|
||||
| manufacturers | `pobj` | toward | `ADP` | |
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualization of dependencies and entities 2"
|
||||
src="/images/displacy-long2.html"
|
||||
height={450}
|
||||
/>
|
||||
<ImageScrollable src="/images/displacy-long2.svg" width={1275} />
|
||||
|
||||
Because the syntactic relations form a tree, every word has **exactly one
|
||||
head**. You can therefore iterate over the arcs in the tree by iterating over
|
||||
|
@ -709,11 +705,9 @@ doc = nlp(text)
|
|||
displacy.serve(doc, style="ent")
|
||||
```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for entities"
|
||||
src="/images/displacy-ent2.html"
|
||||
height={180}
|
||||
/>
|
||||
<Standalone height={180}>
|
||||
<div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>When <mark style={{ background: '#aa9cfc', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Sebastian Thrun <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>PERSON</span></mark> started working on self-driving cars at <mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> in <mark style={{ background: '#bfe1d9', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>2007 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>DATE</span></mark>, few people outside of the company took him seriously.</div>
|
||||
</Standalone>
|
||||
|
||||
## Entity Linking {id="entity-linking"}
|
||||
|
||||
|
@ -723,6 +717,10 @@ identifier from a knowledge base (KB). You can create your own
|
|||
[`KnowledgeBase`](/api/kb) and [train](/usage/training) a new
|
||||
[`EntityLinker`](/api/entitylinker) using that custom knowledge base.
|
||||
|
||||
As an example of how to define a KnowledgeBase and train an entity linker model,
|
||||
see [`this tutorial`](https://github.com/explosion/projects/blob/v3/tutorials/nel_emerson)
|
||||
using [spaCy projects](/usage/projects).
|
||||
|
||||
### Accessing entity identifiers {id="entity-linking-accessing",model="entity linking"}
|
||||
|
||||
The annotated KB identifier is accessible as either a hash value or as a string,
|
||||
|
@ -733,6 +731,7 @@ object, or the `ent_kb_id` and `ent_kb_id_` attributes of a
|
|||
```python
|
||||
import spacy
|
||||
|
||||
# "my_custom_el_pipeline" is assumed to be a custom NLP pipeline that was trained and serialized to disk
|
||||
nlp = spacy.load("my_custom_el_pipeline")
|
||||
doc = nlp("Ada Lovelace was born in London")
|
||||
|
||||
|
|
|
@ -656,9 +656,9 @@ locally.
|
|||
You can list one or more remotes in the `remotes` section of your
|
||||
[`project.yml`](#project-yml) by mapping a string name to the URL of the
|
||||
storage. Under the hood, spaCy uses
|
||||
[`Pathy`](https://github.com/justindujardin/pathy) to communicate with the
|
||||
remote storages, so you can use any protocol that `Pathy` supports, including
|
||||
[S3](https://aws.amazon.com/s3/),
|
||||
[`cloudpathlib`](https://cloudpathlib.drivendata.org) to communicate with the
|
||||
remote storages, so you can use any protocol that `cloudpathlib` supports,
|
||||
including [S3](https://aws.amazon.com/s3/),
|
||||
[Google Cloud Storage](https://cloud.google.com/storage), and the local
|
||||
filesystem, although you may need to install extra dependencies to use certain
|
||||
protocols.
|
||||
|
|
|
@ -1144,10 +1144,9 @@ relations and tokens we want to match:
|
|||
> displacy.serve(doc)
|
||||
> ```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualization of dependencies"
|
||||
src="/images/displacy-dep-founded.html"
|
||||
height={450}
|
||||
<ImageScrollable
|
||||
src="/images/displacy-dep-founded.svg"
|
||||
width={925}
|
||||
/>
|
||||
|
||||
The relations we're interested in are:
|
||||
|
|
|
@ -586,11 +586,9 @@ After installing the package, the custom colors will be used when visualizing
|
|||
text with `displacy`. Whenever the label `SNEK` is assigned, it will be
|
||||
displayed in `#3dff74`.
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualization of entities"
|
||||
src="/images/displacy-ent-snek.html"
|
||||
height={100}
|
||||
/>
|
||||
<Standalone height={100}>
|
||||
<div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>🌱🌿 <mark style={{ background: '#3dff74', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>🐍 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>SNEK</span></mark> ____ 🌳🌲 ____ <mark style={{ background: '#cfc5ff', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>👨🌾 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>HUMAN</span></mark> 🏘️</div>
|
||||
</Standalone>
|
||||
|
||||
## Saving, loading and distributing trained pipelines {id="models"}
|
||||
|
||||
|
|
|
@ -77,11 +77,9 @@ doc.spans["custom"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
|
|||
displacy.serve(doc, style="span", options={"spans_key": "custom"})
|
||||
```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for overlapping spans"
|
||||
src="/images/displacy-span.html"
|
||||
height={180}
|
||||
/>
|
||||
<Standalone height={100}>
|
||||
<div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#7aecec', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#7aecec', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>ORG</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#feca74', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>GPE</span></span></span>.</div>
|
||||
</Standalone>
|
||||
|
||||
## Additional features and improvements
|
||||
|
||||
|
|
140
website/docs/usage/v3-7.mdx
Normal file
|
@ -0,0 +1,140 @@
|
|||
---
|
||||
title: What's New in v3.7
|
||||
teaser: New features and how to upgrade
|
||||
menu:
|
||||
- ['New Features', 'features']
|
||||
- ['Upgrading Notes', 'upgrading']
|
||||
---
|
||||
|
||||
## New features {id="features",hidden="true"}
|
||||
|
||||
spaCy v3.7 adds support for Python 3.12, introduces the new standalone library
|
||||
[Weasel](https://github.com/explosion/weasel) for project workflows, and updates
|
||||
the transformer-based trained pipelines to use our new
|
||||
[Curated Transformers](https://github.com/explosion/curated-transformers)
|
||||
library.
|
||||
|
||||
This release drops support for Python 3.6.
|
||||
|
||||
### Weasel {id="weasel"}
|
||||
|
||||
The [spaCy projects](/usage/projects) functionality has been moved into a new
|
||||
standalone library [Weasel](https://github.com/explosion/weasel). This brings
|
||||
minor changes to spaCy-specific settings in spaCy projects (see
|
||||
[upgrading](#upgrading) below), but also makes it possible to use the same
|
||||
workflow functionality outside of spaCy.
|
||||
|
||||
All `spacy project` commands should run as before, just now they're using Weasel
|
||||
under the hood.
|
||||
|
||||
<Infobox title="Remote storage for Python 3.12" variant="warning">
|
||||
|
||||
Remote storage for spaCy projects is not yet supported for Python 3.12. Use
|
||||
Python 3.11 or earlier for remote storage.
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Registered vectors {id="custom-vectors"}
|
||||
|
||||
You can specify a custom registered vectors class under `[nlp.vectors]` in order
|
||||
to use static vectors in formats other than the ones supported by
|
||||
[`Vectors`](/api/vectors). To implement your custom vectors, extend the abstract
|
||||
class [`BaseVectors`](/api/basevectors). See an example using
|
||||
[BPEmb subword embeddings](/usage/embeddings-transformers#custom-vectors).
|
||||
|
||||
### Additional features and improvements {id="additional-features-and-improvements"}
|
||||
|
||||
- Add support for Python 3.12.
|
||||
- Extend to Thinc v8.2.
|
||||
- Extend `transformers` extra to `spacy-transformers` v1.3.
|
||||
- Add `--spans-key` option for CLI evaluation with `spacy benchmark accuracy`.
|
||||
- Load the CLI module lazily for `spacy.info`.
|
||||
- Add type stubs for `spacy.training.example`.
|
||||
- Warn for unsupported pattern keys in dependency matcher.
|
||||
- `Language.replace_listeners`: Pass the replaced listener and the `tok2vec`
|
||||
pipe to the callback in order to support `spacy-curated-transformers`.
|
||||
- Always use `tqdm` with `disable=None` in order to disable output in
|
||||
non-interactive environments.
|
||||
- Language updates:
|
||||
- Add left and right pointing angle brackets as punctuation to ancient Greek.
|
||||
- Update example sentences for Turkish.
|
||||
- Package setup updates:
|
||||
- Update NumPy build constraints for NumPy 1.25+. For Python 3.9+, it is no
|
||||
longer necessary to set build constraints while building binary wheels.
|
||||
- Refactor Cython profiling in order to disable profiling for Python 3.12 in
|
||||
the package setup, since Cython does not currently support profiling for
|
||||
Python 3.12.
|
||||
|
||||
## Trained pipelines {id="pipelines"}
|
||||
|
||||
### Pipeline updates {id="pipeline-updates"}
|
||||
|
||||
The transformer-based `trf` pipelines have been updated to use our new
|
||||
[Curated Transformers](https://github.com/explosion/curated-transformers)
|
||||
library using the Thinc model wrappers and pipeline component from
|
||||
[spaCy Curated Transformers](https://github.com/explosion/spacy-curated-transformers).
|
||||
|
||||
## Notes about upgrading from v3.6 {id="upgrading"}
|
||||
|
||||
This release drops support for Python 3.6, drops mypy checks for Python 3.7 and
|
||||
removes the `ray` extra. In addition there are several minor changes for spaCy
|
||||
projects described in the following section.
|
||||
|
||||
### Backwards incompatibilities for spaCy Projects {id="upgrading-projects"}
|
||||
|
||||
`spacy project` has a few backwards incompatibilities due to the transition to
|
||||
the standalone library [Weasel](https://github.com/explosion/weasel), which is
|
||||
not as tightly coupled to spaCy. Weasel produces warnings when it detects older
|
||||
spaCy-specific settings in your environment or project config.
|
||||
|
||||
- Support for the `spacy_version` configuration key has been dropped.
|
||||
- Support for the `check_requirements` configuration key has been dropped due to
|
||||
the deprecation of `pkg_resources`.
|
||||
- The `SPACY_CONFIG_OVERRIDES` environment variable is no longer checked. You
|
||||
can set configuration overrides using `WEASEL_CONFIG_OVERRIDES`.
|
||||
- Support for `SPACY_PROJECT_USE_GIT_VERSION` environment variable has been
|
||||
dropped.
|
||||
- Error codes are now Weasel-specific and do not follow spaCy error codes.
|
||||
|
||||
### Pipeline package version compatibility {id="version-compat"}
|
||||
|
||||
> #### Using legacy implementations
|
||||
>
|
||||
> In spaCy v3, you'll still be able to load and reference legacy implementations
|
||||
> via [`spacy-legacy`](https://github.com/explosion/spacy-legacy), even if the
|
||||
> components or architectures change and newer versions are available in the
|
||||
> core library.
|
||||
|
||||
When you're loading a pipeline package trained with an earlier version of spaCy
|
||||
v3, you will see a warning telling you that the pipeline may be incompatible.
|
||||
This doesn't necessarily have to be true, but we recommend running your
|
||||
pipelines against your test suite or evaluation data to make sure there are no
|
||||
unexpected results.
|
||||
|
||||
If you're using one of the [trained pipelines](/models) we provide, you should
|
||||
run [`spacy download`](/api/cli#download) to update to the latest version. To
|
||||
see an overview of all installed packages and their compatibility, you can run
|
||||
[`spacy validate`](/api/cli#validate).
|
||||
|
||||
If you've trained your own custom pipeline and you've confirmed that it's still
|
||||
working as expected, you can update the spaCy version requirements in the
|
||||
[`meta.json`](/api/data-formats#meta):
|
||||
|
||||
```diff
|
||||
- "spacy_version": ">=3.6.0,<3.7.0",
|
||||
+ "spacy_version": ">=3.6.0,<3.8.0",
|
||||
```
|
||||
|
||||
### Updating v3.6 configs
|
||||
|
||||
To update a config from spaCy v3.6 with the new v3.7 settings, run
|
||||
[`init fill-config`](/api/cli#init-fill-config):
|
||||
|
||||
```cli
|
||||
$ python -m spacy init fill-config config-v3.6.cfg config-v3.7.cfg
|
||||
```
|
||||
|
||||
In many cases ([`spacy train`](/api/cli#train),
|
||||
[`spacy.load`](/api/top-level#spacy.load)), the new defaults will be filled in
|
||||
automatically, but you'll need to fill in the new settings to run
|
||||
[`debug config`](/api/cli#debug) and [`debug data`](/api/cli#debug-data).
|
|
@ -119,11 +119,9 @@ doc = nlp(text)
|
|||
displacy.serve(doc, style="ent")
|
||||
```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for entities"
|
||||
src="/images/displacy-ent2.html"
|
||||
height={180}
|
||||
/>
|
||||
<Standalone height={180}>
|
||||
<div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>When <mark style={{ background: '#aa9cfc', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Sebastian Thrun <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>PERSON</span></mark> started working on self-driving cars at <mark style={{ background: '#7aecec', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> in <mark style={{ background: '#bfe1d9', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>2007 <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>DATE</span></mark>, few people outside of the company took him seriously.</div>
|
||||
</Standalone>
|
||||
|
||||
The entity visualizer lets you customize the following `options`:
|
||||
|
||||
|
@ -148,11 +146,9 @@ use the `colors` setting to add your own colors for them.
|
|||
> displacy.serve(doc, style="ent", options=options)
|
||||
> ```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for entities (custom styling)"
|
||||
src="/images/displacy-ent-custom.html"
|
||||
height={225}
|
||||
/>
|
||||
<Standalone height={225}>
|
||||
<div style={{lineHeight: 2.5, fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18}}>But <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Google <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark> is starting from behind. The company made a late push into hardware, and <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Apple <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark>’s Siri, available on iPhones, and <mark style={{ background: 'linear-gradient(90deg, #aa9cfc, #fc9ce7)', padding: '0.45em 0.6em', margin: '0 0.25em', lineHeight: 1, borderRadius: '0.35em'}}>Amazon <span style={{ fontSize: '0.8em', fontWeight: 'bold', lineHeight: 1, borderRadius: '0.35em', marginLeft: '0.5rem'}}>ORG</span></mark>’s Alexa software, which runs on its Echo and Dot devices, have clear leads in consumer adoption.</div>
|
||||
</Standalone>
|
||||
|
||||
The above example uses a little trick: Since the background color values are
|
||||
added as the `background` style attribute, you can use any
|
||||
|
@ -197,11 +193,9 @@ doc.spans["sc"] = [
|
|||
displacy.serve(doc, style="span")
|
||||
```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for overlapping spans"
|
||||
src="/images/displacy-span.html"
|
||||
height={180}
|
||||
/>
|
||||
<Standalone height={100}>
|
||||
<div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#7aecec', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#7aecec', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>ORG</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#7aecec', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#feca74', top: 57, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#feca74', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>GPE</span></span></span>.</div>
|
||||
</Standalone>
|
||||
|
||||
The span visualizer lets you customize the following `options`:
|
||||
|
||||
|
@ -223,11 +217,9 @@ specify which one displaCy should use with `spans_key` (`sc` is the default).
|
|||
> displacy.serve(doc, style="span", options=options)
|
||||
> ```
|
||||
|
||||
<Iframe
|
||||
title="displaCy visualizer for spans (custom spans_key)"
|
||||
src="/images/displacy-span-custom.html"
|
||||
height={225}
|
||||
/>
|
||||
<Standalone height={100}>
|
||||
<div style={{ lineHeight: 2.5, direction: 'ltr', fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'", fontSize: 18 }}>Welcome to the <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>Bank<span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span><span style={{ background: '#ddd', top: 40, height: 4, borderTopLeftRadius: 3, borderBottomLeftRadius: 3, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}><span style={{ background: '#ddd', color: '#000', top: '-0.5em', padding: '2px 3px', position: 'absolute', fontSize: '0.6em', fontWeight: 'bold', lineHeight: 1, borderRadius: 3 }}>BANK</span></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>of <span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span> <span style={{ fontWeight: 'bold', display: 'inline-block', position: 'relative'}}>China<span style={{ background: '#ddd', top: 40, height: 4, left: -1, width: 'calc(100% + 2px)', position: 'absolute' }}></span></span>.</div>
|
||||
</Standalone>
|
||||
|
||||
## Using displaCy in Jupyter notebooks {id="jupyter"}
|
||||
|
||||
|
|
|
@ -9,13 +9,9 @@
|
|||
{ "text": "Models & Languages", "url": "/usage/models" },
|
||||
{ "text": "Facts & Figures", "url": "/usage/facts-figures" },
|
||||
{ "text": "spaCy 101", "url": "/usage/spacy-101" },
|
||||
{ "text": "New in v3.0", "url": "/usage/v3" },
|
||||
{ "text": "New in v3.1", "url": "/usage/v3-1" },
|
||||
{ "text": "New in v3.2", "url": "/usage/v3-2" },
|
||||
{ "text": "New in v3.3", "url": "/usage/v3-3" },
|
||||
{ "text": "New in v3.4", "url": "/usage/v3-4" },
|
||||
{ "text": "New in v3.5", "url": "/usage/v3-5" },
|
||||
{ "text": "New in v3.6", "url": "/usage/v3-6" }
|
||||
{ "text": "New in v3.7", "url": "/usage/v3-7" },
|
||||
{ "text": "New in v3.6", "url": "/usage/v3-6" },
|
||||
{ "text": "New in v3.5", "url": "/usage/v3-5" }
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
"indexName": "spacy"
|
||||
},
|
||||
"binderUrl": "explosion/spacy-io-binder",
|
||||
"binderVersion": "3.6",
|
||||
"binderVersion": "3.7",
|
||||
"sections": [
|
||||
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" },
|
||||
{ "id": "models", "title": "Models Documentation", "theme": "blue" },
|
||||
|
|
Before Width: | Height: | Size: 5.1 KiB After Width: | Height: | Size: 5.1 KiB |
|
@ -1,80 +0,0 @@
|
|||
<div
|
||||
class="entities"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 18px;
|
||||
"
|
||||
>But
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>Google
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>ORG</span
|
||||
></mark
|
||||
>is starting from behind. The company made a late push into hardware, and
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>Apple
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>ORG</span
|
||||
></mark
|
||||
>’s Siri, available on iPhones, and
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: linear-gradient(90deg, #aa9cfc, #fc9ce7);
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>Amazon
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>ORG</span
|
||||
></mark
|
||||
>’s Alexa software, which runs on its Echo and Dot devices, have clear leads in consumer
|
||||
adoption.</div
|
||||
>
|
|
@ -1,59 +0,0 @@
|
|||
<div
|
||||
class="entities"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 16px;
|
||||
"
|
||||
>
|
||||
🌱🌿
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #3dff74;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>🐍
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>SNEK</span
|
||||
></mark
|
||||
>
|
||||
____ 🌳🌲 ____
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #cfc5ff;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>👨🌾
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>HUMAN</span
|
||||
></mark
|
||||
>
|
||||
🏘️
|
||||
</div>
|
|
@ -1,84 +0,0 @@
|
|||
<div
|
||||
class="entities"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 16px;
|
||||
"
|
||||
>
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #7aecec;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
Apple
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>ORG</span
|
||||
>
|
||||
</mark>
|
||||
is looking at buying
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #feca74;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
U.K.
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>GPE</span
|
||||
>
|
||||
</mark>
|
||||
startup for
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #e4e7d2;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
$1 billion
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>MONEY</span
|
||||
>
|
||||
</mark>
|
||||
</div>
|
|
@ -1,86 +0,0 @@
|
|||
<div
|
||||
class="entities"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 18px;
|
||||
"
|
||||
>
|
||||
When
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #aa9cfc;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
Sebastian Thrun
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>PERSON</span
|
||||
>
|
||||
</mark>
|
||||
started working on self-driving cars at
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #7aecec;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
Google
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>ORG</span
|
||||
>
|
||||
</mark>
|
||||
in
|
||||
<mark
|
||||
class="entity"
|
||||
style="
|
||||
background: #bfe1d9;
|
||||
padding: 0.45em 0.6em;
|
||||
margin: 0 0.25em;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
"
|
||||
>
|
||||
2007
|
||||
<span
|
||||
style="
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 0.35em;
|
||||
text-transform: uppercase;
|
||||
vertical-align: middle;
|
||||
margin-left: 0.5rem;
|
||||
"
|
||||
>DATE</span
|
||||
>
|
||||
</mark>
|
||||
, few people outside of the company took him seriously.
|
||||
</div>
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
212
website/public/images/displacy-long2.svg
Normal file
|
@ -0,0 +1,212 @@
|
|||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
id="0"
|
||||
class="displacy"
|
||||
width="1275"
|
||||
height="399.5"
|
||||
style="
|
||||
max-width: none;
|
||||
height: 399.5px;
|
||||
color: #000000;
|
||||
background: #ffffff;
|
||||
font-family: Arial;
|
||||
"
|
||||
>
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="225">cars</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="400">shift</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="575">insurance</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="750">liability</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="925">toward</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">ADP</tspan>
|
||||
</text>
|
||||
|
||||
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
||||
<tspan class="displacy-word" fill="currentColor" x="1100">manufacturers</tspan>
|
||||
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">NOUN</tspan>
|
||||
</text>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-0"
|
||||
stroke-width="2px"
|
||||
d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-0"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
amod
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M70,266.5 L62,254.5 78,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-1"
|
||||
stroke-width="2px"
|
||||
d="M245,264.5 C245,177.0 390.0,177.0 390.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-1"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
nsubj
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M245,266.5 L237,254.5 253,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-2"
|
||||
stroke-width="2px"
|
||||
d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-2"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
compound
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M595,266.5 L587,254.5 603,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-3"
|
||||
stroke-width="2px"
|
||||
d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-3"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
dobj
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M745.0,266.5 L753.0,254.5 737.0,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-4"
|
||||
stroke-width="2px"
|
||||
d="M420,264.5 C420,2.0 925.0,2.0 925.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-4"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
prep
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M925.0,266.5 L933.0,254.5 917.0,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
|
||||
<g class="displacy-arrow">
|
||||
<path
|
||||
class="displacy-arc"
|
||||
id="arrow-0-5"
|
||||
stroke-width="2px"
|
||||
d="M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
></path>
|
||||
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
||||
<textpath
|
||||
xlink:href="#arrow-0-5"
|
||||
class="displacy-label"
|
||||
startOffset="50%"
|
||||
fill="currentColor"
|
||||
text-anchor="middle"
|
||||
>
|
||||
pobj
|
||||
</textpath>
|
||||
</text>
|
||||
<path
|
||||
class="displacy-arrowhead"
|
||||
d="M1090.0,266.5 L1098.0,254.5 1082.0,254.5"
|
||||
fill="currentColor"
|
||||
></path>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 6.8 KiB |
|
@ -1,84 +0,0 @@
|
|||
<div
|
||||
class="spans"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 18px;
|
||||
direction: ltr;
|
||||
"
|
||||
>
|
||||
Welcome to the
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
Bank
|
||||
<span
|
||||
style="
|
||||
background: #ddd;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
<span
|
||||
style="
|
||||
background: #ddd;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
border-top-left-radius: 3px;
|
||||
border-bottom-left-radius: 3px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
<span
|
||||
style="
|
||||
background: #ddd;
|
||||
color: #000;
|
||||
top: -0.5em;
|
||||
padding: 2px 3px;
|
||||
position: absolute;
|
||||
font-size: 0.6em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 3px;
|
||||
"
|
||||
>
|
||||
BANK
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
of
|
||||
<span
|
||||
style="
|
||||
background: #ddd;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
China
|
||||
|
||||
<span
|
||||
style="
|
||||
background: #ddd;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
</span>
|
||||
.
|
||||
</div>
|
|
@ -1,123 +0,0 @@
|
|||
<div
|
||||
class="spans"
|
||||
style="
|
||||
line-height: 2.5;
|
||||
direction: ltr;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 18px;
|
||||
"
|
||||
>
|
||||
Welcome to the
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
Bank
|
||||
<span
|
||||
style="
|
||||
background: #7aecec;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
<span
|
||||
style="
|
||||
background: #7aecec;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
border-top-left-radius: 3px;
|
||||
border-bottom-left-radius: 3px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
<span
|
||||
style="
|
||||
background: #7aecec;
|
||||
color: #000;
|
||||
top: -0.5em;
|
||||
padding: 2px 3px;
|
||||
position: absolute;
|
||||
font-size: 0.6em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 3px;
|
||||
"
|
||||
>
|
||||
ORG
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
of
|
||||
|
||||
<span
|
||||
style="
|
||||
background: #7aecec;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative">
|
||||
China
|
||||
<span
|
||||
style="
|
||||
background: #7aecec;
|
||||
top: 40px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
<span
|
||||
style="
|
||||
background: #feca74;
|
||||
top: 57px;
|
||||
height: 4px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
</span>
|
||||
<span
|
||||
style="
|
||||
background: #feca74;
|
||||
top: 57px;
|
||||
height: 4px;
|
||||
border-top-left-radius: 3px;
|
||||
border-bottom-left-radius: 3px;
|
||||
left: -1px;
|
||||
width: calc(100% + 2px);
|
||||
position: absolute;
|
||||
"
|
||||
>
|
||||
<span
|
||||
style="
|
||||
background: #feca74;
|
||||
color: #000;
|
||||
top: -0.5em;
|
||||
padding: 2px 3px;
|
||||
position: absolute;
|
||||
font-size: 0.6em;
|
||||
font-weight: bold;
|
||||
line-height: 1;
|
||||
border-radius: 3px;
|
||||
"
|
||||
>
|
||||
GPE
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
.
|
||||
</div>
|
|
@ -107,6 +107,22 @@ const Image = ({ src, alt, title, href, ...props }) => {
|
|||
)
|
||||
}
|
||||
|
||||
const ImageScrollable = ({ src, alt, width, ...props }) => {
|
||||
return (
|
||||
<figure className={classNames(classes.standalone, classes.scrollable)}>
|
||||
<img className={classes['image-scrollable']} src={src} alt={alt} width={width} height="auto" />
|
||||
</figure>
|
||||
)
|
||||
}
|
||||
|
||||
const Standalone = ({ height, children, ...props }) => {
|
||||
return (
|
||||
<figure className={classes.standalone} style={{ height }}>
|
||||
{children}
|
||||
</figure>
|
||||
)
|
||||
}
|
||||
|
||||
const ImageFill = ({ image, ...props }) => {
|
||||
return (
|
||||
<span
|
||||
|
@ -137,4 +153,4 @@ const GoogleSheet = ({ id, link, height, button = 'View full table' }) => {
|
|||
)
|
||||
}
|
||||
|
||||
export { YouTube, SoundCloud, Iframe, Image, ImageFill, GoogleSheet }
|
||||
export { YouTube, SoundCloud, Iframe, Image, ImageFill, ImageScrollable, GoogleSheet, Standalone }
|
||||
|
|
|
@ -13,7 +13,7 @@ import Aside from './components/aside'
|
|||
import Button from './components/button'
|
||||
import Tag from './components/tag'
|
||||
import Grid from './components/grid'
|
||||
import { YouTube, SoundCloud, Iframe, Image, GoogleSheet } from './components/embed'
|
||||
import { YouTube, SoundCloud, Iframe, Image, ImageScrollable, GoogleSheet, Standalone } from './components/embed'
|
||||
import Project from './widgets/project'
|
||||
import { Integration, IntegrationLogo } from './widgets/integration.js'
|
||||
import { Logos, Colors, Patterns } from './widgets/styleguide'
|
||||
|
@ -90,6 +90,8 @@ export const remarkComponents = {
|
|||
* For regular img elements it is not possible to pass properties
|
||||
*/
|
||||
Image,
|
||||
ImageScrollable,
|
||||
Standalone,
|
||||
|
||||
Label,
|
||||
Logos,
|
||||
|
|
|
@ -26,12 +26,20 @@
|
|||
padding: var(--spacing-xs)
|
||||
margin-bottom: var(--spacing-md)
|
||||
|
||||
.scrollable
|
||||
max-width: 100%
|
||||
overflow: auto
|
||||
|
||||
.image
|
||||
position: relative
|
||||
display: block
|
||||
max-width: 100%
|
||||
margin: 0 auto
|
||||
|
||||
.image-scrollable
|
||||
display: block
|
||||
max-width: fit-content
|
||||
|
||||
.figure-fill
|
||||
display: block
|
||||
position: relative
|
||||
|
|
|
@ -58,8 +58,8 @@ const AlertSpace = ({ nightly, legacy }) => {
|
|||
}
|
||||
|
||||
const navAlert = (
|
||||
<Link to="/usage/v3-6" noLinkLayout>
|
||||
<strong>💥 Out now:</strong> spaCy v3.6
|
||||
<Link to="https://form.typeform.com/to/WlflqP1b" noLinkLayout>
|
||||
💥 Interested in <strong>Premium spaCy Models</strong>?
|
||||
</Link>
|
||||
)
|
||||
|
||||
|
|