Merge branch 'develop' into nightly.spacy.io

Ines Montani 2020-10-08 11:16:28 +02:00
commit 4f47f33793
25 changed files with 231 additions and 155 deletions

View File

@ -29,7 +29,7 @@ dist/$(SPACY_BIN) : $(WHEELHOUSE)/spacy-$(PYVER)-$(version).stamp
--disable-cache \ --disable-cache \
-o $@ \ -o $@ \
$(package)==$(version) \ $(package)==$(version) \
$(SPACY_EXTRAS) "$(SPACY_EXTRAS)"
chmod a+rx $@ chmod a+rx $@
cp $@ dist/spacy.pex cp $@ dist/spacy.pex

View File

@ -65,9 +65,11 @@ console_scripts =
[options.extras_require] [options.extras_require]
lookups = lookups =
spacy_lookups_data==1.0.0rc0 spacy_lookups_data>=1.0.0rc0,<1.0.0
transformers = transformers =
spacy_transformers>=1.0.0a17,<1.0.0 spacy_transformers>=1.0.0a17,<1.0.0
ray =
spacy_ray>=0.1.0,<1.0.0
cuda = cuda =
cupy>=5.0.0b4,<9.0.0 cupy>=5.0.0b4,<9.0.0
cuda80 = cuda80 =

View File

@ -843,7 +843,7 @@ class Language:
*, *,
config: Dict[str, Any] = SimpleFrozenDict(), config: Dict[str, Any] = SimpleFrozenDict(),
validate: bool = True, validate: bool = True,
) -> None: ) -> Callable[[Doc], Doc]:
"""Replace a component in the pipeline. """Replace a component in the pipeline.
name (str): Name of the component to replace. name (str): Name of the component to replace.
@ -852,6 +852,7 @@ class Language:
component. Will be merged with default config, if available. component. Will be merged with default config, if available.
validate (bool): Whether to validate the component config against the validate (bool): Whether to validate the component config against the
arguments and types expected by the factory. arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The new pipeline component.
DOCS: https://nightly.spacy.io/api/language#replace_pipe DOCS: https://nightly.spacy.io/api/language#replace_pipe
""" """
@ -866,9 +867,11 @@ class Language:
self.remove_pipe(name) self.remove_pipe(name)
if not len(self._components) or pipe_index == len(self._components): if not len(self._components) or pipe_index == len(self._components):
# we have no components to insert before/after, or we're replacing the last component # we have no components to insert before/after, or we're replacing the last component
self.add_pipe(factory_name, name=name, config=config, validate=validate) return self.add_pipe(
factory_name, name=name, config=config, validate=validate
)
else: else:
self.add_pipe( return self.add_pipe(
factory_name, factory_name,
name=name, name=name,
before=pipe_index, before=pipe_index,
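
The practical effect of this hunk is that `replace_pipe` now hands back the freshly created component instead of `None`. A minimal sketch of the updated call pattern (re-adding the built-in `ner` factory purely for illustration):

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("ner")

# replace_pipe now returns the new component, so it can be used
# directly without a separate nlp.get_pipe() lookup
new_ner = nlp.replace_pipe("ner", "ner", config={}, validate=True)
assert nlp.get_pipe("ner") is new_ner
```
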
@ -1300,7 +1303,11 @@ class Language:
kwargs.setdefault("batch_size", batch_size) kwargs.setdefault("batch_size", batch_size)
# non-trainable components may have a pipe() implementation that refers to dummy # non-trainable components may have a pipe() implementation that refers to dummy
# predict and set_annotations methods # predict and set_annotations methods
if not hasattr(pipe, "pipe") or not hasattr(pipe, "is_trainable") or not pipe.is_trainable(): if (
not hasattr(pipe, "pipe")
or not hasattr(pipe, "is_trainable")
or not pipe.is_trainable()
):
docs = _pipe(docs, pipe, kwargs) docs = _pipe(docs, pipe, kwargs)
else: else:
docs = pipe.pipe(docs, **kwargs) docs = pipe.pipe(docs, **kwargs)
@ -1412,7 +1419,11 @@ class Language:
kwargs.setdefault("batch_size", batch_size) kwargs.setdefault("batch_size", batch_size)
# non-trainable components may have a pipe() implementation that refers to dummy # non-trainable components may have a pipe() implementation that refers to dummy
# predict and set_annotations methods # predict and set_annotations methods
if hasattr(proc, "pipe") and hasattr(proc, "is_trainable") and proc.is_trainable(): if (
hasattr(proc, "pipe")
and hasattr(proc, "is_trainable")
and proc.is_trainable()
):
f = functools.partial(proc.pipe, **kwargs) f = functools.partial(proc.pipe, **kwargs)
else: else:
# Apply the function, but yield the doc # Apply the function, but yield the doc

View File

@ -53,10 +53,18 @@ class AttributeRuler(Pipe):
self.name = name self.name = name
self.vocab = vocab self.vocab = vocab
self.matcher = Matcher(self.vocab, validate=validate) self.matcher = Matcher(self.vocab, validate=validate)
self.validate = validate
self.attrs = [] self.attrs = []
self._attrs_unnormed = [] # store for reference self._attrs_unnormed = [] # store for reference
self.indices = [] self.indices = []
def clear(self) -> None:
"""Reset all patterns."""
self.matcher = Matcher(self.vocab, validate=self.validate)
self.attrs = []
self._attrs_unnormed = []
self.indices = []
def initialize( def initialize(
self, self,
get_examples: Optional[Callable[[], Iterable[Example]]], get_examples: Optional[Callable[[], Iterable[Example]]],
@ -65,13 +73,14 @@ class AttributeRuler(Pipe):
patterns: Optional[Iterable[AttributeRulerPatternType]] = None, patterns: Optional[Iterable[AttributeRulerPatternType]] = None,
tag_map: Optional[TagMapType] = None, tag_map: Optional[TagMapType] = None,
morph_rules: Optional[MorphRulesType] = None, morph_rules: Optional[MorphRulesType] = None,
): ) -> None:
"""Initialize the attribute ruler by adding zero or more patterns. """Initialize the attribute ruler by adding zero or more patterns.
Rules can be specified as a sequence of dicts using the `patterns` Rules can be specified as a sequence of dicts using the `patterns`
keyword argument. You can also provide rules using the "tag map" or keyword argument. You can also provide rules using the "tag map" or
"morph rules" formats supported by spaCy prior to v3. "morph rules" formats supported by spaCy prior to v3.
""" """
self.clear()
if patterns: if patterns:
self.add_patterns(patterns) self.add_patterns(patterns)
if tag_map: if tag_map:
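
With `clear()` wired into `initialize()`, re-initializing an `attribute_ruler` now starts from an empty matcher instead of stacking new patterns on top of the old ones. A small sketch of the behaviour, using an illustrative pattern dict:

```python
import spacy

nlp = spacy.blank("en")
ruler = nlp.add_pipe("attribute_ruler")
# each pattern dict maps Matcher patterns to the attributes to set on the match
ruler.add_patterns([{"patterns": [[{"ORTH": "Dr."}]], "attrs": {"LEMMA": "doctor"}}])
assert len(ruler.matcher) == 1

# initialize() now calls clear() first, so previously added patterns are dropped
ruler.initialize(lambda: [])
assert len(ruler.matcher) == 0
```
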

View File

@ -8,6 +8,7 @@ from thinc.api import set_dropout_rate
import warnings import warnings
from ..kb import KnowledgeBase, Candidate from ..kb import KnowledgeBase, Candidate
from ..ml import empty_kb
from ..tokens import Doc from ..tokens import Doc
from .pipe import Pipe, deserialize_config from .pipe import Pipe, deserialize_config
from ..language import Language from ..language import Language
@ -41,11 +42,11 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
requires=["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"], requires=["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
assigns=["token.ent_kb_id"], assigns=["token.ent_kb_id"],
default_config={ default_config={
"kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 64},
"model": DEFAULT_NEL_MODEL, "model": DEFAULT_NEL_MODEL,
"labels_discard": [], "labels_discard": [],
"incl_prior": True, "incl_prior": True,
"incl_context": True, "incl_context": True,
"entity_vector_length": 64,
"get_candidates": {"@misc": "spacy.CandidateGenerator.v1"}, "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
}, },
default_score_weights={ default_score_weights={
@ -58,11 +59,11 @@ def make_entity_linker(
nlp: Language, nlp: Language,
name: str, name: str,
model: Model, model: Model,
kb_loader: Callable[[Vocab], KnowledgeBase],
*, *,
labels_discard: Iterable[str], labels_discard: Iterable[str],
incl_prior: bool, incl_prior: bool,
incl_context: bool, incl_context: bool,
entity_vector_length: int,
get_candidates: Callable[[KnowledgeBase, "Span"], Iterable[Candidate]], get_candidates: Callable[[KnowledgeBase, "Span"], Iterable[Candidate]],
): ):
"""Construct an EntityLinker component. """Construct an EntityLinker component.
@ -70,19 +71,21 @@ def make_entity_linker(
model (Model[List[Doc], Floats2d]): A model that learns document vector model (Model[List[Doc], Floats2d]): A model that learns document vector
representations. Given a batch of Doc objects, it should return a single representations. Given a batch of Doc objects, it should return a single
array, with one row per item in the batch. array, with one row per item in the batch.
kb (KnowledgeBase): The knowledge-base to link entities to.
labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction. labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model. incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
incl_context (bool): Whether or not to include the local context in the model. incl_context (bool): Whether or not to include the local context in the model.
entity_vector_length (int): Size of encoding vectors in the KB.
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention.
""" """
return EntityLinker( return EntityLinker(
nlp.vocab, nlp.vocab,
model, model,
name, name,
kb_loader=kb_loader,
labels_discard=labels_discard, labels_discard=labels_discard,
incl_prior=incl_prior, incl_prior=incl_prior,
incl_context=incl_context, incl_context=incl_context,
entity_vector_length=entity_vector_length,
get_candidates=get_candidates, get_candidates=get_candidates,
) )
@ -101,10 +104,10 @@ class EntityLinker(Pipe):
model: Model, model: Model,
name: str = "entity_linker", name: str = "entity_linker",
*, *,
kb_loader: Callable[[Vocab], KnowledgeBase],
labels_discard: Iterable[str], labels_discard: Iterable[str],
incl_prior: bool, incl_prior: bool,
incl_context: bool, incl_context: bool,
entity_vector_length: int,
get_candidates: Callable[[KnowledgeBase, "Span"], Iterable[Candidate]], get_candidates: Callable[[KnowledgeBase, "Span"], Iterable[Candidate]],
) -> None: ) -> None:
"""Initialize an entity linker. """Initialize an entity linker.
@ -113,10 +116,12 @@ class EntityLinker(Pipe):
model (thinc.api.Model): The Thinc Model powering the pipeline component. model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates a KnowledgeBase from a Vocab instance.
labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction. labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model. incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
incl_context (bool): Whether or not to include the local context in the model. incl_context (bool): Whether or not to include the local context in the model.
entity_vector_length (int): Size of encoding vectors in the KB.
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention.
DOCS: https://nightly.spacy.io/api/entitylinker#init DOCS: https://nightly.spacy.io/api/entitylinker#init
""" """
@ -127,15 +132,23 @@ class EntityLinker(Pipe):
"labels_discard": list(labels_discard), "labels_discard": list(labels_discard),
"incl_prior": incl_prior, "incl_prior": incl_prior,
"incl_context": incl_context, "incl_context": incl_context,
"entity_vector_length": entity_vector_length,
} }
self.kb = kb_loader(self.vocab)
self.get_candidates = get_candidates self.get_candidates = get_candidates
self.cfg = dict(cfg) self.cfg = dict(cfg)
self.distance = CosineDistance(normalize=False) self.distance = CosineDistance(normalize=False)
# how many neighbouring sentences to take into account # how many neighbouring sentences to take into account
self.n_sents = cfg.get("n_sents", 0) self.n_sents = cfg.get("n_sents", 0)
# create an empty KB by default. If you want to load a predefined one, specify it in 'initialize'.
self.kb = empty_kb(entity_vector_length)(self.vocab)
def _require_kb(self) -> None: def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
"""Define the KB of this pipe by providing a function that will
create it using this object's vocab."""
self.kb = kb_loader(self.vocab)
self.cfg["entity_vector_length"] = self.kb.entity_vector_length
def validate_kb(self) -> None:
# Raise an error if the knowledge base is not initialized. # Raise an error if the knowledge base is not initialized.
if len(self.kb) == 0: if len(self.kb) == 0:
raise ValueError(Errors.E139.format(name=self.name)) raise ValueError(Errors.E139.format(name=self.name))
@ -145,6 +158,7 @@ class EntityLinker(Pipe):
get_examples: Callable[[], Iterable[Example]], get_examples: Callable[[], Iterable[Example]],
*, *,
nlp: Optional[Language] = None, nlp: Optional[Language] = None,
kb_loader: Callable[[Vocab], KnowledgeBase] = None,
): ):
"""Initialize the pipe for training, using a representative set """Initialize the pipe for training, using a representative set
of data examples. of data examples.
@ -152,11 +166,16 @@ class EntityLinker(Pipe):
get_examples (Callable[[], Iterable[Example]]): Function that get_examples (Callable[[], Iterable[Example]]): Function that
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates a KnowledgeBase from a Vocab instance.
Note that providing this argument will overwrite all data accumulated in the current KB.
Use this only when loading a KB as such from file.
DOCS: https://nightly.spacy.io/api/entitylinker#initialize DOCS: https://nightly.spacy.io/api/entitylinker#initialize
""" """
self._ensure_examples(get_examples) self._ensure_examples(get_examples)
self._require_kb() if kb_loader is not None:
self.set_kb(kb_loader)
self.validate_kb()
nO = self.kb.entity_vector_length nO = self.kb.entity_vector_length
doc_sample = [] doc_sample = []
vector_sample = [] vector_sample = []
@ -192,7 +211,7 @@ class EntityLinker(Pipe):
DOCS: https://nightly.spacy.io/api/entitylinker#update DOCS: https://nightly.spacy.io/api/entitylinker#update
""" """
self._require_kb() self.validate_kb()
if losses is None: if losses is None:
losses = {} losses = {}
losses.setdefault(self.name, 0.0) losses.setdefault(self.name, 0.0)
@ -303,7 +322,7 @@ class EntityLinker(Pipe):
DOCS: https://nightly.spacy.io/api/entitylinker#predict DOCS: https://nightly.spacy.io/api/entitylinker#predict
""" """
self._require_kb() self.validate_kb()
entity_count = 0 entity_count = 0
final_kb_ids = [] final_kb_ids = []
if not docs: if not docs:
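
Taken together, these changes mean the `entity_linker` is now constructed with an empty KB of the configured `entity_vector_length`, and a real knowledge base is attached afterwards via `set_kb()` (or by passing `kb_loader` to `initialize()`). A minimal sketch with an illustrative entity and alias:

```python
import spacy
from spacy.kb import KnowledgeBase

nlp = spacy.blank("en")
# the component starts out with an empty KB of the configured vector length
entity_linker = nlp.add_pipe("entity_linker", config={"entity_vector_length": 3})

def create_kb(vocab):
    # build the KB from the shared vocab; entity and alias values are illustrative
    kb = KnowledgeBase(vocab, entity_vector_length=3)
    kb.add_entity(entity="Q42", freq=12, entity_vector=[1, 2, 3])
    kb.add_alias(alias="Douglas", entities=["Q42"], probabilities=[1.0])
    return kb

# attach the KB; this also updates the stored entity_vector_length from the KB
entity_linker.set_kb(create_kb)
```
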

View File

@ -201,10 +201,10 @@ class EntityRuler(Pipe):
DOCS: https://nightly.spacy.io/api/entityruler#initialize DOCS: https://nightly.spacy.io/api/entityruler#initialize
""" """
self.clear()
if patterns: if patterns:
self.add_patterns(patterns) self.add_patterns(patterns)
@property @property
def ent_ids(self) -> Tuple[str, ...]: def ent_ids(self) -> Tuple[str, ...]:
"""All entity ids present in the match patterns `id` properties """All entity ids present in the match patterns `id` properties

View File

@ -136,6 +136,16 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
assert doc.has_annotation("MORPH") assert doc.has_annotation("MORPH")
def test_attributeruler_init_clear(nlp, pattern_dicts):
"""Test that initialization clears patterns."""
ruler = nlp.add_pipe("attribute_ruler")
assert not len(ruler.matcher)
ruler.add_patterns(pattern_dicts)
assert len(ruler.matcher)
ruler.initialize(lambda: [])
assert not len(ruler.matcher)
def test_attributeruler_score(nlp, pattern_dicts): def test_attributeruler_score(nlp, pattern_dicts):
# initialize with patterns # initialize with patterns
ruler = nlp.add_pipe("attribute_ruler") ruler = nlp.add_pipe("attribute_ruler")

View File

@ -110,7 +110,7 @@ def test_kb_invalid_entity_vector(nlp):
def test_kb_default(nlp): def test_kb_default(nlp):
"""Test that the default (empty) KB is loaded when not providing a config""" """Test that the default (empty) KB is loaded upon construction"""
entity_linker = nlp.add_pipe("entity_linker", config={}) entity_linker = nlp.add_pipe("entity_linker", config={})
assert len(entity_linker.kb) == 0 assert len(entity_linker.kb) == 0
assert entity_linker.kb.get_size_entities() == 0 assert entity_linker.kb.get_size_entities() == 0
@ -122,7 +122,7 @@ def test_kb_default(nlp):
def test_kb_custom_length(nlp): def test_kb_custom_length(nlp):
"""Test that the default (empty) KB can be configured with a custom entity length""" """Test that the default (empty) KB can be configured with a custom entity length"""
entity_linker = nlp.add_pipe( entity_linker = nlp.add_pipe(
"entity_linker", config={"kb_loader": {"entity_vector_length": 35}} "entity_linker", config={"entity_vector_length": 35}
) )
assert len(entity_linker.kb) == 0 assert len(entity_linker.kb) == 0
assert entity_linker.kb.get_size_entities() == 0 assert entity_linker.kb.get_size_entities() == 0
@ -130,18 +130,9 @@ def test_kb_custom_length(nlp):
assert entity_linker.kb.entity_vector_length == 35 assert entity_linker.kb.entity_vector_length == 35
def test_kb_undefined(nlp): def test_kb_initialize_empty(nlp):
"""Test that the EL can't train without defining a KB""" """Test that the EL can't initialize without examples"""
entity_linker = nlp.add_pipe("entity_linker", config={}) entity_linker = nlp.add_pipe("entity_linker")
with pytest.raises(ValueError):
entity_linker.initialize(lambda: [])
def test_kb_empty(nlp):
"""Test that the EL can't train with an empty KB"""
config = {"kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
entity_linker = nlp.add_pipe("entity_linker", config=config)
assert len(entity_linker.kb) == 0
with pytest.raises(ValueError): with pytest.raises(ValueError):
entity_linker.initialize(lambda: []) entity_linker.initialize(lambda: [])
@ -201,8 +192,6 @@ def test_el_pipe_configuration(nlp):
ruler = nlp.add_pipe("entity_ruler") ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns([pattern]) ruler.add_patterns([pattern])
@registry.misc.register("myAdamKB.v1")
def mykb() -> Callable[["Vocab"], KnowledgeBase]:
def create_kb(vocab): def create_kb(vocab):
kb = KnowledgeBase(vocab, entity_vector_length=1) kb = KnowledgeBase(vocab, entity_vector_length=1)
kb.add_entity(entity="Q2", freq=12, entity_vector=[2]) kb.add_entity(entity="Q2", freq=12, entity_vector=[2])
@ -212,13 +201,12 @@ def test_el_pipe_configuration(nlp):
) )
return kb return kb
return create_kb
# run an EL pipe without a trained context encoder, to check the candidate generation step only # run an EL pipe without a trained context encoder, to check the candidate generation step only
nlp.add_pipe( entity_linker = nlp.add_pipe(
"entity_linker", "entity_linker",
config={"kb_loader": {"@misc": "myAdamKB.v1"}, "incl_context": False}, config={"incl_context": False},
) )
entity_linker.set_kb(create_kb)
# With the default get_candidates function, matching is case-sensitive # With the default get_candidates function, matching is case-sensitive
text = "Douglas and douglas are not the same." text = "Douglas and douglas are not the same."
doc = nlp(text) doc = nlp(text)
@ -234,15 +222,15 @@ def test_el_pipe_configuration(nlp):
return get_lowercased_candidates return get_lowercased_candidates
# replace the pipe with a new one with a different candidate generator # replace the pipe with a new one with a different candidate generator
nlp.replace_pipe( entity_linker = nlp.replace_pipe(
"entity_linker", "entity_linker",
"entity_linker", "entity_linker",
config={ config={
"kb_loader": {"@misc": "myAdamKB.v1"},
"incl_context": False, "incl_context": False,
"get_candidates": {"@misc": "spacy.LowercaseCandidateGenerator.v1"}, "get_candidates": {"@misc": "spacy.LowercaseCandidateGenerator.v1"},
}, },
) )
entity_linker.set_kb(create_kb)
doc = nlp(text) doc = nlp(text)
assert doc[0].ent_kb_id_ == "Q2" assert doc[0].ent_kb_id_ == "Q2"
assert doc[1].ent_kb_id_ == "" assert doc[1].ent_kb_id_ == ""
@ -334,8 +322,6 @@ def test_preserving_links_asdoc(nlp):
"""Test that Span.as_doc preserves the existing entity links""" """Test that Span.as_doc preserves the existing entity links"""
vector_length = 1 vector_length = 1
@registry.misc.register("myLocationsKB.v1")
def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
def create_kb(vocab): def create_kb(vocab):
mykb = KnowledgeBase(vocab, entity_vector_length=vector_length) mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
# adding entities # adding entities
@ -346,8 +332,6 @@ def test_preserving_links_asdoc(nlp):
mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6]) mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6])
return mykb return mykb
return create_kb
# set up pipeline with NER (Entity Ruler) and NEL (prior probability only, model not trained) # set up pipeline with NER (Entity Ruler) and NEL (prior probability only, model not trained)
nlp.add_pipe("sentencizer") nlp.add_pipe("sentencizer")
patterns = [ patterns = [
@ -356,8 +340,9 @@ def test_preserving_links_asdoc(nlp):
] ]
ruler = nlp.add_pipe("entity_ruler") ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns) ruler.add_patterns(patterns)
el_config = {"kb_loader": {"@misc": "myLocationsKB.v1"}, "incl_prior": False} config = {"incl_prior": False}
entity_linker = nlp.add_pipe("entity_linker", config=el_config, last=True) entity_linker = nlp.add_pipe("entity_linker", config=config, last=True)
entity_linker.set_kb(create_kb)
nlp.initialize() nlp.initialize()
assert entity_linker.model.get_dim("nO") == vector_length assert entity_linker.model.get_dim("nO") == vector_length
@ -435,8 +420,6 @@ def test_overfitting_IO():
doc = nlp(text) doc = nlp(text)
train_examples.append(Example.from_dict(doc, annotation)) train_examples.append(Example.from_dict(doc, annotation))
@registry.misc.register("myOverfittingKB.v1")
def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
def create_kb(vocab): def create_kb(vocab):
# create artificial KB - assign same prior weight to the two russ cochran's # create artificial KB - assign same prior weight to the two russ cochran's
# Q2146908 (Russ Cochran): American golfer # Q2146908 (Russ Cochran): American golfer
@ -451,14 +434,12 @@ def test_overfitting_IO():
) )
return mykb return mykb
return create_kb
# Create the Entity Linker component and add it to the pipeline # Create the Entity Linker component and add it to the pipeline
entity_linker = nlp.add_pipe( entity_linker = nlp.add_pipe(
"entity_linker", "entity_linker",
config={"kb_loader": {"@misc": "myOverfittingKB.v1"}},
last=True, last=True,
) )
entity_linker.set_kb(create_kb)
# train the NEL pipe # train the NEL pipe
optimizer = nlp.initialize(get_examples=lambda: train_examples) optimizer = nlp.initialize(get_examples=lambda: train_examples)

View File

@ -68,6 +68,15 @@ def test_entity_ruler_init_patterns(nlp, patterns):
assert doc.ents[1].label_ == "BYE" assert doc.ents[1].label_ == "BYE"
def test_entity_ruler_init_clear(nlp, patterns):
"""Test that initialization clears patterns."""
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns)
assert len(ruler.labels) == 4
ruler.initialize(lambda: [])
assert len(ruler.labels) == 0
def test_entity_ruler_existing(nlp, patterns): def test_entity_ruler_existing(nlp, patterns):
ruler = nlp.add_pipe("entity_ruler") ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns) ruler.add_patterns(patterns)

View File

@ -71,17 +71,13 @@ def tagger():
def entity_linker(): def entity_linker():
nlp = Language() nlp = Language()
@registry.misc.register("TestIssue5230KB.v1")
def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
def create_kb(vocab): def create_kb(vocab):
kb = KnowledgeBase(vocab, entity_vector_length=1) kb = KnowledgeBase(vocab, entity_vector_length=1)
kb.add_entity("test", 0.0, zeros((1, 1), dtype="f")) kb.add_entity("test", 0.0, zeros((1, 1), dtype="f"))
return kb return kb
return create_kb entity_linker = nlp.add_pipe("entity_linker")
entity_linker.set_kb(create_kb)
config = {"kb_loader": {"@misc": "TestIssue5230KB.v1"}}
entity_linker = nlp.add_pipe("entity_linker", config=config)
# need to add model for two reasons: # need to add model for two reasons:
# 1. no model leads to error in serialization, # 1. no model leads to error in serialization,
# 2. the affected line is the one for model serialization # 2. the affected line is the one for model serialization

View File

@ -1,11 +1,12 @@
from typing import Callable from typing import Callable
from spacy import util from spacy import util
from spacy.lang.en import English from spacy.util import ensure_path, registry, load_model_from_config
from spacy.util import ensure_path, registry
from spacy.kb import KnowledgeBase from spacy.kb import KnowledgeBase
from thinc.api import Config
from ..util import make_tempdir from ..util import make_tempdir
from numpy import zeros
def test_serialize_kb_disk(en_vocab): def test_serialize_kb_disk(en_vocab):
@ -80,6 +81,28 @@ def _check_kb(kb):
def test_serialize_subclassed_kb(): def test_serialize_subclassed_kb():
"""Check that IO of a custom KB works fine as part of an EL pipe.""" """Check that IO of a custom KB works fine as part of an EL pipe."""
config_string = """
[nlp]
lang = "en"
pipeline = ["entity_linker"]
[components]
[components.entity_linker]
factory = "entity_linker"
[initialize]
[initialize.components]
[initialize.components.entity_linker]
[initialize.components.entity_linker.kb_loader]
@misc = "spacy.CustomKB.v1"
entity_vector_length = 342
custom_field = 666
"""
class SubKnowledgeBase(KnowledgeBase): class SubKnowledgeBase(KnowledgeBase):
def __init__(self, vocab, entity_vector_length, custom_field): def __init__(self, vocab, entity_vector_length, custom_field):
super().__init__(vocab, entity_vector_length) super().__init__(vocab, entity_vector_length)
@ -90,23 +113,21 @@ def test_serialize_subclassed_kb():
entity_vector_length: int, custom_field: int entity_vector_length: int, custom_field: int
) -> Callable[["Vocab"], KnowledgeBase]: ) -> Callable[["Vocab"], KnowledgeBase]:
def custom_kb_factory(vocab): def custom_kb_factory(vocab):
return SubKnowledgeBase( kb = SubKnowledgeBase(
vocab=vocab, vocab=vocab,
entity_vector_length=entity_vector_length, entity_vector_length=entity_vector_length,
custom_field=custom_field, custom_field=custom_field,
) )
kb.add_entity("random_entity", 0.0, zeros(entity_vector_length))
return kb
return custom_kb_factory return custom_kb_factory
nlp = English() config = Config().from_str(config_string)
config = { nlp = load_model_from_config(config, auto_fill=True)
"kb_loader": { nlp.initialize()
"@misc": "spacy.CustomKB.v1",
"entity_vector_length": 342, entity_linker = nlp.get_pipe("entity_linker")
"custom_field": 666,
}
}
entity_linker = nlp.add_pipe("entity_linker", config=config)
assert type(entity_linker.kb) == SubKnowledgeBase assert type(entity_linker.kb) == SubKnowledgeBase
assert entity_linker.kb.entity_vector_length == 342 assert entity_linker.kb.entity_vector_length == 342
assert entity_linker.kb.custom_field == 666 assert entity_linker.kb.custom_field == 666
@ -116,6 +137,7 @@ def test_serialize_subclassed_kb():
nlp.to_disk(tmp_dir) nlp.to_disk(tmp_dir)
nlp2 = util.load_model_from_path(tmp_dir) nlp2 = util.load_model_from_path(tmp_dir)
entity_linker2 = nlp2.get_pipe("entity_linker") entity_linker2 = nlp2.get_pipe("entity_linker")
assert type(entity_linker2.kb) == SubKnowledgeBase # After IO, the KB is the standard one
assert type(entity_linker2.kb) == KnowledgeBase
assert entity_linker2.kb.entity_vector_length == 342 assert entity_linker2.kb.entity_vector_length == 342
assert entity_linker2.kb.custom_field == 666 assert not hasattr(entity_linker2.kb, "custom_field")

View File

@ -524,7 +524,7 @@ Get a pipeline component for a given component name.
## Language.replace_pipe {#replace_pipe tag="method" new="2"} ## Language.replace_pipe {#replace_pipe tag="method" new="2"}
Replace a component in the pipeline. Replace a component in the pipeline and return the new component.
<Infobox title="Changed in v3.0" variant="warning"> <Infobox title="Changed in v3.0" variant="warning">
@ -538,7 +538,7 @@ and instead expects the **name of a component factory** registered using
> #### Example > #### Example
> >
> ```python > ```python
> nlp.replace_pipe("parser", my_custom_parser) > new_parser = nlp.replace_pipe("parser", "my_custom_parser")
> ``` > ```
| Name | Description | | Name | Description |
@ -548,6 +548,7 @@ and instead expects the **name of a component factory** registered using
| _keyword-only_ | | | _keyword-only_ | |
| `config` <Tag variant="new">3</Tag> | Optional config parameters to use for the new component. Will be merged with the `default_config` specified by the component factory. ~~Optional[Dict[str, Any]]~~ | | `config` <Tag variant="new">3</Tag> | Optional config parameters to use for the new component. Will be merged with the `default_config` specified by the component factory. ~~Optional[Dict[str, Any]]~~ |
| `validate` <Tag variant="new">3</Tag> | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ | | `validate` <Tag variant="new">3</Tag> | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
| **RETURNS** | The new pipeline component. ~~Callable[[Doc], Doc]~~ |
## Language.rename_pipe {#rename_pipe tag="method" new="2"} ## Language.rename_pipe {#rename_pipe tag="method" new="2"}

View File

@ -11,7 +11,7 @@ api_string_name: transformer
> #### Installation > #### Installation
> >
> ```bash > ```bash
> $ pip install spacy-transformers > $ pip install -U %%SPACY_PKG_NAME[transformers] %%SPACY_PKG_FLAGS
> ``` > ```
<Infobox title="Important note" variant="warning"> <Infobox title="Important note" variant="warning">
@ -386,7 +386,7 @@ by this class. Instances of this class are typically assigned to the
[`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute. [`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute.
| Name | Description | | Name | Description |
| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ | | `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ |
| `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ | | `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ |
| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | | `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
@ -407,7 +407,7 @@ then be split to a list of [`TransformerData`](/api/transformer#transformerdata)
objects to associate the outputs to each [`Doc`](/api/doc) in the batch. objects to associate the outputs to each [`Doc`](/api/doc) in the batch.
| Name | Description | | Name | Description |
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ | | `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ |
| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ | | `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ |
| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ | | `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ |

View File

@ -216,8 +216,7 @@ in `/opt/nvidia/cuda`, you would run:
```bash ```bash
### Installation with CUDA ### Installation with CUDA
$ export CUDA_PATH="/opt/nvidia/cuda" $ export CUDA_PATH="/opt/nvidia/cuda"
$ pip install cupy-cuda102 $ pip install -U %%SPACY_PKG_NAME[cud102,transformers]%%SPACY_PKG_FLAGS
$ pip install spacy-transformers
``` ```
### Runtime usage {#transformers-runtime} ### Runtime usage {#transformers-runtime}

View File

@ -47,7 +47,7 @@ Before you install spaCy and its dependencies, make sure that your `pip`,
```bash ```bash
$ pip install -U pip setuptools wheel $ pip install -U pip setuptools wheel
$ pip install -U spacy $ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
``` ```
When using pip it is generally recommended to install packages in a virtual When using pip it is generally recommended to install packages in a virtual
@ -57,7 +57,7 @@ environment to avoid modifying system state:
$ python -m venv .env $ python -m venv .env
$ source .env/bin/activate $ source .env/bin/activate
$ pip install -U pip setuptools wheel $ pip install -U pip setuptools wheel
$ pip install spacy $ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
``` ```
spaCy also lets you install extra dependencies by specifying the following spaCy also lets you install extra dependencies by specifying the following
@ -68,15 +68,16 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
> #### Example > #### Example
> >
> ```bash > ```bash
> $ pip install spacy[lookups,transformers] > $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
> ``` > ```
| Name | Description | | Name | Description |
| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. | | `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. | | `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
| `ray` | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training). |
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. | | `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). | | `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
### conda {#conda} ### conda {#conda}
@ -88,8 +89,8 @@ $ conda install -c conda-forge spacy
``` ```
For the feedstock including the build recipe and configuration, check out For the feedstock including the build recipe and configuration, check out
[this repository](https://github.com/conda-forge/spacy-feedstock). Improvements [this repository](https://github.com/conda-forge/spacy-feedstock). Note that we
and pull requests to the recipe and setup are always appreciated. currently don't publish any [pre-releases](#changelog-pre) on conda.
### Upgrading spaCy {#upgrading} ### Upgrading spaCy {#upgrading}
@ -116,7 +117,7 @@ are printed. It's recommended to run the command with `python -m` to make sure
you're executing the correct version of spaCy. you're executing the correct version of spaCy.
```cli ```cli
$ pip install -U spacy $ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
$ python -m spacy validate $ python -m spacy validate
``` ```
@ -134,7 +135,7 @@ specifier allows cupy to be installed via wheel, saving some compilation time.
The specifiers should install [`cupy`](https://cupy.chainer.org). The specifiers should install [`cupy`](https://cupy.chainer.org).
```bash ```bash
$ pip install -U spacy[cuda92] $ pip install -U %%SPACY_PKG_NAME[cuda92]%%SPACY_PKG_FLAGS
``` ```
Once you have a GPU-enabled installation, the best way to activate it is to call Once you have a GPU-enabled installation, the best way to activate it is to call

View File

@ -166,7 +166,7 @@ lookup lemmatizer looks up the token surface form in the lookup table without
reference to the token's part-of-speech or context. reference to the token's part-of-speech or context.
```python ```python
# pip install spacy-lookups-data # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
import spacy import spacy
nlp = spacy.blank("sv") nlp = spacy.blank("sv")
@ -181,7 +181,7 @@ rule-based lemmatizer can be added using rule tables from
[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data): [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data):
```python ```python
# pip install spacy-lookups-data # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
import spacy import spacy
nlp = spacy.blank("de") nlp = spacy.blank("de")
@ -1801,7 +1801,10 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON
<Infobox variant="warning" title="Migrating from spaCy v2.x"> <Infobox variant="warning" title="Migrating from spaCy v2.x">
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details. The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph
rules** in the v2.x format via its built-in methods or when the component is
initialized before training. See the
[migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
</Infobox> </Infobox>

View File

@ -54,7 +54,7 @@ contribute to development.
> separately in the same environment: > separately in the same environment:
> >
> ```bash > ```bash
> $ pip install spacy[lookups] > $ pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
> ``` > ```
import Languages from 'widgets/languages.js' import Languages from 'widgets/languages.js'
@ -287,7 +287,7 @@ The download command will [install the package](/usage/models#download-pip) via
pip and place the package in your `site-packages` directory. pip and place the package in your `site-packages` directory.
```cli ```cli
$ pip install -U spacy $ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
$ python -m spacy download en_core_web_sm $ python -m spacy download en_core_web_sm
``` ```

View File

@ -813,7 +813,7 @@ full embedded visualizer, as well as individual components.
> #### Installation > #### Installation
> >
> ```bash > ```bash
> $ pip install "spacy-streamlit>=1.0.0a0" > $ pip install spacy-streamlit --pre
> ``` > ```
![](../images/spacy-streamlit.png) ![](../images/spacy-streamlit.png)
@ -911,7 +911,7 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
> #### Installation > #### Installation
> >
> ```cli > ```cli
> $ pip install spacy-ray > $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered > # Check that the CLI is registered
> $ python -m spacy ray --help > $ python -m spacy ray --help
> ``` > ```

View File

@ -297,7 +297,7 @@ packages. This lets one application easily customize the behavior of another, by
exposing an entry point in its `setup.py`. For a quick and fun intro to entry exposing an entry point in its `setup.py`. For a quick and fun intro to entry
points in Python, check out points in Python, check out
[this excellent blog post](https://amir.rachum.com/blog/2017/07/28/python-entry-points/). [this excellent blog post](https://amir.rachum.com/blog/2017/07/28/python-entry-points/).
spaCy can load custom function from several different entry points to add spaCy can load custom functions from several different entry points to add
pipeline component factories, language classes and other settings. To make spaCy pipeline component factories, language classes and other settings. To make spaCy
use your entry points, your package needs to expose them and it needs to be use your entry points, your package needs to expose them and it needs to be
installed in the same environment; that's it. installed in the same environment; that's it.

View File

@ -1249,7 +1249,7 @@ valid.
> #### Installation > #### Installation
> >
> ```cli > ```cli
> $ pip install spacy-ray > $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered > # Check that the CLI is registered
> $ python -m spacy ray --help > $ python -m spacy ray --help
> ``` > ```

View File

@ -236,7 +236,7 @@ treebank.
> #### Example > #### Example
> >
> ```cli > ```cli
> $ pip install spacy-ray > $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered > # Check that the CLI is registered
> $ python -m spacy ray --help > $ python -m spacy ray --help
> # Train a pipeline > # Train a pipeline
@ -272,7 +272,7 @@ add to your pipeline and customize for your use case:
> #### Example > #### Example
> >
> ```python > ```python
> # pip install spacy-lookups-data > # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
> nlp = spacy.blank("en") > nlp = spacy.blank("en")
> nlp.add_pipe("lemmatizer") > nlp.add_pipe("lemmatizer")
> ``` > ```
@ -395,7 +395,7 @@ type-check model definitions.
For data validation, spaCy v3.0 adopts For data validation, spaCy v3.0 adopts
[`pydantic`](https://github.com/samuelcolvin/pydantic). It also powers the data [`pydantic`](https://github.com/samuelcolvin/pydantic). It also powers the data
validation of Thinc's [config system](https://thinc.ai/docs/usage-config), which validation of Thinc's [config system](https://thinc.ai/docs/usage-config), which
lets you to register **custom functions with typed arguments**, reference them lets you register **custom functions with typed arguments**, reference them
in your config and see validation errors if the argument values don't match. in your config and see validation errors if the argument values don't match.
<Infobox title="Details & Documentation" emoji="📖" list> <Infobox title="Details & Documentation" emoji="📖" list>

View File

@ -30,6 +30,8 @@ const branch = isNightly ? 'develop' : 'master'
const replacements = { const replacements = {
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`, GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`, GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`,
SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy',
SPACY_PKG_FLAGS: isNightly ? ' --pre' : '',
} }
/** /**

View File

@ -97,7 +97,10 @@ const Changelog = () => {
<p> <p>
Pre-releases include alpha and beta versions, as well as release candidates. They Pre-releases include alpha and beta versions, as well as release candidates. They
are not intended for production use. You can download spaCy pre-releases via the{' '} are not intended for production use. You can download spaCy pre-releases via the{' '}
<InlineCode>spacy-nightly</InlineCode> package on pip. <Link to="https://pypi.org/packages/spacy-nightly">
<InlineCode>spacy-nightly</InlineCode>
</Link>{' '}
package on pip.
</p> </p>
<p> <p>

View File

@ -28,7 +28,8 @@ import irlBackground from '../images/spacy-irl.jpg'
import Benchmarks from 'usage/_benchmarks-models.md' import Benchmarks from 'usage/_benchmarks-models.md'
const CODE_EXAMPLE = `# pip install spacy function getCodeExample(nightly) {
return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
# python -m spacy download en_core_web_sm # python -m spacy download en_core_web_sm
import spacy import spacy
@ -52,9 +53,11 @@ print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])
for entity in doc.ents: for entity in doc.ents:
print(entity.text, entity.label_) print(entity.text, entity.label_)
` `
}
const Landing = ({ data }) => { const Landing = ({ data }) => {
const { counts } = data const { counts, nightly } = data
const codeExample = getCodeExample(nightly)
return ( return (
<> <>
<LandingHeader nightly={data.nightly}> <LandingHeader nightly={data.nightly}>
@ -91,7 +94,7 @@ const Landing = ({ data }) => {
</LandingGrid> </LandingGrid>
<LandingGrid> <LandingGrid>
<LandingDemo title="Edit the code & try spaCy">{CODE_EXAMPLE}</LandingDemo> <LandingDemo title="Edit the code &amp; try spaCy">{codeExample}</LandingDemo>
<LandingCol> <LandingCol>
<H2>Features</H2> <H2>Features</H2>

View File

@ -141,6 +141,11 @@ const QuickstartInstall = ({ id, title }) => {
setters={setters} setters={setters}
showDropdown={showDropdown} showDropdown={showDropdown}
> >
{nightly && (
<QS package="conda" comment prompt={false}>
# 🚨 Nightly releases are currently only available via pip
</QS>
)}
<QS config="venv">python -m venv .env</QS> <QS config="venv">python -m venv .env</QS>
<QS config="venv" os="mac"> <QS config="venv" os="mac">
source .env/bin/activate source .env/bin/activate
@ -175,9 +180,9 @@ const QuickstartInstall = ({ id, title }) => {
</QS> </QS>
<QS package="source">pip install -r requirements.txt</QS> <QS package="source">pip install -r requirements.txt</QS>
<QS package="source">python setup.py build_ext --inplace</QS> <QS package="source">python setup.py build_ext --inplace</QS>
<QS package="source" config="train"> {(train || hardware == 'gpu') && (
pip install -e '.[{pipExtras}]' <QS package="source">pip install -e '.[{pipExtras}]'</QS>
</QS> )}
<QS config="train" package="conda"> <QS config="train" package="conda">
conda install -c conda-forge spacy-transformers conda install -c conda-forge spacy-transformers