diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 4b06abc0f..43c852d13 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -29,7 +29,7 @@ name = "{{ transformer["name"] }}" tokenizer_config = {"use_fast": true} [components.transformer.model.get_spans] -@span_getters = "strided_spans.v1" +@span_getters = "spacy-transformers.strided_spans.v1" window = 128 stride = 96 diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md index 35816a9a2..ee844d961 100644 --- a/website/docs/api/architectures.md +++ b/website/docs/api/architectures.md @@ -320,7 +320,7 @@ for details and system requirements. > tokenizer_config = {"use_fast": true} > > [model.get_spans] -> @span_getters = "strided_spans.v1" +> @span_getters = "spacy-transformers.strided_spans.v1" > window = 128 > stride = 96 > ``` diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index 5ac95cb29..b41a18890 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -453,7 +453,7 @@ using the `@spacy.registry.span_getters` decorator. > #### Example > > ```python -> @spacy.registry.span_getters("sent_spans.v1") +> @spacy.registry.span_getters("custom_sent_spans") > def configure_get_sent_spans() -> Callable: > def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]: > return [list(doc.sents) for doc in docs] @@ -472,7 +472,7 @@ using the `@spacy.registry.span_getters` decorator. > > ```ini > [transformer.model.get_spans] -> @span_getters = "doc_spans.v1" +> @span_getters = "spacy-transformers.doc_spans.v1" > ``` Create a span getter that uses the whole document as its spans. This is the best @@ -485,7 +485,7 @@ texts. > > ```ini > [transformer.model.get_spans] -> @span_getters = "sent_spans.v1" +> @span_getters = "spacy-transformers.sent_spans.v1" > ``` Create a span getter that uses sentence boundary markers to extract the spans. @@ -500,7 +500,7 @@ more meaningful windows to attend over. > > ```ini > [transformer.model.get_spans] -> @span_getters = "strided_spans.v1" +> @span_getters = "spacy-transformers.strided_spans.v1" > window = 128 > stride = 96 > ``` diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md index 7792ce124..abd92a8ac 100644 --- a/website/docs/usage/embeddings-transformers.md +++ b/website/docs/usage/embeddings-transformers.md @@ -331,7 +331,7 @@ name = "bert-base-cased" tokenizer_config = {"use_fast": true} [components.transformer.model.get_spans] -@span_getters = "doc_spans.v1" +@span_getters = "spacy-transformers.doc_spans.v1" [components.transformer.annotation_setter] @annotation_setters = "spacy-transformers.null_annotation_setter.v1" @@ -369,8 +369,9 @@ all defaults. To change any of the settings, you can edit the `config.cfg` and re-run the training. To change any of the functions, like the span getter, you can replace -the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to -process sentences. You can also register your own functions using the +the name of the referenced function – e.g. +`@span_getters = "spacy-transformers.sent_spans.v1"` to process sentences. You +can also register your own functions using the [`span_getters` registry](/api/top-level#registry). For instance, the following custom function returns [`Span`](/api/span) objects following sentence boundaries, unless a sentence succeeds a certain amount of tokens, in which case