Prefix span getters

This commit is contained in:
Ines Montani 2020-09-03 17:37:06 +02:00
parent 5afe6447cd
commit 23b7d9cfa3
4 changed files with 10 additions and 9 deletions

View File

@ -29,7 +29,7 @@ name = "{{ transformer["name"] }}"
tokenizer_config = {"use_fast": true} tokenizer_config = {"use_fast": true}
[components.transformer.model.get_spans] [components.transformer.model.get_spans]
@span_getters = "strided_spans.v1" @span_getters = "spacy-transformers.strided_spans.v1"
window = 128 window = 128
stride = 96 stride = 96

View File

@ -320,7 +320,7 @@ for details and system requirements.
> tokenizer_config = {"use_fast": true} > tokenizer_config = {"use_fast": true}
> >
> [model.get_spans] > [model.get_spans]
> @span_getters = "strided_spans.v1" > @span_getters = "spacy-transformers.strided_spans.v1"
> window = 128 > window = 128
> stride = 96 > stride = 96
> ``` > ```

View File

@ -453,7 +453,7 @@ using the `@spacy.registry.span_getters` decorator.
> #### Example > #### Example
> >
> ```python > ```python
> @spacy.registry.span_getters("sent_spans.v1") > @spacy.registry.span_getters("custom_sent_spans")
> def configure_get_sent_spans() -> Callable: > def configure_get_sent_spans() -> Callable:
> def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]: > def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]:
> return [list(doc.sents) for doc in docs] > return [list(doc.sents) for doc in docs]
@ -472,7 +472,7 @@ using the `@spacy.registry.span_getters` decorator.
> >
> ```ini > ```ini
> [transformer.model.get_spans] > [transformer.model.get_spans]
> @span_getters = "doc_spans.v1" > @span_getters = "spacy-transformers.doc_spans.v1"
> ``` > ```
Create a span getter that uses the whole document as its spans. This is the best Create a span getter that uses the whole document as its spans. This is the best
@ -485,7 +485,7 @@ texts.
> >
> ```ini > ```ini
> [transformer.model.get_spans] > [transformer.model.get_spans]
> @span_getters = "sent_spans.v1" > @span_getters = "spacy-transformers.sent_spans.v1"
> ``` > ```
Create a span getter that uses sentence boundary markers to extract the spans. Create a span getter that uses sentence boundary markers to extract the spans.
@ -500,7 +500,7 @@ more meaningful windows to attend over.
> >
> ```ini > ```ini
> [transformer.model.get_spans] > [transformer.model.get_spans]
> @span_getters = "strided_spans.v1" > @span_getters = "spacy-transformers.strided_spans.v1"
> window = 128 > window = 128
> stride = 96 > stride = 96
> ``` > ```

View File

@ -331,7 +331,7 @@ name = "bert-base-cased"
tokenizer_config = {"use_fast": true} tokenizer_config = {"use_fast": true}
[components.transformer.model.get_spans] [components.transformer.model.get_spans]
@span_getters = "doc_spans.v1" @span_getters = "spacy-transformers.doc_spans.v1"
[components.transformer.annotation_setter] [components.transformer.annotation_setter]
@annotation_setters = "spacy-transformers.null_annotation_setter.v1" @annotation_setters = "spacy-transformers.null_annotation_setter.v1"
@ -369,8 +369,9 @@ all defaults.
To change any of the settings, you can edit the `config.cfg` and re-run the To change any of the settings, you can edit the `config.cfg` and re-run the
training. To change any of the functions, like the span getter, you can replace training. To change any of the functions, like the span getter, you can replace
the name of the referenced function e.g. `@span_getters = "sent_spans.v1"` to the name of the referenced function e.g.
process sentences. You can also register your own functions using the `@span_getters = "spacy-transformers.sent_spans.v1"` to process sentences. You
can also register your own functions using the
[`span_getters` registry](/api/top-level#registry). For instance, the following [`span_getters` registry](/api/top-level#registry). For instance, the following
custom function returns [`Span`](/api/span) objects following sentence custom function returns [`Span`](/api/span) objects following sentence
boundaries, unless a sentence exceeds a certain number of tokens, in which case boundaries, unless a sentence exceeds a certain number of tokens, in which case