mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Prefix span getters
This commit is contained in:
		
							parent
							
								
									5afe6447cd
								
							
						
					
					
						commit
						23b7d9cfa3
					
				| 
						 | 
					@ -29,7 +29,7 @@ name = "{{ transformer["name"] }}"
 | 
				
			||||||
tokenizer_config = {"use_fast": true}
 | 
					tokenizer_config = {"use_fast": true}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.transformer.model.get_spans]
 | 
					[components.transformer.model.get_spans]
 | 
				
			||||||
@span_getters = "strided_spans.v1"
 | 
					@span_getters = "spacy-transformers.strided_spans.v1"
 | 
				
			||||||
window = 128
 | 
					window = 128
 | 
				
			||||||
stride = 96
 | 
					stride = 96
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -320,7 +320,7 @@ for details and system requirements.
 | 
				
			||||||
> tokenizer_config = {"use_fast": true}
 | 
					> tokenizer_config = {"use_fast": true}
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> [model.get_spans]
 | 
					> [model.get_spans]
 | 
				
			||||||
> @span_getters = "strided_spans.v1"
 | 
					> @span_getters = "spacy-transformers.strided_spans.v1"
 | 
				
			||||||
> window = 128
 | 
					> window = 128
 | 
				
			||||||
> stride = 96
 | 
					> stride = 96
 | 
				
			||||||
> ```
 | 
					> ```
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -453,7 +453,7 @@ using the `@spacy.registry.span_getters` decorator.
 | 
				
			||||||
> #### Example
 | 
					> #### Example
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> ```python
 | 
					> ```python
 | 
				
			||||||
> @spacy.registry.span_getters("sent_spans.v1")
 | 
					> @spacy.registry.span_getters("custom_sent_spans")
 | 
				
			||||||
> def configure_get_sent_spans() -> Callable:
 | 
					> def configure_get_sent_spans() -> Callable:
 | 
				
			||||||
>     def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]:
 | 
					>     def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]:
 | 
				
			||||||
>         return [list(doc.sents) for doc in docs]
 | 
					>         return [list(doc.sents) for doc in docs]
 | 
				
			||||||
| 
						 | 
					@ -472,7 +472,7 @@ using the `@spacy.registry.span_getters` decorator.
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> ```ini
 | 
					> ```ini
 | 
				
			||||||
> [transformer.model.get_spans]
 | 
					> [transformer.model.get_spans]
 | 
				
			||||||
> @span_getters = "doc_spans.v1"
 | 
					> @span_getters = "spacy-transformers.doc_spans.v1"
 | 
				
			||||||
> ```
 | 
					> ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Create a span getter that uses the whole document as its spans. This is the best
 | 
					Create a span getter that uses the whole document as its spans. This is the best
 | 
				
			||||||
| 
						 | 
					@ -485,7 +485,7 @@ texts.
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> ```ini
 | 
					> ```ini
 | 
				
			||||||
> [transformer.model.get_spans]
 | 
					> [transformer.model.get_spans]
 | 
				
			||||||
> @span_getters = "sent_spans.v1"
 | 
					> @span_getters = "spacy-transformers.sent_spans.v1"
 | 
				
			||||||
> ```
 | 
					> ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Create a span getter that uses sentence boundary markers to extract the spans.
 | 
					Create a span getter that uses sentence boundary markers to extract the spans.
 | 
				
			||||||
| 
						 | 
					@ -500,7 +500,7 @@ more meaningful windows to attend over.
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> ```ini
 | 
					> ```ini
 | 
				
			||||||
> [transformer.model.get_spans]
 | 
					> [transformer.model.get_spans]
 | 
				
			||||||
> @span_getters = "strided_spans.v1"
 | 
					> @span_getters = "spacy-transformers.strided_spans.v1"
 | 
				
			||||||
> window = 128
 | 
					> window = 128
 | 
				
			||||||
> stride = 96
 | 
					> stride = 96
 | 
				
			||||||
> ```
 | 
					> ```
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -331,7 +331,7 @@ name = "bert-base-cased"
 | 
				
			||||||
tokenizer_config = {"use_fast": true}
 | 
					tokenizer_config = {"use_fast": true}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.transformer.model.get_spans]
 | 
					[components.transformer.model.get_spans]
 | 
				
			||||||
@span_getters = "doc_spans.v1"
 | 
					@span_getters = "spacy-transformers.doc_spans.v1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.transformer.annotation_setter]
 | 
					[components.transformer.annotation_setter]
 | 
				
			||||||
@annotation_setters = "spacy-transformers.null_annotation_setter.v1"
 | 
					@annotation_setters = "spacy-transformers.null_annotation_setter.v1"
 | 
				
			||||||
| 
						 | 
					@ -369,8 +369,9 @@ all defaults.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To change any of the settings, you can edit the `config.cfg` and re-run the
 | 
					To change any of the settings, you can edit the `config.cfg` and re-run the
 | 
				
			||||||
training. To change any of the functions, like the span getter, you can replace
 | 
					training. To change any of the functions, like the span getter, you can replace
 | 
				
			||||||
the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to
 | 
					the name of the referenced function – e.g.
 | 
				
			||||||
process sentences. You can also register your own functions using the
 | 
					`@span_getters = "spacy-transformers.sent_spans.v1"` to process sentences. You
 | 
				
			||||||
 | 
					can also register your own functions using the
 | 
				
			||||||
[`span_getters` registry](/api/top-level#registry). For instance, the following
 | 
					[`span_getters` registry](/api/top-level#registry). For instance, the following
 | 
				
			||||||
custom function returns [`Span`](/api/span) objects following sentence
 | 
					custom function returns [`Span`](/api/span) objects following sentence
 | 
				
			||||||
boundaries, unless a sentence exceeds a certain number of tokens, in which case
 | 
					boundaries, unless a sentence exceeds a certain number of tokens, in which case
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user