mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	more docstring and fix negative_label
This commit is contained in:
		
							parent
							
								
									edf9134e45
								
							
						
					
					
						commit
						5ccb154972
					
				|  | @ -1,4 +1,4 @@ | |||
| from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast | ||||
| from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union | ||||
| from dataclasses import dataclass | ||||
| from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops | ||||
| from thinc.api import Optimizer | ||||
|  | @ -309,7 +309,12 @@ class SpanCategorizer(TrainablePipe): | |||
|         threshold: Optional[float] = None, | ||||
|         scorer: Optional[Callable] = spancat_score, | ||||
|     ) -> None: | ||||
|         """Initialize the span categorizer. | ||||
|         """Initialize the multilabel or multiclass span categorizer. | ||||
|         The 'single_label' argument configures whether the component | ||||
|         should only produce one label per span (multiclass) or if it | ||||
|         can produce multiple labels per span (multilabel). In the | ||||
|         multilabel case the classification layer is expected to be | ||||
|         Logistic and Softmax in the multiclass case. | ||||
|         vocab (Vocab): The shared vocabulary. | ||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||
|         suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans. | ||||
|  | @ -321,16 +326,23 @@ class SpanCategorizer(TrainablePipe): | |||
|             During initialization and training, the component will look for | ||||
|             spans on the reference document under the same key. Defaults to | ||||
|             `"spans"`. | ||||
|         single_label (bool): Whether to configure SpanCategorizer to produce | ||||
|             a single label per span. In this case its expected that the scorer | ||||
|             layer is Softmax. Otherwise its expected to be Logistic. When single_label | ||||
|             is true the SpanCategorizer internally has a negative-label indicating | ||||
|             that a span should not receive any of the labels found in the corpus. | ||||
|         threshold (Optional[float]): Minimum probability to consider a prediction | ||||
|             positive. Spans with a positive prediction will be saved on the Doc. | ||||
|             Defaults to 0.5. | ||||
|             positive in the multilabel usecase.Defaults to 0.5 when single_label is | ||||
|             False otherwise its None. Spans with a positive prediction will be saved on the Doc. | ||||
|         max_positive (Optional[int]): Maximum number of labels to consider | ||||
|             positive per span. Defaults to None, indicating no limit. | ||||
|             positive per span. Defaults to None, indicating no limit. This is | ||||
|             unused when single_label is True. | ||||
|         negative_weight (float): Multiplier for the loss terms. | ||||
|             Can be used to downweight the negative samples if there are too many. | ||||
|             Can be used to downweight the negative samples if there are too many | ||||
|             when single_label is True. Otherwise its unused. | ||||
|         allow_overlap (bool): If True the data is assumed to contain overlapping spans. | ||||
|             Otherwise it produces non-overlapping spans greedily prioritizing | ||||
|             higher assigned label scores. | ||||
|             higher assigned label scores. Only used when single_label is True. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_spans for the Doc.spans[spans_key] with overlapping | ||||
|             spans allowed. | ||||
|  | @ -422,12 +434,12 @@ class SpanCategorizer(TrainablePipe): | |||
|             return len(self.labels) | ||||
| 
 | ||||
|     @property | ||||
|     def _negative_label(self) -> int: | ||||
|         """RETURNS (int): Index of the negative label.""" | ||||
|     def _negative_label(self) -> Union[int, None]: | ||||
|         """RETURNS (Union[int, None]): Index of the negative label.""" | ||||
|         if self.single_label: | ||||
|             return -1 | ||||
|         else: | ||||
|             return len(self.label_data) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def predict(self, docs: Iterable[Doc]): | ||||
|         """Apply the pipeline's model to a batch of docs, without modifying them. | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user