mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Formatting
This commit is contained in:
		
							parent
							
								
									41fc092674
								
							
						
					
					
						commit
						33f4f90ff0
					
				|  | @ -64,7 +64,6 @@ def build_wl_coref_model( | |||
|     return coref_model | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def convert_coref_scorer_inputs(model: Model, X: List[Floats2d], is_train: bool): | ||||
|     # The input here is List[Floats2d], one for each doc | ||||
|     # just use the first | ||||
|  | @ -96,7 +95,6 @@ def convert_coref_scorer_outputs(model: Model, inputs_outputs, is_train: bool): | |||
|     return (scores_xp, indices_xp), convert_for_torch_backward | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # TODO add docstring for this, maybe move to utils. | ||||
| # This might belong in the component. | ||||
| def _clusterize(model, scores: Floats2d, top_indices: Ints2d): | ||||
|  | @ -126,7 +124,6 @@ def _clusterize(model, scores: Floats2d, top_indices: Ints2d): | |||
|     return sorted(clusters) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| class CorefScorer(torch.nn.Module): | ||||
|     """Combines all coref modules together to find coreferent spans. | ||||
| 
 | ||||
|  | @ -372,8 +369,6 @@ class RoughScorer(torch.nn.Module): | |||
|         return top_scores, indices | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| class DistancePairwiseEncoder(torch.nn.Module): | ||||
|     def __init__(self, embedding_size, dropout_rate): | ||||
|         super().__init__() | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ from ...tokens import Doc | |||
| from ...util import registry | ||||
| from .coref_util import get_sentence_ids | ||||
| 
 | ||||
| 
 | ||||
| @registry.architectures("spacy.SpanPredictor.v1") | ||||
| def build_span_predictor( | ||||
|     tok2vec: Model[List[Doc], List[Floats2d]], | ||||
|  | @ -34,6 +35,7 @@ def build_span_predictor( | |||
| 
 | ||||
|     return model | ||||
| 
 | ||||
| 
 | ||||
| def convert_span_predictor_inputs( | ||||
|     model: Model, X: Tuple[Ints1d, Floats2d, Ints1d], is_train: bool | ||||
| ): | ||||
|  | @ -89,6 +91,38 @@ def predict_span_clusters( | |||
| 
 | ||||
|     return [[head2span[head] for head in cluster] for cluster in clusters] | ||||
| 
 | ||||
| 
 | ||||
| def build_get_head_metadata(prefix): | ||||
|     # TODO this name is awful, fix it | ||||
|     model = Model( | ||||
|         "HeadDataProvider", attrs={"prefix": prefix}, forward=head_data_forward | ||||
|     ) | ||||
|     return model | ||||
| 
 | ||||
| 
 | ||||
| def head_data_forward(model, docs, is_train): | ||||
|     """A layer to generate the extra data needed for the span predictor.""" | ||||
|     sent_ids = [] | ||||
|     head_ids = [] | ||||
|     prefix = model.attrs["prefix"] | ||||
|     for doc in docs: | ||||
|         sids = model.ops.asarray2i(get_sentence_ids(doc)) | ||||
|         sent_ids.append(sids) | ||||
|         heads = [] | ||||
|         for key, sg in doc.spans.items(): | ||||
|             if not key.startswith(prefix): | ||||
|                 continue | ||||
|             for span in sg: | ||||
|                 # TODO warn if spans are more than one token | ||||
|                 heads.append(span[0].i) | ||||
|         heads = model.ops.asarray2i(heads) | ||||
|         head_ids.append(heads) | ||||
|     # each of these is a list with one entry per doc | ||||
|     # backprop is just a placeholder | ||||
|     # TODO it would probably be better to have a list of tuples than two lists of arrays | ||||
|     return (sent_ids, head_ids), lambda x: [] | ||||
| 
 | ||||
| 
 | ||||
| # TODO this should maybe have a different name from the component | ||||
| class SpanPredictor(torch.nn.Module): | ||||
|     def __init__(self, input_size: int, hidden_size: int, dist_emb_size: int): | ||||
|  | @ -181,35 +215,3 @@ class SpanPredictor(torch.nn.Module): | |||
|             valid_positions = torch.stack((valid_starts, valid_ends), dim=2) | ||||
|             return scores + valid_positions | ||||
|         return scores | ||||
| 
 | ||||
| 
 | ||||
| def build_get_head_metadata(prefix): | ||||
|     # TODO this name is awful, fix it | ||||
|     model = Model( | ||||
|         "HeadDataProvider", attrs={"prefix": prefix}, forward=head_data_forward | ||||
|     ) | ||||
|     return model | ||||
| 
 | ||||
| 
 | ||||
| def head_data_forward(model, docs, is_train): | ||||
|     """A layer to generate the extra data needed for the span predictor.""" | ||||
|     sent_ids = [] | ||||
|     head_ids = [] | ||||
|     prefix = model.attrs["prefix"] | ||||
|     for doc in docs: | ||||
|         sids = model.ops.asarray2i(get_sentence_ids(doc)) | ||||
|         sent_ids.append(sids) | ||||
|         heads = [] | ||||
|         for key, sg in doc.spans.items(): | ||||
|             if not key.startswith(prefix): | ||||
|                 continue | ||||
|             for span in sg: | ||||
|                 # TODO warn if spans are more than one token | ||||
|                 heads.append(span[0].i) | ||||
|         heads = model.ops.asarray2i(heads) | ||||
|         head_ids.append(heads) | ||||
|     # each of these is a list with one entry per doc | ||||
|     # backprop is just a placeholder | ||||
|     # TODO it would probably be better to have a list of tuples than two lists of arrays | ||||
|     return (sent_ids, head_ids), lambda x: [] | ||||
| 
 | ||||
|  |  | |||
|  | @ -84,7 +84,6 @@ def make_coref( | |||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| class CoreferenceResolver(TrainablePipe): | ||||
|     """Pipeline component for coreference resolution. | ||||
| 
 | ||||
|  | @ -318,7 +317,7 @@ class CoreferenceResolver(TrainablePipe): | |||
|             log_marg = ops.softmax(cscores + ops.xp.log(top_gscores), axis=1) | ||||
|         log_norm = ops.softmax(cscores, axis=1) | ||||
|         grad = log_norm - log_marg | ||||
|         #gradients.append((grad, cidx)) | ||||
|         # gradients.append((grad, cidx)) | ||||
|         loss = float((grad**2).sum()) | ||||
| 
 | ||||
|         return loss, grad | ||||
|  | @ -373,5 +372,3 @@ class CoreferenceResolver(TrainablePipe): | |||
|             "coref_r": evaluator.get_recall(), | ||||
|         } | ||||
|         return score | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -46,26 +46,30 @@ depth = 2 | |||
| """ | ||||
| DEFAULT_SPAN_PREDICTOR_MODEL = Config().from_str(default_span_predictor_config)["model"] | ||||
| 
 | ||||
| 
 | ||||
| @Language.factory( | ||||
|         "span_predictor", | ||||
|         assigns=["doc.spans"], | ||||
|         requires=["doc.spans"], | ||||
|         default_config={ | ||||
|             "model": DEFAULT_SPAN_PREDICTOR_MODEL, | ||||
|             "input_prefix": "coref_head_clusters", | ||||
|             "output_prefix": "coref_clusters", | ||||
|             }, | ||||
|     "span_predictor", | ||||
|     assigns=["doc.spans"], | ||||
|     requires=["doc.spans"], | ||||
|     default_config={ | ||||
|         "model": DEFAULT_SPAN_PREDICTOR_MODEL, | ||||
|         "input_prefix": "coref_head_clusters", | ||||
|         "output_prefix": "coref_clusters", | ||||
|     }, | ||||
|     default_score_weights={"span_accuracy": 1.0}, | ||||
|     ) | ||||
| ) | ||||
| def make_span_predictor( | ||||
|         nlp: Language, | ||||
|         name: str, | ||||
|         model, | ||||
|         input_prefix: str = "coref_head_clusters", | ||||
|         output_prefix: str = "coref_clusters", | ||||
|     nlp: Language, | ||||
|     name: str, | ||||
|     model, | ||||
|     input_prefix: str = "coref_head_clusters", | ||||
|     output_prefix: str = "coref_clusters", | ||||
| ) -> "SpanPredictor": | ||||
|     """Create a SpanPredictor component.""" | ||||
|     return SpanPredictor(nlp.vocab, model, name, input_prefix=input_prefix, output_prefix=output_prefix) | ||||
|     return SpanPredictor( | ||||
|         nlp.vocab, model, name, input_prefix=input_prefix, output_prefix=output_prefix | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| class SpanPredictor(TrainablePipe): | ||||
|     """Pipeline component to resolve one-token spans to full spans. | ||||
|  | @ -125,7 +129,7 @@ class SpanPredictor(TrainablePipe): | |||
|     def set_annotations(self, docs: Iterable[Doc], clusters_by_doc) -> None: | ||||
|         for doc, clusters in zip(docs, clusters_by_doc): | ||||
|             for ii, cluster in enumerate(clusters): | ||||
|                 spans = [doc[mm[0]:mm[1]] for mm in cluster] | ||||
|                 spans = [doc[mm[0] : mm[1]] for mm in cluster] | ||||
|                 doc.spans[f"{self.output_prefix}_{ii}"] = spans | ||||
| 
 | ||||
|     def update( | ||||
|  | @ -218,10 +222,10 @@ class SpanPredictor(TrainablePipe): | |||
|             end_probs = ops.softmax(end_scores, axis=1) | ||||
|             start_targets = to_categorical(starts, n_classes) | ||||
|             end_targets = to_categorical(ends, n_classes) | ||||
|             start_grads = (start_probs - start_targets) | ||||
|             end_grads = (end_probs - end_targets) | ||||
|             start_grads = start_probs - start_targets | ||||
|             end_grads = end_probs - end_targets | ||||
|             grads = ops.xp.stack((start_grads, end_grads), axis=2) | ||||
|             loss = float((grads ** 2).sum()) | ||||
|             loss = float((grads**2).sum()) | ||||
|         return loss, grads | ||||
| 
 | ||||
|     def initialize( | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user