mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* Use isort with Black profile * isort all the things * Fix import cycles as a result of import sorting * Add DOCBIN_ALL_ATTRS type definition * Add isort to requirements * Remove isort from build dependencies check * Typo
		
			
				
	
	
		
			30 lines
		
	
	
		
			970 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			30 lines
		
	
	
		
			970 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Callable, List, Tuple, Union
 | |
| 
 | |
| from thinc.api import Model, registry
 | |
| from thinc.types import Ints2d
 | |
| 
 | |
| from ..tokens import Doc
 | |
| 
 | |
| 
 | |
| @registry.layers("spacy.FeatureExtractor.v1")
 | |
| def FeatureExtractor(columns: List[Union[int, str]]) -> Model[List[Doc], List[Ints2d]]:
 | |
|     return Model("extract_features", forward, attrs={"columns": columns})
 | |
| 
 | |
| 
 | |
| def forward(
 | |
|     model: Model[List[Doc], List[Ints2d]], docs, is_train: bool
 | |
| ) -> Tuple[List[Ints2d], Callable]:
 | |
|     columns = model.attrs["columns"]
 | |
|     features: List[Ints2d] = []
 | |
|     for doc in docs:
 | |
|         if hasattr(doc, "to_array"):
 | |
|             attrs = doc.to_array(columns)
 | |
|         else:
 | |
|             attrs = doc.doc.to_array(columns)[doc.start : doc.end]
 | |
|         if attrs.ndim == 1:
 | |
|             attrs = attrs.reshape((attrs.shape[0], 1))
 | |
|         features.append(model.ops.asarray2i(attrs, dtype="uint64"))
 | |
| 
 | |
|     backprop: Callable[[List[Ints2d]], List] = lambda d_features: []
 | |
|     return features, backprop
 |