mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
a22215f427
* move featureextractor from Thinc * Update website/docs/api/architectures.md Co-authored-by: Ines Montani <ines@ines.io> * Update website/docs/api/architectures.md Co-authored-by: Ines Montani <ines@ines.io> Co-authored-by: Ines Montani <ines@ines.io>
26 lines
943 B
Python
26 lines
943 B
Python
from typing import List, Union, Callable, Tuple
|
|
from thinc.types import Ints2d, Doc
|
|
from thinc.api import Model, registry
|
|
|
|
|
|
|
|
@registry.layers("spacy.FeatureExtractor.v1")
|
|
def FeatureExtractor(columns: List[Union[int, str]]) -> Model[List[Doc], List[Ints2d]]:
|
|
return Model("extract_features", forward, attrs={"columns": columns})
|
|
|
|
|
|
def forward(model: Model[List[Doc], List[Ints2d]], docs, is_train: bool) -> Tuple[List[Ints2d], Callable]:
|
|
columns = model.attrs["columns"]
|
|
features: List[Ints2d] = []
|
|
for doc in docs:
|
|
if hasattr(doc, "to_array"):
|
|
attrs = doc.to_array(columns)
|
|
else:
|
|
attrs = doc.doc.to_array(columns)[doc.start : doc.end]
|
|
if attrs.ndim == 1:
|
|
attrs = attrs.reshape((attrs.shape[0], 1))
|
|
features.append(model.ops.asarray2i(attrs, dtype="uint64"))
|
|
|
|
backprop: Callable[[List[Ints2d]], List] = lambda d_features: []
|
|
return features, backprop
|