mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Add docstrings for Pipe API
This commit is contained in:
parent
1d73dec8b1
commit
39f390dba7
|
@ -88,17 +88,30 @@ class BaseThincComponent(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def Model(cls, *shape, **kwargs):
|
def Model(cls, *shape, **kwargs):
|
||||||
|
'''Initialize a model for the pipe.'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def __init__(self, vocab, model=True, **cfg):
|
def __init__(self, vocab, model=True, **cfg):
|
||||||
|
'''Create a new pipe instance.'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
|
'''Apply the pipe to one document. The document is
|
||||||
|
modified in-place, and returned.
|
||||||
|
|
||||||
|
Both __call__ and pipe should delegate to the `predict()`
|
||||||
|
and `set_annotations()` methods.
|
||||||
|
'''
|
||||||
scores = self.predict([doc])
|
scores = self.predict([doc])
|
||||||
self.set_annotations([doc], scores)
|
self.set_annotations([doc], scores)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||||
|
'''Apply the pipe to a stream of documents.
|
||||||
|
|
||||||
|
Both __call__ and pipe should delegate to the `predict()`
|
||||||
|
and `set_annotations()` methods.
|
||||||
|
'''
|
||||||
for docs in cytoolz.partition_all(batch_size, stream):
|
for docs in cytoolz.partition_all(batch_size, stream):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
scores = self.predict(docs)
|
scores = self.predict(docs)
|
||||||
|
@ -106,27 +119,42 @@ class BaseThincComponent(object):
|
||||||
yield from docs
|
yield from docs
|
||||||
|
|
||||||
def predict(self, docs):
|
def predict(self, docs):
|
||||||
|
'''Apply the pipeline's model to a batch of docs, without
|
||||||
|
modifying them.
|
||||||
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def set_annotations(self, docs, scores):
|
def set_annotations(self, docs, scores):
|
||||||
|
'''Modify a batch of documents, using pre-computed scores.'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def update(self, docs_tensors, golds, state=None, drop=0., sgd=None, losses=None):
|
def update(self, docs, golds, drop=0., sgd=None, losses=None):
|
||||||
|
'''Learn from a batch of documents and gold-standard information,
|
||||||
|
updating the pipe's model.
|
||||||
|
|
||||||
|
Delegates to predict() and get_loss().
|
||||||
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_loss(self, docs, golds, scores):
|
def get_loss(self, docs, golds, scores):
|
||||||
|
'''Find the loss and gradient of loss for the batch of
|
||||||
|
documents and their predicted scores.'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None):
|
def begin_training(self, gold_tuples=tuple(), pipeline=None):
|
||||||
token_vector_width = pipeline[0].model.nO
|
'''Initialize the pipe for training, using data examples if available.
|
||||||
|
If no model has been initialized yet, the model is added.'''
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
self.model = self.Model(1, token_vector_width)
|
self.model = self.Model(**self.cfg)
|
||||||
|
|
||||||
def use_params(self, params):
|
def use_params(self, params):
|
||||||
|
'''Modify the pipe's model, to use the given parameter values.
|
||||||
|
'''
|
||||||
with self.model.use_params(params):
|
with self.model.use_params(params):
|
||||||
yield
|
yield
|
||||||
|
|
||||||
def to_bytes(self, **exclude):
|
def to_bytes(self, **exclude):
|
||||||
|
'''Serialize the pipe to a bytestring.'''
|
||||||
serialize = OrderedDict((
|
serialize = OrderedDict((
|
||||||
('cfg', lambda: json_dumps(self.cfg)),
|
('cfg', lambda: json_dumps(self.cfg)),
|
||||||
('model', lambda: self.model.to_bytes()),
|
('model', lambda: self.model.to_bytes()),
|
||||||
|
@ -135,6 +163,7 @@ class BaseThincComponent(object):
|
||||||
return util.to_bytes(serialize, exclude)
|
return util.to_bytes(serialize, exclude)
|
||||||
|
|
||||||
def from_bytes(self, bytes_data, **exclude):
|
def from_bytes(self, bytes_data, **exclude):
|
||||||
|
'''Load the pipe from a bytestring.'''
|
||||||
def load_model(b):
|
def load_model(b):
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
||||||
|
@ -150,6 +179,7 @@ class BaseThincComponent(object):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def to_disk(self, path, **exclude):
|
def to_disk(self, path, **exclude):
|
||||||
|
'''Serialize the pipe to disk.'''
|
||||||
serialize = OrderedDict((
|
serialize = OrderedDict((
|
||||||
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
|
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
|
||||||
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
|
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
|
||||||
|
@ -158,6 +188,7 @@ class BaseThincComponent(object):
|
||||||
util.to_disk(path, serialize, exclude)
|
util.to_disk(path, serialize, exclude)
|
||||||
|
|
||||||
def from_disk(self, path, **exclude):
|
def from_disk(self, path, **exclude):
|
||||||
|
'''Load the pipe from disk.'''
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
||||||
|
|
Loading…
Reference in New Issue
Block a user