Fix formatting

This commit is contained in:
ines 2017-09-25 18:37:13 +02:00
parent 8eb0b7b779
commit d2d35b63b7

View File

@@ -48,7 +48,7 @@ from .parts_of_speech import X
class SentenceSegmenter(object):
'''A simple spaCy hook, to allow custom sentence boundary detection logic
"""A simple spaCy hook, to allow custom sentence boundary detection logic
(that doesn't require the dependency parse).
To change the sentence boundary detection strategy, pass a generator
@@ -57,7 +57,7 @@ class SentenceSegmenter(object):
Sentence detection strategies should be generators that take `Doc` objects
and yield `Span` objects for each sentence.
'''
"""
name = 'sbd'
def __init__(self, vocab, strategy=None):
@@ -89,30 +89,30 @@ class BaseThincComponent(object):
@classmethod
def Model(cls, *shape, **kwargs):
'''Initialize a model for the pipe.'''
"""Initialize a model for the pipe."""
raise NotImplementedError
def __init__(self, vocab, model=True, **cfg):
'''Create a new pipe instance.'''
"""Create a new pipe instance."""
raise NotImplementedError
def __call__(self, doc):
'''Apply the pipe to one document. The document is
"""Apply the pipe to one document. The document is
modified in-place, and returned.
Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
'''
"""
scores = self.predict([doc])
self.set_annotations([doc], scores)
return doc
def pipe(self, stream, batch_size=128, n_threads=-1):
'''Apply the pipe to a stream of documents.
"""Apply the pipe to a stream of documents.
Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
'''
"""
for docs in cytoolz.partition_all(batch_size, stream):
docs = list(docs)
scores = self.predict(docs)
@@ -120,43 +120,43 @@ class BaseThincComponent(object):
yield from docs
def predict(self, docs):
'''Apply the pipeline's model to a batch of docs, without
"""Apply the pipeline's model to a batch of docs, without
modifying them.
'''
"""
raise NotImplementedError
def set_annotations(self, docs, scores):
'''Modify a batch of documents, using pre-computed scores.'''
"""Modify a batch of documents, using pre-computed scores."""
raise NotImplementedError
def update(self, docs, golds, drop=0., sgd=None, losses=None):
'''Learn from a batch of documents and gold-standard information,
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model.
Delegates to predict() and get_loss().
'''
"""
raise NotImplementedError
def get_loss(self, docs, golds, scores):
'''Find the loss and gradient of loss for the batch of
documents and their predicted scores.'''
"""Find the loss and gradient of loss for the batch of
documents and their predicted scores."""
raise NotImplementedError
def begin_training(self, gold_tuples=tuple(), pipeline=None):
'''Initialize the pipe for training, using data examples if available.
If no model has been initialized yet, the model is added.'''
"""Initialize the pipe for training, using data examples if available.
If no model has been initialized yet, the model is added."""
if self.model is True:
self.model = self.Model(**self.cfg)
link_vectors_to_models(self.vocab)
def use_params(self, params):
'''Modify the pipe's model, to use the given parameter values.
'''
"""Modify the pipe's model, to use the given parameter values.
"""
with self.model.use_params(params):
yield
def to_bytes(self, **exclude):
'''Serialize the pipe to a bytestring.'''
"""Serialize the pipe to a bytestring."""
serialize = OrderedDict((
('cfg', lambda: json_dumps(self.cfg)),
('model', lambda: self.model.to_bytes()),
@@ -165,7 +165,7 @@ class BaseThincComponent(object):
return util.to_bytes(serialize, exclude)
def from_bytes(self, bytes_data, **exclude):
'''Load the pipe from a bytestring.'''
"""Load the pipe from a bytestring."""
def load_model(b):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
@@ -181,7 +181,7 @@ class BaseThincComponent(object):
return self
def to_disk(self, path, **exclude):
'''Serialize the pipe to disk.'''
"""Serialize the pipe to disk."""
serialize = OrderedDict((
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
('vocab', lambda p: self.vocab.to_disk(p)),
@@ -190,7 +190,7 @@ class BaseThincComponent(object):
util.to_disk(path, serialize, exclude)
def from_disk(self, path, **exclude):
'''Load the pipe from disk.'''
"""Load the pipe from disk."""
def load_model(p):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
@@ -596,7 +596,7 @@ class SimilarityHook(BaseThincComponent):
return Siamese(Pooling(max_pool, mean_pool), CauchySimilarity(length))
def __call__(self, doc):
'''Install similarity hook'''
"""Install similarity hook"""
doc.user_hooks['similarity'] = self.predict
return doc