Auto-format

This commit is contained in:
Ines Montani 2019-04-01 12:11:27 +02:00
parent 5821b020d5
commit c23e234d65
8 changed files with 43 additions and 39 deletions

View File

@ -86,7 +86,7 @@ def with_cpu(ops, model):
as necessary."""
model.to_cpu()
def with_cpu_forward(inputs, drop=0.):
def with_cpu_forward(inputs, drop=0.0):
cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
gpu_outputs = _to_device(ops, cpu_outputs)
@ -106,7 +106,7 @@ def _to_cpu(X):
return tuple([_to_cpu(x) for x in X])
elif isinstance(X, list):
return [_to_cpu(x) for x in X]
elif hasattr(X, 'get'):
elif hasattr(X, "get"):
return X.get()
else:
return X
@ -142,7 +142,9 @@ class extract_ngrams(Model):
# The dtype here matches what thinc is expecting -- which differs per
# platform (by int definition). This should be fixed once the problem
# is fixed on Thinc's side.
lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
lengths = self.ops.asarray(
[arr.shape[0] for arr in batch_keys], dtype=numpy.int_
)
batch_keys = self.ops.xp.concatenate(batch_keys)
batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
return (batch_keys, batch_vals, lengths), None
@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
)
linear_model = build_bow_text_classifier(
nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
if cfg.get('exclusive_classes'):
nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
)
if cfg.get("exclusive_classes"):
output_layer = Softmax(nr_class, nr_class * 2)
else:
output_layer = (
zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
>> logistic
)
model = (
(linear_model | cnn_model)
>> output_layer
zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
)
model = (linear_model | cnn_model) >> output_layer
model.tok2vec = chain(tok2vec, flatten)
model.nO = nr_class
model.lsuv = False
return model
def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
no_output_layer=False, **cfg):
def build_bow_text_classifier(
nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
):
with Model.define_operators({">>": chain}):
model = (
with_cpu(Model.ops,
extract_ngrams(ngram_size, attr=ORTH)
>> LinearModel(nr_class)
)
model = with_cpu(
Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
)
if not no_output_layer:
model = model >> (cpu_softmax if exclusive_classes else logistic)
@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
@layerize
def cpu_softmax(X, drop=0.):
def cpu_softmax(X, drop=0.0):
ops = NumpyOps()
Y = ops.softmax(X)
def cpu_softmax_backward(dY, sgd=None):
return dY
@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
if exclusive_classes:
output_layer = Softmax(nr_class, tok2vec.nO)
else:
output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
output_layer = (
zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
)
model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
model.tok2vec = chain(tok2vec, flatten)
model.nO = nr_class

View File

@ -125,7 +125,9 @@ def pretrain(
max_length=max_length,
min_length=min_length,
)
loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
loss = make_update(
model, docs, optimizer, objective=loss_func, drop=dropout
)
progress = tracker.update(epoch, loss, docs)
if progress:
msg.row(progress, **row_settings)

View File

@ -1,7 +1,7 @@
# coding: utf8
from __future__ import unicode_literals
from ...symbols import LEMMA, PRON_LEMMA, AUX
from ...symbols import LEMMA, PRON_LEMMA
_subordinating_conjunctions = [
"that",
@ -457,7 +457,6 @@ MORPH_RULES = {
"have": {"POS": "AUX"},
"'m": {"POS": "AUX", LEMMA: "be"},
"'ve": {"POS": "AUX"},
"'re": {"POS": "AUX", LEMMA: "be"},
"'s": {"POS": "AUX"},
"is": {"POS": "AUX"},
"'d": {"POS": "AUX"},

View File

@ -1,6 +1,8 @@
import pytest
# coding: utf8
from __future__ import unicode_literals
import re
from ... import compat
from spacy import compat
prefix_search = (
b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@ -67,4 +69,4 @@ if compat.is_python2:
# string above in the xpass message.
def test_issue3356():
pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
assert not pattern.search(u"hello")
assert not pattern.search("hello")

View File

@ -1,10 +1,14 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.util import decaying
def test_decaying():
sizes = decaying(10., 1., .5)
def test_issue3447():
sizes = decaying(10.0, 1.0, 0.5)
size = next(sizes)
assert size == 10.
assert size == 10.0
size = next(sizes)
assert size == 10. - 0.5
assert size == 10.0 - 0.5
size = next(sizes)
assert size == 10. - 0.5 - 0.5
assert size == 10.0 - 0.5 - 0.5

View File

@ -1,7 +1,6 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
from spacy.lang.en import English
from spacy.tokens import Doc

View File

@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
os.mkdir(path2str(symlink_target))
# yield -- need to cleanup even if assertion fails
# https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
def cleanup():
symlink_remove(symlink)
os.rmdir(path2str(symlink_target))