Auto-format

Ines Montani 2019-04-01 12:11:27 +02:00
parent 5821b020d5
commit c23e234d65
8 changed files with 43 additions and 39 deletions

View File

@@ -86,7 +86,7 @@ def with_cpu(ops, model):
     as necessary."""
     model.to_cpu()

-    def with_cpu_forward(inputs, drop=0.):
+    def with_cpu_forward(inputs, drop=0.0):
         cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
         gpu_outputs = _to_device(ops, cpu_outputs)
@@ -106,7 +106,7 @@ def _to_cpu(X):
         return tuple([_to_cpu(x) for x in X])
     elif isinstance(X, list):
         return [_to_cpu(x) for x in X]
-    elif hasattr(X, 'get'):
+    elif hasattr(X, "get"):
         return X.get()
     else:
         return X
@@ -142,7 +142,9 @@ class extract_ngrams(Model):
         # The dtype here matches what thinc is expecting -- which differs per
         # platform (by int definition). This should be fixed once the problem
         # is fixed on Thinc's side.
-        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+        lengths = self.ops.asarray(
+            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
+        )
         batch_keys = self.ops.xp.concatenate(batch_keys)
         batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
         return (batch_keys, batch_vals, lengths), None
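For context on the dtype comment above: `numpy.int_` is NumPy 1.x's default integer type, an alias for C `long`, so its width genuinely differs by platform. This is background on NumPy, not part of the commit:

import numpy

# numpy.int_ aliases C long in NumPy 1.x: 64-bit on most 64-bit Unixes but
# 32-bit on Windows, which is why the dtype Thinc expects differs per platform.
print(numpy.dtype(numpy.int_).itemsize)  # 8 on 64-bit Linux/macOS, 4 on Windows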
@@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
     )
     linear_model = build_bow_text_classifier(
-        nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
-    if cfg.get('exclusive_classes'):
+        nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
+    )
+    if cfg.get("exclusive_classes"):
         output_layer = Softmax(nr_class, nr_class * 2)
     else:
         output_layer = (
-            zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
-            >> logistic
+            zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
         )
-    model = (
-        (linear_model | cnn_model)
-        >> output_layer
-    )
+    model = (linear_model | cnn_model) >> output_layer
     model.tok2vec = chain(tok2vec, flatten)
     model.nO = nr_class
     model.lsuv = False
     return model


-def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
-                              no_output_layer=False, **cfg):
+def build_bow_text_classifier(
+    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
+):
     with Model.define_operators({">>": chain}):
-        model = (
-            with_cpu(Model.ops,
-                extract_ngrams(ngram_size, attr=ORTH)
-                >> LinearModel(nr_class)
-            )
-        )
+        model = with_cpu(
+            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
+        )
         if not no_output_layer:
             model = model >> (cpu_softmax if exclusive_classes else logistic)
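Compositions like `extract_ngrams(...) >> LinearModel(nr_class)` and `(linear_model | cnn_model)` work because `Model.define_operators` temporarily binds Python operators to combinators: `>>` to `chain` here, and `|` to `concatenate` elsewhere in this module (the `|` binding is an assumption; only `>>` appears in this hunk's table). A minimal sketch of the pattern against the thinc 7.x API:

# Sketch of Thinc's operator-overloading pattern (thinc 7.x API); layer sizes
# are illustrative. Affine takes (nr_out, nr_in).
from thinc.api import chain, concatenate
from thinc.v2v import Model, Affine

with Model.define_operators({">>": chain, "|": concatenate}):
    # `|` concatenates the two sub-models' outputs feature-wise (16 + 16);
    # `>>` pipes the 32-dim result into the final layer.
    model = (Affine(16, 8) | Affine(16, 8)) >> Affine(4, 32)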
@@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
 @layerize
-def cpu_softmax(X, drop=0.):
+def cpu_softmax(X, drop=0.0):
     ops = NumpyOps()
     Y = ops.softmax(X)

     def cpu_softmax_backward(dY, sgd=None):
         return dY
@@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
     if exclusive_classes:
         output_layer = Softmax(nr_class, tok2vec.nO)
     else:
-        output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+        output_layer = (
+            zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+        )
     model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
     model.tok2vec = chain(tok2vec, flatten)
     model.nO = nr_class

View File

@@ -125,7 +125,9 @@ def pretrain(
                 max_length=max_length,
                 min_length=min_length,
             )
-            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
+            loss = make_update(
+                model, docs, optimizer, objective=loss_func, drop=dropout
+            )
             progress = tracker.update(epoch, loss, docs)
             if progress:
                 msg.row(progress, **row_settings)
@@ -215,8 +217,8 @@ def get_cossim_loss(yh, y):
     norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
     mul_norms = norm_yh * norm_y
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
-    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
-    loss = xp.abs(cosine-1).sum()
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
+    loss = xp.abs(cosine - 1).sum()
     return loss, -d_yh
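To see why `-d_yh` is returned: per row, cosine = yh·y / (‖yh‖‖y‖), and since cosine ≤ 1 the loss Σ|cosine − 1| equals Σ(1 − cosine), so ∂loss/∂yh = −∂cosine/∂yh = −d_yh. A finite-difference sanity check, assuming `xp` is plain NumPy:

import numpy as np

def get_cossim_loss(yh, y):
    # Same computation as in the diff above, with xp = numpy.
    norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = np.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
    loss = np.abs(cosine - 1).sum()
    return loss, -d_yh

rng = np.random.RandomState(0)
yh, y = rng.rand(3, 5), rng.rand(3, 5)
loss, grad = get_cossim_loss(yh, y)
eps = 1e-6
yh_eps = yh.copy()
yh_eps[0, 0] += eps
# The numeric slope should match the analytic gradient entry.
assert abs((get_cossim_loss(yh_eps, y)[0] - loss) / eps - grad[0, 0]) < 1e-5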

View File

@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals

-from ...symbols import LEMMA, PRON_LEMMA, AUX
+from ...symbols import LEMMA, PRON_LEMMA

 _subordinating_conjunctions = [
     "that",
@@ -457,7 +457,6 @@ MORPH_RULES = {
         "have": {"POS": "AUX"},
         "'m": {"POS": "AUX", LEMMA: "be"},
         "'ve": {"POS": "AUX"},
-        "'re": {"POS": "AUX", LEMMA: "be"},
         "'s": {"POS": "AUX"},
         "is": {"POS": "AUX"},
         "'d": {"POS": "AUX"},

View File

@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals

-from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB
+from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB
 from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ

 # Source: Korakot Chaovavanich
@@ -17,8 +17,8 @@ TAG_MAP = {
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
     # VERB
-    "VACT":{POS:VERB},
-    "VSTA":{POS:VERB},
+    "VACT": {POS: VERB},
+    "VSTA": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},

View File

@@ -1,6 +1,8 @@
-import pytest
+# coding: utf8
+from __future__ import unicode_literals
+
 import re
-from ... import compat
+from spacy import compat

 prefix_search = (
     b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@@ -67,4 +69,4 @@ if compat.is_python2:
     # string above in the xpass message.
     def test_issue3356():
         pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
-        assert not pattern.search(u"hello")
+        assert not pattern.search("hello")

View File

@@ -1,10 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.util import decaying

-def test_decaying():
-    sizes = decaying(10., 1., .5)
+
+def test_issue3447():
+    sizes = decaying(10.0, 1.0, 0.5)
     size = next(sizes)
-    assert size == 10.
+    assert size == 10.0
     size = next(sizes)
-    assert size == 10. - 0.5
+    assert size == 10.0 - 0.5
     size = next(sizes)
-    assert size == 10. - 0.5 - 0.5
+    assert size == 10.0 - 0.5 - 0.5
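For context, `decaying(start, stop, decay)` yields an infinite stream that steps down linearly from `start` by `decay` per call; the assertions above pin down exactly that behaviour. A rough equivalent (the floor at `stop` is an assumption about spacy.util's implementation, not shown in this diff):

def decaying(start, stop, decay):
    # Rough equivalent of spacy.util.decaying as exercised by the test above.
    curr = float(start)
    while True:
        yield max(curr, stop)  # assumed floor at `stop`
        curr -= decay

sizes = decaying(10.0, 1.0, 0.5)
assert [next(sizes) for _ in range(3)] == [10.0, 9.5, 9.0]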

View File

@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals

-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc

View File

@@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
     os.mkdir(path2str(symlink_target))
     # yield -- need to cleanup even if assertion fails
     # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
+
     def cleanup():
         symlink_remove(symlink)
         os.rmdir(path2str(symlink_target))
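The `cleanup` callback above is presumably registered with `request.addfinalizer` (the registration line falls outside this hunk), which guarantees teardown runs even if the fixture body or the test raises partway. A standalone sketch of the same pattern with illustrative names:

import os
import pytest

@pytest.fixture
def tmp_link_target(request, tmp_path):
    target = tmp_path / "target"
    os.mkdir(str(target))

    def cleanup():
        # Runs even when the test fails partway.
        os.rmdir(str(target))

    request.addfinalizer(cleanup)
    return target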