Auto-format

This commit is contained in:
Ines Montani 2019-04-01 12:11:27 +02:00
parent 5821b020d5
commit c23e234d65
8 changed files with 43 additions and 39 deletions

View File

@ -86,7 +86,7 @@ def with_cpu(ops, model):
as necessary.""" as necessary."""
model.to_cpu() model.to_cpu()
def with_cpu_forward(inputs, drop=0.): def with_cpu_forward(inputs, drop=0.0):
cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop) cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
gpu_outputs = _to_device(ops, cpu_outputs) gpu_outputs = _to_device(ops, cpu_outputs)
@ -106,7 +106,7 @@ def _to_cpu(X):
return tuple([_to_cpu(x) for x in X]) return tuple([_to_cpu(x) for x in X])
elif isinstance(X, list): elif isinstance(X, list):
return [_to_cpu(x) for x in X] return [_to_cpu(x) for x in X]
elif hasattr(X, 'get'): elif hasattr(X, "get"):
return X.get() return X.get()
else: else:
return X return X
@ -142,7 +142,9 @@ class extract_ngrams(Model):
# The dtype here matches what thinc is expecting -- which differs per # The dtype here matches what thinc is expecting -- which differs per
# platform (by int definition). This should be fixed once the problem # platform (by int definition). This should be fixed once the problem
# is fixed on Thinc's side. # is fixed on Thinc's side.
lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_) lengths = self.ops.asarray(
[arr.shape[0] for arr in batch_keys], dtype=numpy.int_
)
batch_keys = self.ops.xp.concatenate(batch_keys) batch_keys = self.ops.xp.concatenate(batch_keys)
batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f") batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
return (batch_keys, batch_vals, lengths), None return (batch_keys, batch_vals, lengths), None
@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
) )
linear_model = build_bow_text_classifier( linear_model = build_bow_text_classifier(
nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False) nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
if cfg.get('exclusive_classes'): )
if cfg.get("exclusive_classes"):
output_layer = Softmax(nr_class, nr_class * 2) output_layer = Softmax(nr_class, nr_class * 2)
else: else:
output_layer = ( output_layer = (
zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
>> logistic
)
model = (
(linear_model | cnn_model)
>> output_layer
) )
model = (linear_model | cnn_model) >> output_layer
model.tok2vec = chain(tok2vec, flatten) model.tok2vec = chain(tok2vec, flatten)
model.nO = nr_class model.nO = nr_class
model.lsuv = False model.lsuv = False
return model return model
def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False, def build_bow_text_classifier(
no_output_layer=False, **cfg): nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
):
with Model.define_operators({">>": chain}): with Model.define_operators({">>": chain}):
model = ( model = with_cpu(
with_cpu(Model.ops, Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
extract_ngrams(ngram_size, attr=ORTH)
>> LinearModel(nr_class)
)
) )
if not no_output_layer: if not no_output_layer:
model = model >> (cpu_softmax if exclusive_classes else logistic) model = model >> (cpu_softmax if exclusive_classes else logistic)
@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
@layerize @layerize
def cpu_softmax(X, drop=0.): def cpu_softmax(X, drop=0.0):
ops = NumpyOps() ops = NumpyOps()
Y = ops.softmax(X)
def cpu_softmax_backward(dY, sgd=None): def cpu_softmax_backward(dY, sgd=None):
return dY return dY
@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
if exclusive_classes: if exclusive_classes:
output_layer = Softmax(nr_class, tok2vec.nO) output_layer = Softmax(nr_class, tok2vec.nO)
else: else:
output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic output_layer = (
zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
)
model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
model.tok2vec = chain(tok2vec, flatten) model.tok2vec = chain(tok2vec, flatten)
model.nO = nr_class model.nO = nr_class

View File

@ -125,7 +125,9 @@ def pretrain(
max_length=max_length, max_length=max_length,
min_length=min_length, min_length=min_length,
) )
loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout) loss = make_update(
model, docs, optimizer, objective=loss_func, drop=dropout
)
progress = tracker.update(epoch, loss, docs) progress = tracker.update(epoch, loss, docs)
if progress: if progress:
msg.row(progress, **row_settings) msg.row(progress, **row_settings)

View File

@ -1,7 +1,7 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
from ...symbols import LEMMA, PRON_LEMMA, AUX from ...symbols import LEMMA, PRON_LEMMA
_subordinating_conjunctions = [ _subordinating_conjunctions = [
"that", "that",
@ -457,7 +457,6 @@ MORPH_RULES = {
"have": {"POS": "AUX"}, "have": {"POS": "AUX"},
"'m": {"POS": "AUX", LEMMA: "be"}, "'m": {"POS": "AUX", LEMMA: "be"},
"'ve": {"POS": "AUX"}, "'ve": {"POS": "AUX"},
"'re": {"POS": "AUX", LEMMA: "be"},
"'s": {"POS": "AUX"}, "'s": {"POS": "AUX"},
"is": {"POS": "AUX"}, "is": {"POS": "AUX"},
"'d": {"POS": "AUX"}, "'d": {"POS": "AUX"},

View File

@ -1,6 +1,8 @@
import pytest # coding: utf8
from __future__ import unicode_literals
import re import re
from ... import compat from spacy import compat
prefix_search = ( prefix_search = (
b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])" b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@ -67,4 +69,4 @@ if compat.is_python2:
# string above in the xpass message. # string above in the xpass message.
def test_issue3356(): def test_issue3356():
pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8"))) pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
assert not pattern.search(u"hello") assert not pattern.search("hello")

View File

@ -1,10 +1,14 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.util import decaying from spacy.util import decaying
def test_decaying():
sizes = decaying(10., 1., .5) def test_issue3447():
sizes = decaying(10.0, 1.0, 0.5)
size = next(sizes) size = next(sizes)
assert size == 10. assert size == 10.0
size = next(sizes) size = next(sizes)
assert size == 10. - 0.5 assert size == 10.0 - 0.5
size = next(sizes) size = next(sizes)
assert size == 10. - 0.5 - 0.5 assert size == 10.0 - 0.5 - 0.5

View File

@ -1,7 +1,6 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest
from spacy.lang.en import English from spacy.lang.en import English
from spacy.tokens import Doc from spacy.tokens import Doc

View File

@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
os.mkdir(path2str(symlink_target)) os.mkdir(path2str(symlink_target))
# yield -- need to cleanup even if assertion fails # yield -- need to cleanup even if assertion fails
# https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240 # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
def cleanup(): def cleanup():
symlink_remove(symlink) symlink_remove(symlink)
os.rmdir(path2str(symlink_target)) os.rmdir(path2str(symlink_target))