Auto-format

Ines Montani 2019-04-01 12:11:27 +02:00
parent 5821b020d5
commit c23e234d65
8 changed files with 43 additions and 39 deletions

View File

@@ -86,7 +86,7 @@ def with_cpu(ops, model):
     as necessary."""
     model.to_cpu()

-    def with_cpu_forward(inputs, drop=0.):
+    def with_cpu_forward(inputs, drop=0.0):
         cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
         gpu_outputs = _to_device(ops, cpu_outputs)
@@ -106,7 +106,7 @@ def _to_cpu(X):
         return tuple([_to_cpu(x) for x in X])
     elif isinstance(X, list):
         return [_to_cpu(x) for x in X]
-    elif hasattr(X, 'get'):
+    elif hasattr(X, "get"):
         return X.get()
     else:
         return X
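
Note: the `hasattr(X, "get")` branch above duck-types GPU arrays. CuPy arrays expose a `.get()` method that copies device memory back to the host as a NumPy array, which is what `_to_cpu` relies on. A minimal sketch, assuming CuPy is installed:

    import cupy
    import numpy

    x_gpu = cupy.ones((2, 3))
    x_cpu = x_gpu.get()  # copy device memory back to a host numpy array
    assert isinstance(x_cpu, numpy.ndarray)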
@@ -142,7 +142,9 @@ class extract_ngrams(Model):
         # The dtype here matches what thinc is expecting -- which differs per
         # platform (by int definition). This should be fixed once the problem
         # is fixed on Thinc's side.
-        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+        lengths = self.ops.asarray(
+            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
+        )
         batch_keys = self.ops.xp.concatenate(batch_keys)
         batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
         return (batch_keys, batch_vals, lengths), None
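
Note: the comment in this hunk refers to `numpy.int_` being aliased to the platform's C `long` on NumPy versions of this era, so the resulting dtype differs by operating system. A quick illustration:

    import numpy

    # Typically int64 on Linux/macOS, but int32 on 64-bit Windows,
    # because numpy.int_ follows the C "long" type.
    lengths = numpy.asarray([3, 5, 2], dtype=numpy.int_)
    print(lengths.dtype)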
@@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
         )

         linear_model = build_bow_text_classifier(
-            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
-        if cfg.get('exclusive_classes'):
+            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
+        )
+        if cfg.get("exclusive_classes"):
             output_layer = Softmax(nr_class, nr_class * 2)
         else:
             output_layer = (
-                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
-                >> logistic
+                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
             )
-        model = (
-            (linear_model | cnn_model)
-            >> output_layer
-        )
+        model = (linear_model | cnn_model) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
     model.nO = nr_class
     model.lsuv = False
     return model


-def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
-                              no_output_layer=False, **cfg):
+def build_bow_text_classifier(
+    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
+):
     with Model.define_operators({">>": chain}):
-        model = (
-            with_cpu(Model.ops,
-                extract_ngrams(ngram_size, attr=ORTH)
-                >> LinearModel(nr_class)
-            )
-        )
+        model = with_cpu(
+            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
+        )
         if not no_output_layer:
             model = model >> (cpu_softmax if exclusive_classes else logistic)
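
Note: the `>>` and `|` in these hunks are not ordinary Python operators; `Model.define_operators` rebinds them to thinc combinators inside the `with` block (`>>` chains layers, `|` concatenates their outputs). A minimal sketch against the thinc 7.x API used here:

    from thinc.v2v import Model, Affine
    from thinc.api import chain

    with Model.define_operators({">>": chain}):
        # ">>" now means chain(): feed the first layer's output into the second.
        model = Affine(10, 20) >> Affine(5, 10)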
@@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
 @layerize
-def cpu_softmax(X, drop=0.):
+def cpu_softmax(X, drop=0.0):
     ops = NumpyOps()
-    Y = ops.softmax(X)

     def cpu_softmax_backward(dY, sgd=None):
         return dY
@@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
         if exclusive_classes:
             output_layer = Softmax(nr_class, tok2vec.nO)
         else:
-            output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            output_layer = (
+                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            )
         model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class

View File

@@ -125,7 +125,9 @@ def pretrain(
                 max_length=max_length,
                 min_length=min_length,
             )
-            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
+            loss = make_update(
+                model, docs, optimizer, objective=loss_func, drop=dropout
+            )
             progress = tracker.update(epoch, loss, docs)
             if progress:
                 msg.row(progress, **row_settings)
@@ -215,8 +217,8 @@ def get_cossim_loss(yh, y):
     norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
     mul_norms = norm_yh * norm_y
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
-    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
-    loss = xp.abs(cosine-1).sum()
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
+    loss = xp.abs(cosine - 1).sum()
     return loss, -d_yh
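
Note: the `d_yh` line in this hunk is the analytic gradient of cosine similarity with respect to `yh`. In LaTeX:

    \frac{\partial \cos(\hat{y}, y)}{\partial \hat{y}}
        = \frac{y}{\lVert\hat{y}\rVert\,\lVert y\rVert}
        - \cos(\hat{y}, y)\,\frac{\hat{y}}{\lVert\hat{y}\rVert^{2}}

Since the loss is \sum \lvert \cos - 1 \rvert and cosine similarity never exceeds 1, returning `-d_yh` gives the direction that pushes the cosine toward 1.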

View File

@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals

-from ...symbols import LEMMA, PRON_LEMMA, AUX
+from ...symbols import LEMMA, PRON_LEMMA

 _subordinating_conjunctions = [
     "that",
@@ -457,7 +457,6 @@ MORPH_RULES = {
         "have": {"POS": "AUX"},
         "'m": {"POS": "AUX", LEMMA: "be"},
         "'ve": {"POS": "AUX"},
-        "'re": {"POS": "AUX", LEMMA: "be"},
         "'s": {"POS": "AUX"},
         "is": {"POS": "AUX"},
         "'d": {"POS": "AUX"},

View File

@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals

-from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB
+from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB
 from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ

 # Source: Korakot Chaovavanich
@@ -17,8 +17,8 @@ TAG_MAP = {
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
     # VERB
-    "VACT":{POS:VERB},
-    "VSTA":{POS:VERB},
+    "VACT": {POS: VERB},
+    "VSTA": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},

View File

@ -1,6 +1,8 @@
import pytest # coding: utf8
from __future__ import unicode_literals
import re import re
from ... import compat from spacy import compat
prefix_search = ( prefix_search = (
b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])" b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@@ -67,4 +69,4 @@ if compat.is_python2:
     # string above in the xpass message.
     def test_issue3356():
         pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
-        assert not pattern.search(u"hello")
+        assert not pattern.search("hello")
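
Note: the escape sequences in `prefix_search` are plain UTF-8 bytes; decoding makes the pattern readable (a sketch, purely illustrative):

    print(b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93".decode("utf8"))
    # ^§|^%|^=|^—|^–  (section sign, percent, equals, em dash, en dash)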

View File

@@ -1,10 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.util import decaying

-def test_decaying():
-    sizes = decaying(10., 1., .5)
+
+def test_issue3447():
+    sizes = decaying(10.0, 1.0, 0.5)
     size = next(sizes)
-    assert size == 10.
+    assert size == 10.0
     size = next(sizes)
-    assert size == 10. - 0.5
+    assert size == 10.0 - 0.5
     size = next(sizes)
-    assert size == 10. - 0.5 - 0.5
+    assert size == 10.0 - 0.5 - 0.5
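
Note: `spacy.util.decaying(start, stop, decay)` yields an infinite series that starts at `start` and decreases by `decay` per step, as the assertions above show; spaCy uses it e.g. to anneal dropout during training. A usage sketch:

    from spacy.util import decaying

    dropout = decaying(0.6, 0.2, 1e-4)  # start, floor, per-step decay
    print(next(dropout))  # 0.6
    print(next(dropout))  # 0.5999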

View File

@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals

-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc

View File

@@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
     os.mkdir(path2str(symlink_target))
     # yield -- need to cleanup even if assertion fails
     # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
+
     def cleanup():
         symlink_remove(symlink)
         os.rmdir(path2str(symlink_target))
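
Note: the `cleanup` inner function exists so teardown runs even when the test body raises, per the linked pytest issue; the fixture presumably registers it with `request.addfinalizer`. A minimal sketch of that pattern:

    import os
    import pytest

    @pytest.fixture
    def setup_target(request, tmp_path):
        target = tmp_path / "target"
        os.mkdir(str(target))

        def cleanup():
            os.rmdir(str(target))

        # Finalizers run at teardown even if the test fails mid-way.
        request.addfinalizer(cleanup)
        return target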