diff --git a/spacy/_ml.py b/spacy/_ml.py index edcaf7c1f..349b88df9 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -86,7 +86,7 @@ def with_cpu(ops, model): as necessary.""" model.to_cpu() - def with_cpu_forward(inputs, drop=0.): + def with_cpu_forward(inputs, drop=0.0): cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop) gpu_outputs = _to_device(ops, cpu_outputs) @@ -106,7 +106,7 @@ def _to_cpu(X): return tuple([_to_cpu(x) for x in X]) elif isinstance(X, list): return [_to_cpu(x) for x in X] - elif hasattr(X, 'get'): + elif hasattr(X, "get"): return X.get() else: return X @@ -142,7 +142,9 @@ class extract_ngrams(Model): # The dtype here matches what thinc is expecting -- which differs per # platform (by int definition). This should be fixed once the problem # is fixed on Thinc's side. - lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_) + lengths = self.ops.asarray( + [arr.shape[0] for arr in batch_keys], dtype=numpy.int_ + ) batch_keys = self.ops.xp.concatenate(batch_keys) batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f") return (batch_keys, batch_vals, lengths), None @@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg): ) linear_model = build_bow_text_classifier( - nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False) - if cfg.get('exclusive_classes'): + nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False + ) + if cfg.get("exclusive_classes"): output_layer = Softmax(nr_class, nr_class * 2) else: output_layer = ( - zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) - >> logistic + zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic ) - model = ( - (linear_model | cnn_model) - >> output_layer - ) + model = (linear_model | cnn_model) >> output_layer model.tok2vec = chain(tok2vec, flatten) model.nO = nr_class model.lsuv = False return model -def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False, - no_output_layer=False, **cfg): +def build_bow_text_classifier( + nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg +): with Model.define_operators({">>": chain}): - model = ( - with_cpu(Model.ops, - extract_ngrams(ngram_size, attr=ORTH) - >> LinearModel(nr_class) - ) + model = with_cpu( + Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class) ) if not no_output_layer: model = model >> (cpu_softmax if exclusive_classes else logistic) @@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False, @layerize -def cpu_softmax(X, drop=0.): +def cpu_softmax(X, drop=0.0): ops = NumpyOps() - Y = ops.softmax(X) - def cpu_softmax_backward(dY, sgd=None): return dY @@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, if exclusive_classes: output_layer = Softmax(nr_class, tok2vec.nO) else: - output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic + output_layer = ( + zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic + ) model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer model.tok2vec = chain(tok2vec, flatten) model.nO = nr_class diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index e44af8b48..0b316b47c 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -125,7 +125,9 @@ def pretrain( max_length=max_length, min_length=min_length, ) - loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout) + loss = make_update( + model, docs, optimizer, objective=loss_func, drop=dropout + ) progress = tracker.update(epoch, loss, docs) if progress: msg.row(progress, **row_settings) @@ -215,8 +217,8 @@ def get_cossim_loss(yh, y): norm_y = xp.linalg.norm(y, axis=1, keepdims=True) mul_norms = norm_yh * norm_y cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms - d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2)) - loss = xp.abs(cosine-1).sum() + d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2)) + loss = xp.abs(cosine - 1).sum() return loss, -d_yh diff --git a/spacy/lang/en/morph_rules.py b/spacy/lang/en/morph_rules.py index 54a108d53..198182ff0 100644 --- a/spacy/lang/en/morph_rules.py +++ b/spacy/lang/en/morph_rules.py @@ -1,7 +1,7 @@ # coding: utf8 from __future__ import unicode_literals -from ...symbols import LEMMA, PRON_LEMMA, AUX +from ...symbols import LEMMA, PRON_LEMMA _subordinating_conjunctions = [ "that", @@ -457,7 +457,6 @@ MORPH_RULES = { "have": {"POS": "AUX"}, "'m": {"POS": "AUX", LEMMA: "be"}, "'ve": {"POS": "AUX"}, - "'re": {"POS": "AUX", LEMMA: "be"}, "'s": {"POS": "AUX"}, "is": {"POS": "AUX"}, "'d": {"POS": "AUX"}, diff --git a/spacy/lang/th/tag_map.py b/spacy/lang/th/tag_map.py index 6515ffe05..119a2f6a0 100644 --- a/spacy/lang/th/tag_map.py +++ b/spacy/lang/th/tag_map.py @@ -1,7 +1,7 @@ # encoding: utf8 from __future__ import unicode_literals -from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB +from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ # Source: Korakot Chaovavanich @@ -17,8 +17,8 @@ TAG_MAP = { "CFQC": {POS: NOUN}, "CVBL": {POS: NOUN}, # VERB - "VACT":{POS:VERB}, - "VSTA":{POS:VERB}, + "VACT": {POS: VERB}, + "VSTA": {POS: VERB}, # PRON "PRON": {POS: PRON}, "NPRP": {POS: PRON}, diff --git a/spacy/tests/regression/test_issue3356.py b/spacy/tests/regression/test_issue3356.py index 4e27055c7..f8d16459c 100644 --- a/spacy/tests/regression/test_issue3356.py +++ b/spacy/tests/regression/test_issue3356.py @@ -1,6 +1,8 @@ -import pytest +# coding: utf8 +from __future__ import unicode_literals + import re -from ... import compat +from spacy import compat prefix_search = ( b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])" @@ -67,4 +69,4 @@ if compat.is_python2: # string above in the xpass message. def test_issue3356(): pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8"))) - assert not pattern.search(u"hello") + assert not pattern.search("hello") diff --git a/spacy/tests/regression/test_issue3447.py b/spacy/tests/regression/test_issue3447.py index bfe71669a..0ca1f9e67 100644 --- a/spacy/tests/regression/test_issue3447.py +++ b/spacy/tests/regression/test_issue3447.py @@ -1,10 +1,14 @@ +# coding: utf8 +from __future__ import unicode_literals + from spacy.util import decaying -def test_decaying(): - sizes = decaying(10., 1., .5) + +def test_issue3447(): + sizes = decaying(10.0, 1.0, 0.5) size = next(sizes) - assert size == 10. + assert size == 10.0 size = next(sizes) - assert size == 10. - 0.5 + assert size == 10.0 - 0.5 size = next(sizes) - assert size == 10. - 0.5 - 0.5 + assert size == 10.0 - 0.5 - 0.5 diff --git a/spacy/tests/regression/test_issue3468.py b/spacy/tests/regression/test_issue3468.py index 02cd01e17..ebbed2640 100644 --- a/spacy/tests/regression/test_issue3468.py +++ b/spacy/tests/regression/test_issue3468.py @@ -1,7 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import pytest from spacy.lang.en import English from spacy.tokens import Doc diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py index 6472dc7e1..2124cdb81 100644 --- a/spacy/tests/test_misc.py +++ b/spacy/tests/test_misc.py @@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink): os.mkdir(path2str(symlink_target)) # yield -- need to cleanup even if assertion fails # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240 + def cleanup(): symlink_remove(symlink) os.rmdir(path2str(symlink_target))