Auto-format

Ines Montani 2019-04-01 12:11:27 +02:00
parent 5821b020d5
commit c23e234d65
8 changed files with 43 additions and 39 deletions

View File

@@ -86,7 +86,7 @@ def with_cpu(ops, model):
     as necessary."""
     model.to_cpu()

-    def with_cpu_forward(inputs, drop=0.):
+    def with_cpu_forward(inputs, drop=0.0):
         cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
         gpu_outputs = _to_device(ops, cpu_outputs)
@@ -106,7 +106,7 @@ def _to_cpu(X):
         return tuple([_to_cpu(x) for x in X])
     elif isinstance(X, list):
         return [_to_cpu(x) for x in X]
-    elif hasattr(X, 'get'):
+    elif hasattr(X, "get"):
         return X.get()
     else:
         return X
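
Note: the `hasattr(X, "get")` branch above duck-types GPU arrays. CuPy arrays expose a `.get()` method that copies device memory back to the host as a NumPy array, which is what `_to_cpu` relies on. A minimal sketch, assuming CuPy is installed:

    import cupy
    import numpy

    x_gpu = cupy.ones((2, 3))
    x_cpu = x_gpu.get()  # copy device memory back to a host numpy array
    assert isinstance(x_cpu, numpy.ndarray)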
@@ -142,7 +142,9 @@ class extract_ngrams(Model):
         # The dtype here matches what thinc is expecting -- which differs per
         # platform (by int definition). This should be fixed once the problem
         # is fixed on Thinc's side.
-        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+        lengths = self.ops.asarray(
+            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
+        )
         batch_keys = self.ops.xp.concatenate(batch_keys)
         batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
         return (batch_keys, batch_vals, lengths), None
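
Note: the comment in this hunk refers to `numpy.int_` being aliased to the platform's C `long` on NumPy versions of this era, so the resulting dtype differs by operating system. A quick illustration:

    import numpy

    # Typically int64 on Linux/macOS, but int32 on 64-bit Windows,
    # because numpy.int_ follows the C "long" type.
    lengths = numpy.asarray([3, 5, 2], dtype=numpy.int_)
    print(lengths.dtype)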
@@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
         )

         linear_model = build_bow_text_classifier(
-            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
-        if cfg.get('exclusive_classes'):
+            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
+        )
+        if cfg.get("exclusive_classes"):
             output_layer = Softmax(nr_class, nr_class * 2)
         else:
             output_layer = (
-                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
-                >> logistic
+                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
             )
-        model = (
-            (linear_model | cnn_model)
-            >> output_layer
-        )
+        model = (linear_model | cnn_model) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
     model.nO = nr_class
     model.lsuv = False
     return model


-def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
-                              no_output_layer=False, **cfg):
+def build_bow_text_classifier(
+    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
+):
     with Model.define_operators({">>": chain}):
-        model = (
-            with_cpu(Model.ops,
-                extract_ngrams(ngram_size, attr=ORTH)
-                >> LinearModel(nr_class)
-            )
-        )
+        model = with_cpu(
+            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
+        )
         if not no_output_layer:
             model = model >> (cpu_softmax if exclusive_classes else logistic)
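
Note: the `>>` and `|` in these hunks are not ordinary Python operators; `Model.define_operators` rebinds them to thinc combinators inside the `with` block (`>>` chains layers, `|` concatenates their outputs). A minimal sketch against the thinc 7.x API used here:

    from thinc.v2v import Model, Affine
    from thinc.api import chain

    with Model.define_operators({">>": chain}):
        # ">>" now means chain(): feed the first layer's output into the second.
        model = Affine(10, 20) >> Affine(5, 10)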
@@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
 @layerize
-def cpu_softmax(X, drop=0.):
+def cpu_softmax(X, drop=0.0):
     ops = NumpyOps()
-    Y = ops.softmax(X)

     def cpu_softmax_backward(dY, sgd=None):
         return dY
@@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
         if exclusive_classes:
             output_layer = Softmax(nr_class, tok2vec.nO)
         else:
-            output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            output_layer = (
+                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            )
         model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class

View File

@@ -125,7 +125,9 @@ def pretrain(
                 max_length=max_length,
                 min_length=min_length,
             )
-            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
+            loss = make_update(
+                model, docs, optimizer, objective=loss_func, drop=dropout
+            )
             progress = tracker.update(epoch, loss, docs)
             if progress:
                 msg.row(progress, **row_settings)
@@ -215,8 +217,8 @@ def get_cossim_loss(yh, y):
     norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
     mul_norms = norm_yh * norm_y
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
-    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
-    loss = xp.abs(cosine-1).sum()
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
+    loss = xp.abs(cosine - 1).sum()
     return loss, -d_yh
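
Note: the `d_yh` line in this hunk is the analytic gradient of cosine similarity with respect to `yh`. In LaTeX:

    \frac{\partial \cos(\hat{y}, y)}{\partial \hat{y}}
        = \frac{y}{\lVert\hat{y}\rVert\,\lVert y\rVert}
        - \cos(\hat{y}, y)\,\frac{\hat{y}}{\lVert\hat{y}\rVert^{2}}

Since the loss is \sum \lvert \cos - 1 \rvert and cosine similarity never exceeds 1, returning `-d_yh` gives the direction that pushes the cosine toward 1.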

View File

@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals

-from ...symbols import LEMMA, PRON_LEMMA, AUX
+from ...symbols import LEMMA, PRON_LEMMA

 _subordinating_conjunctions = [
     "that",
@@ -457,7 +457,6 @@ MORPH_RULES = {
         "have": {"POS": "AUX"},
         "'m": {"POS": "AUX", LEMMA: "be"},
         "'ve": {"POS": "AUX"},
-        "'re": {"POS": "AUX", LEMMA: "be"},
         "'s": {"POS": "AUX"},
         "is": {"POS": "AUX"},
         "'d": {"POS": "AUX"},

View File

@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals

-from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB
+from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB
 from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ

 # Source: Korakot Chaovavanich
@@ -17,8 +17,8 @@ TAG_MAP = {
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
     # VERB
-    "VACT":{POS:VERB},
-    "VSTA":{POS:VERB},
+    "VACT": {POS: VERB},
+    "VSTA": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},

View File

@ -1,6 +1,8 @@
import pytest # coding: utf8
from __future__ import unicode_literals
import re import re
from ... import compat from spacy import compat
prefix_search = ( prefix_search = (
b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])" b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@@ -67,4 +69,4 @@ if compat.is_python2:
     # string above in the xpass message.
     def test_issue3356():
         pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
-        assert not pattern.search(u"hello")
+        assert not pattern.search("hello")
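
Note: the escape sequences in `prefix_search` are plain UTF-8 bytes; decoding makes the pattern readable (a sketch, purely illustrative):

    print(b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93".decode("utf8"))
    # ^§|^%|^=|^—|^–  (section sign, percent, equals, em dash, en dash)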

View File

@@ -1,10 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.util import decaying

-def test_decaying():
-    sizes = decaying(10., 1., .5)
+
+def test_issue3447():
+    sizes = decaying(10.0, 1.0, 0.5)
     size = next(sizes)
-    assert size == 10.
+    assert size == 10.0
     size = next(sizes)
-    assert size == 10. - 0.5
+    assert size == 10.0 - 0.5
     size = next(sizes)
-    assert size == 10. - 0.5 - 0.5
+    assert size == 10.0 - 0.5 - 0.5
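
Note: `spacy.util.decaying(start, stop, decay)` yields an infinite series that starts at `start` and decreases by `decay` per step, as the assertions above show; spaCy uses it e.g. to anneal dropout during training. A usage sketch:

    from spacy.util import decaying

    dropout = decaying(0.6, 0.2, 1e-4)  # start, floor, per-step decay
    print(next(dropout))  # 0.6
    print(next(dropout))  # 0.5999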

View File

@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals

-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc

View File

@@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
     os.mkdir(path2str(symlink_target))
     # yield -- need to cleanup even if assertion fails
     # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
+
     def cleanup():
         symlink_remove(symlink)
         os.rmdir(path2str(symlink_target))
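
Note: the `cleanup` inner function exists so teardown runs even when the test body raises, per the linked pytest issue; the fixture presumably registers it with `request.addfinalizer`. A minimal sketch of that pattern:

    import os
    import pytest

    @pytest.fixture
    def setup_target(request, tmp_path):
        target = tmp_path / "target"
        os.mkdir(str(target))

        def cleanup():
            os.rmdir(str(target))

        # Finalizers run at teardown even if the test fails mid-way.
        request.addfinalizer(cleanup)
        return target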