Tidy up and auto-format [ci skip]
This commit is contained in: parent cd90752193, commit dad5621166
@@ -12,15 +12,15 @@ class KerasSimilarityShim(object):
 
     @classmethod
     def load(cls, path, nlp, max_length=100, get_features=None):
 
         if get_features is None:
             get_features = get_word_ids
 
-        with (path / 'config.json').open() as file_:
+        with (path / "config.json").open() as file_:
             model = model_from_json(file_.read())
-        with (path / 'model').open('rb') as file_:
+        with (path / "model").open("rb") as file_:
             weights = pickle.load(file_)
 
         embeddings = get_embeddings(nlp.vocab)
         weights.insert(1, embeddings)
         model.set_weights(weights)
@@ -33,8 +33,8 @@ class KerasSimilarityShim(object):
         self.max_length = max_length
 
     def __call__(self, doc):
-        doc.user_hooks['similarity'] = self.predict
-        doc.user_span_hooks['similarity'] = self.predict
+        doc.user_hooks["similarity"] = self.predict
+        doc.user_span_hooks["similarity"] = self.predict
 
         return doc
 
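For context on the hunk above: in spaCy v2, registering a callable under doc.user_hooks["similarity"] (and doc.user_span_hooks["similarity"] for spans) makes Doc.similarity and Span.similarity dispatch to that callable instead of the default vector-based similarity. A minimal sketch, assuming nlp already has the shim added as a pipeline component so the hooks are set when each doc passes through the pipeline:

doc1 = nlp("A cat sat on the mat.")
doc2 = nlp("A feline rested on the rug.")
# The shim stored self.predict in doc.user_hooks["similarity"], so this call is
# routed to the Keras model rather than spaCy's built-in vector similarity.
print(doc1.similarity(doc2))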
@@ -48,24 +48,24 @@ class KerasSimilarityShim(object):
 
 def get_embeddings(vocab, nr_unk=100):
     # the extra +1 is for a zero vector representing sentence-final padding
     num_vectors = max(lex.rank for lex in vocab) + 2
 
     # create random vectors for OOV tokens
     oov = np.random.normal(size=(nr_unk, vocab.vectors_length))
     oov = oov / oov.sum(axis=1, keepdims=True)
 
-    vectors = np.zeros((num_vectors + nr_unk, vocab.vectors_length), dtype='float32')
-    vectors[1:(nr_unk + 1), ] = oov
+    vectors = np.zeros((num_vectors + nr_unk, vocab.vectors_length), dtype="float32")
+    vectors[1 : (nr_unk + 1),] = oov
     for lex in vocab:
         if lex.has_vector and lex.vector_norm > 0:
             vectors[nr_unk + lex.rank + 1] = lex.vector / lex.vector_norm
 
     return vectors
 
 
 def get_word_ids(docs, max_length=100, nr_unk=100):
-    Xs = np.zeros((len(docs), max_length), dtype='int32')
+    Xs = np.zeros((len(docs), max_length), dtype="int32")
 
     for i, doc in enumerate(docs):
         for j, token in enumerate(doc):
             if j == max_length:
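The dtype changes above touch get_embeddings and get_word_ids, which build the embedding table inserted into the Keras model's weights. The layout the code produces: row 0 stays all zeros and serves as the padding vector, rows 1..nr_unk hold random vectors for out-of-vocabulary buckets, and a known lexeme lands at row nr_unk + lex.rank + 1. A small sketch checking that layout, assuming get_embeddings from the file above is importable and a vectors-equipped model such as en_core_web_md is installed (both assumptions, not part of the diff):

import numpy as np
import spacy

nlp = spacy.load("en_core_web_md")       # assumption: any model that ships word vectors
table = get_embeddings(nlp.vocab, nr_unk=100)

print(table.shape)                       # (max lexeme rank + 2 + 100, vectors_length)
print(table[0].any())                    # False: row 0 is the zero padding vector
print(np.abs(table[1:101]).sum() > 0)    # True: rows 1..100 are the random OOV vectors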
@@ -80,7 +80,7 @@ def main(model_name, unlabelled_loc):
             nlp.rehearse(raw_batch, sgd=optimizer, losses=r_losses)
         print("Losses", losses)
         print("R. Losses", r_losses)
-    print(nlp.get_pipe('ner').model.unseen_classes)
+    print(nlp.get_pipe("ner").model.unseen_classes)
     test_text = "Do you like horses?"
     doc = nlp(test_text)
     print("Entities in '%s'" % test_text)
@@ -88,7 +88,5 @@ def main(model_name, unlabelled_loc):
         print(ent.label_, ent.text)
 
 
-
-
 if __name__ == "__main__":
     plac.call(main)
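The two hunks above come from the rehearsal example, where nlp.rehearse (an experimental spaCy v2.1 method aimed at mitigating catastrophic forgetting) is interleaved with ordinary supervised updates. A rough sketch of the surrounding training loop under those assumptions, with TRAIN_DATA, raw_batches, sizes, n_iter, and optimizer taken as already set up by the example (none of them appear in this diff):

import random
from spacy.util import minibatch

for itn in range(n_iter):
    random.shuffle(TRAIN_DATA)
    losses, r_losses = {}, {}
    for batch in minibatch(TRAIN_DATA, size=sizes):
        docs, golds = zip(*batch)
        # supervised update on the labelled batch
        nlp.update(docs, golds, sgd=optimizer, drop=0.5, losses=losses)
        # rehearsal update on raw, unlabelled docs so the original model's
        # predictions are not forgotten while fine-tuning
        raw_batch = list(next(raw_batches))
        nlp.rehearse(raw_batch, sgd=optimizer, losses=r_losses)
    print("Losses", losses)
    print("R. Losses", r_losses)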
@@ -24,7 +24,7 @@ from spacy.util import minibatch, compounding
     output_dir=("Optional output directory", "option", "o", Path),
     n_texts=("Number of texts to train from", "option", "t", int),
     n_iter=("Number of training iterations", "option", "n", int),
-    init_tok2vec=("Pretrained tok2vec weights", "option", "t2v", Path)
+    init_tok2vec=("Pretrained tok2vec weights", "option", "t2v", Path),
 )
 def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None):
     if output_dir is not None:
@@ -43,11 +43,7 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None
     # nlp.create_pipe works for built-ins that are registered with spaCy
     if "textcat" not in nlp.pipe_names:
         textcat = nlp.create_pipe(
-            "textcat",
-            config={
-                "exclusive_classes": True,
-                "architecture": "simple_cnn",
-            }
+            "textcat", config={"exclusive_classes": True, "architecture": "simple_cnn"}
         )
         nlp.add_pipe(textcat, last=True)
     # otherwise, get it, so we can add labels to it
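The collapsed create_pipe call above is behaviourally identical to the multi-line version it replaces; the config still requests the exclusive-classes simple_cnn text classifier. A short sketch of how such a component is typically created and labelled afterwards, with the binary sentiment labels assumed for illustration (they are not part of this hunk):

if "textcat" not in nlp.pipe_names:
    textcat = nlp.create_pipe(
        "textcat", config={"exclusive_classes": True, "architecture": "simple_cnn"}
    )
    nlp.add_pipe(textcat, last=True)
else:
    # the pipe already exists, so fetch it to add labels
    textcat = nlp.get_pipe("textcat")

# assumption: binary sentiment labels as in a typical IMDB-style setup
textcat.add_label("POSITIVE")
textcat.add_label("NEGATIVE")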