Auto-format example

2025-12-06 01:34:25 +03:00 · 2018-12-17 13:44:38 +01:00 · 2018-12-17 13:44:38 +01:00 · 6f1438b5d9
commit 6f1438b5d9
parent 361554f629
1 changed file with 19 additions and 21 deletions
--- a/examples/training/train_ner.py
+++ b/examples/training/train_ner.py
@@ -20,51 +20,48 @@ from spacy.util import minibatch, compounding

 # training data
 TRAIN_DATA = [
-    ('Who is Shaka Khan?', {
-        'entities': [(7, 17, 'PERSON')]
-    }),
-    ('I like London and Berlin.', {
-        'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]
-    })
+    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
+    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
 ]


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
+    n_iter=("Number of training iterations", "option", "n", int),
+)
 def main(model=None, output_dir=None, n_iter=100):
    """Load the model, set up the pipeline and train the entity recognizer."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    if 'ner' not in nlp.pipe_names:
-        ner = nlp.create_pipe('ner')
+    if "ner" not in nlp.pipe_names:
+        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
    else:
-        ner = nlp.get_pipe('ner')
+        ner = nlp.get_pipe("ner")

    # add labels
    for _, annotations in TRAIN_DATA:
-        for ent in annotations.get('entities'):
+        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
-            batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
+            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
@@ -72,14 +69,15 @@ def main(model=None, output_dir=None, n_iter=100):
                    annotations,  # batch of annotations
                    drop=0.5,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
-                    losses=losses)
-            print('Losses', losses)
+                    losses=losses,
+                )
+            print("Losses", losses)

    # test the trained model
    for text, _ in TRAIN_DATA:
        doc = nlp(text)
-        print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
-        print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
+        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
+        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])

    # save model to output directory
    if output_dir is not None:
@@ -94,11 +92,11 @@ def main(model=None, output_dir=None, n_iter=100):
        nlp2 = spacy.load(output_dir)
        for text, _ in TRAIN_DATA:
            doc = nlp2(text)
-            print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
-            print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
+            print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
+            print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output: