mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Auto-format example
This commit is contained in:
		
							parent
							
								
									361554f629
								
							
						
					
					
						commit
						6f1438b5d9
					
				| 
						 | 
				
			
			@ -20,51 +20,48 @@ from spacy.util import minibatch, compounding
 | 
			
		|||
 | 
			
		||||
# training data
 | 
			
		||||
TRAIN_DATA = [
 | 
			
		||||
    ('Who is Shaka Khan?', {
 | 
			
		||||
        'entities': [(7, 17, 'PERSON')]
 | 
			
		||||
    }),
 | 
			
		||||
    ('I like London and Berlin.', {
 | 
			
		||||
        'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]
 | 
			
		||||
    })
 | 
			
		||||
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
 | 
			
		||||
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@plac.annotations(
 | 
			
		||||
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
 | 
			
		||||
    output_dir=("Optional output directory", "option", "o", Path),
 | 
			
		||||
    n_iter=("Number of training iterations", "option", "n", int))
 | 
			
		||||
    n_iter=("Number of training iterations", "option", "n", int),
 | 
			
		||||
)
 | 
			
		||||
def main(model=None, output_dir=None, n_iter=100):
 | 
			
		||||
    """Load the model, set up the pipeline and train the entity recognizer."""
 | 
			
		||||
    if model is not None:
 | 
			
		||||
        nlp = spacy.load(model)  # load existing spaCy model
 | 
			
		||||
        print("Loaded model '%s'" % model)
 | 
			
		||||
    else:
 | 
			
		||||
        nlp = spacy.blank('en')  # create blank Language class
 | 
			
		||||
        nlp = spacy.blank("en")  # create blank Language class
 | 
			
		||||
        print("Created blank 'en' model")
 | 
			
		||||
 | 
			
		||||
    # create the built-in pipeline components and add them to the pipeline
 | 
			
		||||
    # nlp.create_pipe works for built-ins that are registered with spaCy
 | 
			
		||||
    if 'ner' not in nlp.pipe_names:
 | 
			
		||||
        ner = nlp.create_pipe('ner')
 | 
			
		||||
    if "ner" not in nlp.pipe_names:
 | 
			
		||||
        ner = nlp.create_pipe("ner")
 | 
			
		||||
        nlp.add_pipe(ner, last=True)
 | 
			
		||||
    # otherwise, get it so we can add labels
 | 
			
		||||
    else:
 | 
			
		||||
        ner = nlp.get_pipe('ner')
 | 
			
		||||
        ner = nlp.get_pipe("ner")
 | 
			
		||||
 | 
			
		||||
    # add labels
 | 
			
		||||
    for _, annotations in TRAIN_DATA:
 | 
			
		||||
        for ent in annotations.get('entities'):
 | 
			
		||||
        for ent in annotations.get("entities"):
 | 
			
		||||
            ner.add_label(ent[2])
 | 
			
		||||
 | 
			
		||||
    # get names of other pipes to disable them during training
 | 
			
		||||
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
 | 
			
		||||
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
 | 
			
		||||
    with nlp.disable_pipes(*other_pipes):  # only train NER
 | 
			
		||||
        optimizer = nlp.begin_training()
 | 
			
		||||
        for itn in range(n_iter):
 | 
			
		||||
            random.shuffle(TRAIN_DATA)
 | 
			
		||||
            losses = {}
 | 
			
		||||
            # batch up the examples using spaCy's minibatch
 | 
			
		||||
            batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
 | 
			
		||||
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
 | 
			
		||||
            for batch in batches:
 | 
			
		||||
                texts, annotations = zip(*batch)
 | 
			
		||||
                nlp.update(
 | 
			
		||||
| 
						 | 
				
			
			@ -72,14 +69,15 @@ def main(model=None, output_dir=None, n_iter=100):
 | 
			
		|||
                    annotations,  # batch of annotations
 | 
			
		||||
                    drop=0.5,  # dropout - make it harder to memorise data
 | 
			
		||||
                    sgd=optimizer,  # callable to update weights
 | 
			
		||||
                    losses=losses)
 | 
			
		||||
            print('Losses', losses)
 | 
			
		||||
                    losses=losses,
 | 
			
		||||
                )
 | 
			
		||||
            print("Losses", losses)
 | 
			
		||||
 | 
			
		||||
    # test the trained model
 | 
			
		||||
    for text, _ in TRAIN_DATA:
 | 
			
		||||
        doc = nlp(text)
 | 
			
		||||
        print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
 | 
			
		||||
        print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
 | 
			
		||||
        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
 | 
			
		||||
        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
 | 
			
		||||
 | 
			
		||||
    # save model to output directory
 | 
			
		||||
    if output_dir is not None:
 | 
			
		||||
| 
						 | 
				
			
			@ -94,11 +92,11 @@ def main(model=None, output_dir=None, n_iter=100):
 | 
			
		|||
        nlp2 = spacy.load(output_dir)
 | 
			
		||||
        for text, _ in TRAIN_DATA:
 | 
			
		||||
            doc = nlp2(text)
 | 
			
		||||
            print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
 | 
			
		||||
            print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
 | 
			
		||||
            print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
 | 
			
		||||
            print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    plac.call(main)
 | 
			
		||||
 | 
			
		||||
    # Expected output:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user