mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 21:51:24 +03:00 
			
		
		
		
	Update train_ner_standalone example
This commit is contained in:
		
							parent
							
								
									e514d6aa0a
								
							
						
					
					
						commit
						cbb1fbef80
					
				|  | @ -20,9 +20,10 @@ import plac | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| import random | import random | ||||||
| import json | import json | ||||||
|  | import tqdm | ||||||
|  | 
 | ||||||
| from thinc.neural.optimizers import Adam | from thinc.neural.optimizers import Adam | ||||||
| from thinc.neural.ops import NumpyOps | from thinc.neural.ops import NumpyOps | ||||||
| import tqdm |  | ||||||
| 
 | 
 | ||||||
| from spacy.vocab import Vocab | from spacy.vocab import Vocab | ||||||
| from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer | from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer | ||||||
|  | @ -35,6 +36,7 @@ from spacy.gold import minibatch | ||||||
| from spacy.scorer import Scorer | from spacy.scorer import Scorer | ||||||
| import spacy.util | import spacy.util | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| try: | try: | ||||||
|     unicode |     unicode | ||||||
| except NameError: | except NameError: | ||||||
|  | @ -55,20 +57,17 @@ def init_vocab(): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Pipeline(object): | class Pipeline(object): | ||||||
|     def __init__(self, vocab=None, tokenizer=None, tensorizer=None, entity=None): |     def __init__(self, vocab=None, tokenizer=None, entity=None): | ||||||
|         if vocab is None: |         if vocab is None: | ||||||
|             vocab = init_vocab() |             vocab = init_vocab() | ||||||
|         if tokenizer is None: |         if tokenizer is None: | ||||||
|             tokenizer = Tokenizer(vocab, {}, None, None, None) |             tokenizer = Tokenizer(vocab, {}, None, None, None) | ||||||
|         if tensorizer is None: |  | ||||||
|             tensorizer = TokenVectorEncoder(vocab) |  | ||||||
|         if entity is None: |         if entity is None: | ||||||
|             entity = NeuralEntityRecognizer(vocab) |             entity = NeuralEntityRecognizer(vocab) | ||||||
|         self.vocab = vocab |         self.vocab = vocab | ||||||
|         self.tokenizer = tokenizer |         self.tokenizer = tokenizer | ||||||
|         self.tensorizer = tensorizer |  | ||||||
|         self.entity = entity |         self.entity = entity | ||||||
|         self.pipeline = [tensorizer, self.entity] |         self.pipeline = [self.entity] | ||||||
| 
 | 
 | ||||||
|     def begin_training(self): |     def begin_training(self): | ||||||
|         for model in self.pipeline: |         for model in self.pipeline: | ||||||
|  | @ -102,10 +101,8 @@ class Pipeline(object): | ||||||
|         golds = [self.make_gold(input_, annot) for input_, annot in |         golds = [self.make_gold(input_, annot) for input_, annot in | ||||||
|                  zip(inputs, annots)] |                  zip(inputs, annots)] | ||||||
| 
 | 
 | ||||||
|         tensors, bp_tensors = self.tensorizer.update(docs, golds, drop=drop) |         self.entity.update(docs, golds, drop=drop, | ||||||
|         d_tensors = self.entity.update((docs, tensors), golds, drop=drop, |  | ||||||
|                            sgd=sgd, losses=losses) |                            sgd=sgd, losses=losses) | ||||||
|         bp_tensors(d_tensors, sgd=sgd) |  | ||||||
|         return losses |         return losses | ||||||
| 
 | 
 | ||||||
|     def evaluate(self, examples): |     def evaluate(self, examples): | ||||||
|  | @ -123,7 +120,6 @@ class Pipeline(object): | ||||||
|         elif not path.is_dir(): |         elif not path.is_dir(): | ||||||
|             raise IOError("Can't save pipeline to %s\nNot a directory" % path) |             raise IOError("Can't save pipeline to %s\nNot a directory" % path) | ||||||
|         self.vocab.to_disk(path / 'vocab') |         self.vocab.to_disk(path / 'vocab') | ||||||
|         self.tensorizer.to_disk(path / 'tensorizer') |  | ||||||
|         self.entity.to_disk(path / 'ner') |         self.entity.to_disk(path / 'ner') | ||||||
| 
 | 
 | ||||||
|     def from_disk(self, path): |     def from_disk(self, path): | ||||||
|  | @ -133,7 +129,6 @@ class Pipeline(object): | ||||||
|         if not path.is_dir(): |         if not path.is_dir(): | ||||||
|             raise IOError("Cannot load pipeline from %s\nNot a directory" % path) |             raise IOError("Cannot load pipeline from %s\nNot a directory" % path) | ||||||
|         self.vocab = self.vocab.from_disk(path / 'vocab') |         self.vocab = self.vocab.from_disk(path / 'vocab') | ||||||
|         self.tensorizer = self.tensorizer.from_disk(path / 'tensorizer') |  | ||||||
|         self.entity = self.entity.from_disk(path / 'ner') |         self.entity = self.entity.from_disk(path / 'ner') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user