mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Update paddle example
This commit is contained in:
		
							parent
							
								
									1ef541ddff
								
							
						
					
					
						commit
						d75fe7c19a
					
				| 
						 | 
					@ -1,31 +0,0 @@
 | 
				
			||||||
def write_parameter(outfile, feats):
    """Write parameter arrays to a binary Paddle parameter file.

    Format taken from https://github.com/baidu/Paddle/issues/490: a header
    packed as ``struct.pack('iIQ', version, value_size, count)`` followed by
    the raw bytes of every array in ``feats``, in order.

    outfile: Output file name (string). **Note**: it must match the
        parameter file name used in the trainer config.
    feats: Iterable of float arrays exposing ``tobytes()`` (or the legacy
        ``tostring()``). The header declares 4-byte values, so the arrays
        are expected to be float32 -- this is not checked here.
    """
    # Imported locally: the module this function lives in has no top-level
    # ``import struct``.
    import struct

    version = 0
    value_size = 4  # bytes per value; 4 means float type

    chunks = []
    for feat in feats:
        # Prefer tobytes(); tostring() is the deprecated spelling and is only
        # used as a fallback for objects that lack the newer method.
        to_bytes = getattr(feat, 'tobytes', None) or feat.tostring
        chunks.append(to_bytes())
    payload = b"".join(chunks)

    # Integer division: the 'Q' field requires an int, and true division
    # would produce a float on Python 3.
    size = len(payload) // value_size

    # The context manager guarantees the file is flushed and closed.
    with open(outfile, 'wb') as fo:
        fo.write(struct.pack('iIQ', version, value_size, size))
        fo.write(payload)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# config=trainer_config.py
 | 
					 | 
				
			||||||
# output=./model_output
 | 
					 | 
				
			||||||
# paddle train --config=$config \
 | 
					 | 
				
			||||||
#              --save_dir=$output \
 | 
					 | 
				
			||||||
#              --job=train \
 | 
					 | 
				
			||||||
#              --use_gpu=false \
 | 
					 | 
				
			||||||
#              --trainer_count=4 \
 | 
					 | 
				
			||||||
#              --num_passes=10 \
 | 
					 | 
				
			||||||
#              --log_period=20 \
 | 
					 | 
				
			||||||
#              --dot_period=20 \
 | 
					 | 
				
			||||||
#              --show_parameter_stats_period=100 \
 | 
					 | 
				
			||||||
#              --test_all_data_in_one_period=1 \
 | 
					 | 
				
			||||||
#              2>&1 | tee 'train.log'
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,6 @@
 | 
				
			||||||
from paddle.trainer.PyDataProvider2 import *
 | 
					from paddle.trainer.PyDataProvider2 import *
 | 
				
			||||||
from itertools import izip
 | 
					from itertools import izip
 | 
				
			||||||
 | 
					import spacy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_features(doc):
 | 
					def get_features(doc):
 | 
				
			||||||
| 
						 | 
					@ -17,7 +18,7 @@ def read_data(data_dir):
 | 
				
			||||||
                yield text, label
 | 
					                yield text, label
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def on_init(settings, lang_name, **kwargs):
 | 
					def on_init(settings, **kwargs):
 | 
				
			||||||
    print("Loading spaCy")
 | 
					    print("Loading spaCy")
 | 
				
			||||||
    nlp = spacy.load('en', entity=False)
 | 
					    nlp = spacy.load('en', entity=False)
 | 
				
			||||||
    vectors = get_vectors(nlp)
 | 
					    vectors = get_vectors(nlp)
 | 
				
			||||||
| 
						 | 
					@ -32,6 +33,7 @@ def on_init(settings, lang_name, **kwargs):
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    settings.nlp = nlp
 | 
					    settings.nlp = nlp
 | 
				
			||||||
    settings.vectors = vectors
 | 
					    settings.vectors = vectors
 | 
				
			||||||
 | 
					    settings['batch_size'] = 32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@provider(init_hook=on_init)
 | 
					@provider(init_hook=on_init)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user