mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 05:31:15 +03:00 
			
		
		
		
	Fiddle with sizings for parser
This commit is contained in:
		
							parent
							
								
									e6d71e1778
								
							
						
					
					
						commit
						613ba79e2e
					
				|  | @ -41,23 +41,23 @@ class TokenVectorEncoder(object): | ||||||
|                         Softmax(self.vocab.morphology.n_tags, |                         Softmax(self.vocab.morphology.n_tags, | ||||||
|                                 token_vector_width)) |                                 token_vector_width)) | ||||||
| 
 | 
 | ||||||
|     def build_model(self, lang, width, embed_size=1000, **cfg): |     def build_model(self, lang, width, embed_size=5000, **cfg): | ||||||
|         cols = self.doc2feats.cols |         cols = self.doc2feats.cols | ||||||
|         with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}): |         with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}): | ||||||
|             lower = get_col(cols.index(LOWER))   >> (HashEmbed(width, embed_size*3) |             lower = get_col(cols.index(LOWER))   >> (HashEmbed(width, embed_size) | ||||||
|                                                      +HashEmbed(width, embed_size*3)) |                                                      +HashEmbed(width, embed_size)) | ||||||
|             prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size) |             prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2) | ||||||
|             suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size) |             suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2) | ||||||
|             shape = get_col(cols.index(SHAPE))   >> HashEmbed(width, embed_size) |             shape = get_col(cols.index(SHAPE))   >> HashEmbed(width, embed_size//2) | ||||||
| 
 | 
 | ||||||
|             tok2vec = ( |             tok2vec = ( | ||||||
|                 flatten |                 flatten | ||||||
|                 >> (lower | prefix | suffix | shape ) |                 >> (lower | prefix | suffix | shape ) | ||||||
|                 >> BN(Maxout(width, pieces=3)) |                 >> Maxout(width, pieces=3) | ||||||
|                 >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, width*3))) |                 >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) | ||||||
|                 >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, width*3))) |                 >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) | ||||||
|                 >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, width*3))) |                 >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) | ||||||
|                 >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, width*3))) |                 >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) | ||||||
|             ) |             ) | ||||||
|         return tok2vec |         return tok2vec | ||||||
| 
 | 
 | ||||||
|  | @ -80,7 +80,9 @@ class TokenVectorEncoder(object): | ||||||
|         scores, finish_update = self.tagger.begin_update(feats, drop=drop) |         scores, finish_update = self.tagger.begin_update(feats, drop=drop) | ||||||
|         scores, _ = self.tagger.begin_update(feats, drop=drop) |         scores, _ = self.tagger.begin_update(feats, drop=drop) | ||||||
|         idx = 0 |         idx = 0 | ||||||
|         guesses = scores.argmax(axis=1).get() |         guesses = scores.argmax(axis=1) | ||||||
|  |         if not isinstance(guesses, numpy.ndarray): | ||||||
|  |             guesses = guesses.get() | ||||||
|         for i, doc in enumerate(docs): |         for i, doc in enumerate(docs): | ||||||
|             tag_ids = guesses[idx:idx+len(doc)] |             tag_ids = guesses[idx:idx+len(doc)] | ||||||
|             for j, tag_id in enumerate(tag_ids): |             for j, tag_id in enumerate(tag_ids): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user