mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Pass option for pre-trained vectors in parser
This commit is contained in:
		
							parent
							
								
									8665a77f48
								
							
						
					
					
						commit
						5ff2491f24
					
				| 
						 | 
					@ -245,7 +245,7 @@ cdef class Parser:
 | 
				
			||||||
        parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
 | 
					        parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
 | 
				
			||||||
        embed_size = util.env_opt('embed_size', 4000)
 | 
					        embed_size = util.env_opt('embed_size', 4000)
 | 
				
			||||||
        tensors = fine_tune(Tok2Vec(token_vector_width, embed_size,
 | 
					        tensors = fine_tune(Tok2Vec(token_vector_width, embed_size,
 | 
				
			||||||
                                    preprocess=doc2feats()))
 | 
					                                    pretrained_dims=cfg.get('pretrained_dims')))
 | 
				
			||||||
        if parser_maxout_pieces == 1:
 | 
					        if parser_maxout_pieces == 1:
 | 
				
			||||||
            lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
 | 
					            lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
 | 
				
			||||||
                        nF=cls.nr_feature,
 | 
					                        nF=cls.nr_feature,
 | 
				
			||||||
| 
						 | 
					@ -391,9 +391,10 @@ cdef class Parser:
 | 
				
			||||||
        if isinstance(tokvecses, np.ndarray):
 | 
					        if isinstance(tokvecses, np.ndarray):
 | 
				
			||||||
            tokvecses = [tokvecses]
 | 
					            tokvecses = [tokvecses]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tokvecs = self.model[0].ops.flatten(tokvecses)
 | 
					 | 
				
			||||||
        if USE_FINE_TUNE:
 | 
					        if USE_FINE_TUNE:
 | 
				
			||||||
            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 | 
					            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            tokvecs = self.model[0].ops.flatten(tokvecses)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        nr_state = len(docs)
 | 
					        nr_state = len(docs)
 | 
				
			||||||
        nr_class = self.moves.n_moves
 | 
					        nr_class = self.moves.n_moves
 | 
				
			||||||
| 
						 | 
					@ -451,9 +452,10 @@ cdef class Parser:
 | 
				
			||||||
        cdef Doc doc
 | 
					        cdef Doc doc
 | 
				
			||||||
        cdef int nr_class = self.moves.n_moves
 | 
					        cdef int nr_class = self.moves.n_moves
 | 
				
			||||||
        cdef StateClass stcls, output
 | 
					        cdef StateClass stcls, output
 | 
				
			||||||
        tokvecs = self.model[0].ops.flatten(tokvecses)
 | 
					 | 
				
			||||||
        if USE_FINE_TUNE:
 | 
					        if USE_FINE_TUNE:
 | 
				
			||||||
            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 | 
					            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            tokvecs = self.model[0].ops.flatten(tokvecses)
 | 
				
			||||||
        cuda_stream = get_cuda_stream()
 | 
					        cuda_stream = get_cuda_stream()
 | 
				
			||||||
        state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
 | 
					        state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
 | 
				
			||||||
                                                     cuda_stream, 0.0)
 | 
					                                                     cuda_stream, 0.0)
 | 
				
			||||||
| 
						 | 
					@ -533,6 +535,8 @@ cdef class Parser:
 | 
				
			||||||
        if USE_FINE_TUNE:
 | 
					        if USE_FINE_TUNE:
 | 
				
			||||||
            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
 | 
					            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
 | 
				
			||||||
            tokvecs = self.model[0].ops.flatten(my_tokvecs)
 | 
					            tokvecs = self.model[0].ops.flatten(my_tokvecs)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            tokvecs = self.model[0].ops.flatten(docs_tokvecs[1])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        cuda_stream = get_cuda_stream()
 | 
					        cuda_stream = get_cuda_stream()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -603,11 +607,11 @@ cdef class Parser:
 | 
				
			||||||
        docs, tokvecs = docs_tokvecs
 | 
					        docs, tokvecs = docs_tokvecs
 | 
				
			||||||
        lengths = [len(d) for d in docs]
 | 
					        lengths = [len(d) for d in docs]
 | 
				
			||||||
        assert min(lengths) >= 1
 | 
					        assert min(lengths) >= 1
 | 
				
			||||||
        tokvecs = self.model[0].ops.flatten(tokvecs)
 | 
					 | 
				
			||||||
        if USE_FINE_TUNE:
 | 
					        if USE_FINE_TUNE:
 | 
				
			||||||
            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
 | 
					            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
 | 
				
			||||||
            tokvecs += self.model[0].ops.flatten(my_tokvecs)
 | 
					            tokvecs += self.model[0].ops.flatten(my_tokvecs)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            tokvecs = self.model[0].ops.flatten(tokvecs)
 | 
				
			||||||
        states = self.moves.init_batch(docs)
 | 
					        states = self.moves.init_batch(docs)
 | 
				
			||||||
        for gold in golds:
 | 
					        for gold in golds:
 | 
				
			||||||
            self.moves.preprocess_gold(gold)
 | 
					            self.moves.preprocess_gold(gold)
 | 
				
			||||||
| 
						 | 
					@ -775,6 +779,7 @@ cdef class Parser:
 | 
				
			||||||
            for label in labels:
 | 
					            for label in labels:
 | 
				
			||||||
                self.moves.add_action(action, label)
 | 
					                self.moves.add_action(action, label)
 | 
				
			||||||
        if self.model is True:
 | 
					        if self.model is True:
 | 
				
			||||||
 | 
					            cfg['pretrained_dims'] = self.vocab.vectors_length
 | 
				
			||||||
            self.model, cfg = self.Model(self.moves.n_moves, **cfg)
 | 
					            self.model, cfg = self.Model(self.moves.n_moves, **cfg)
 | 
				
			||||||
            self.cfg.update(cfg)
 | 
					            self.cfg.update(cfg)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -856,9 +861,11 @@ cdef class Parser:
 | 
				
			||||||
        msg = util.from_bytes(bytes_data, deserializers, exclude)
 | 
					        msg = util.from_bytes(bytes_data, deserializers, exclude)
 | 
				
			||||||
        if 'model' not in exclude:
 | 
					        if 'model' not in exclude:
 | 
				
			||||||
            if self.model is True:
 | 
					            if self.model is True:
 | 
				
			||||||
                self.model, cfg = self.Model(self.moves.n_moves)
 | 
					                self.model, cfg = self.Model(self.moves.n_moves,
 | 
				
			||||||
 | 
					                                    pretrained_dims=self.vocab.vectors_length)
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                cfg = {}
 | 
					                cfg = {}
 | 
				
			||||||
 | 
					            cfg['pretrained_dims'] = self.vocab.vectors_length
 | 
				
			||||||
            if 'tok2vec_model' in msg:
 | 
					            if 'tok2vec_model' in msg:
 | 
				
			||||||
                self.model[0].from_bytes(msg['tok2vec_model'])
 | 
					                self.model[0].from_bytes(msg['tok2vec_model'])
 | 
				
			||||||
            if 'lower_model' in msg:
 | 
					            if 'lower_model' in msg:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user