mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Fix handling of non-projective deps
This commit is contained in:
		
							parent
							
								
									5738d373d5
								
							
						
					
					
						commit
						025d9bbc37
					
				|  | @ -168,10 +168,14 @@ class GoldCorpus(object): | |||
|             n += 1 | ||||
|         return n | ||||
| 
 | ||||
|     def train_docs(self, nlp, shuffle=0, gold_preproc=True): | ||||
|     def train_docs(self, nlp, shuffle=0, gold_preproc=True, | ||||
|                    projectivize=False): | ||||
|         if shuffle: | ||||
|             random.shuffle(self.train_locs) | ||||
|         gold_docs = self.iter_gold_docs(nlp, self.train_tuples, gold_preproc) | ||||
|         if projectivize: | ||||
|             train_tuples = nonproj.PseudoProjectivity.preprocess_training_data( | ||||
|                                self.train_tuples) | ||||
|         gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc) | ||||
|         if shuffle: | ||||
|             gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*1000) | ||||
|         gold_docs = nlp.preprocess_gold(gold_docs) | ||||
|  | @ -184,7 +188,6 @@ class GoldCorpus(object): | |||
| 
 | ||||
|     @classmethod | ||||
|     def iter_gold_docs(cls, nlp, tuples, gold_preproc=True): | ||||
|         tuples = nonproj.PseudoProjectivity.preprocess_training_data(tuples) | ||||
|         for raw_text, paragraph_tuples in tuples: | ||||
|             docs = cls._make_docs(nlp, raw_text, paragraph_tuples, | ||||
|                                   gold_preproc) | ||||
|  | @ -233,7 +236,7 @@ class GoldCorpus(object): | |||
|         return locs | ||||
| 
 | ||||
| 
 | ||||
| def read_json_file(loc, docs_filter=None, limit=None): | ||||
| def read_json_file(loc, docs_filter=None, limit=1000): | ||||
|     loc = ensure_path(loc) | ||||
|     if loc.is_dir(): | ||||
|         for filename in loc.iterdir(): | ||||
|  |  | |||
|  | @ -330,7 +330,7 @@ cdef class Parser: | |||
| 
 | ||||
|         backprops = [] | ||||
|         cdef float loss = 0. | ||||
|         while todo: | ||||
|         while len(todo) >= 3: | ||||
|             states, golds = zip(*todo) | ||||
| 
 | ||||
|             token_ids = self.get_token_ids(states) | ||||
|  | @ -445,8 +445,6 @@ cdef class Parser: | |||
| 
 | ||||
|     def preprocess_gold(self, docs_golds): | ||||
|         for doc, gold in docs_golds: | ||||
|             gold.heads, gold.labels = PseudoProjectivity.projectivize( | ||||
|                                         gold.heads, gold.labels) | ||||
|             yield doc, gold | ||||
| 
 | ||||
|     def use_params(self, params): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user