mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Avoid enumerate to prevent a long wait at 0% (#5159)
This commit is contained in:
		
							parent
							
								
									2b14997b68
								
							
						
					
					
						commit
						9cf965c260
					
				|  | @ -479,11 +479,12 @@ def read_el_docs_golds(nlp, entity_file_path, dev, line_ids, kb, labels_discard= | |||
|     if not labels_discard: | ||||
|         labels_discard = [] | ||||
| 
 | ||||
|     texts = [] | ||||
|     entities_list = [] | ||||
|     max_index = max(line_ids) | ||||
| 
 | ||||
|     with entity_file_path.open("r", encoding="utf8") as file: | ||||
|         for i, line in enumerate(file): | ||||
|     with entity_file_path.open("r", encoding="utf8") as _file: | ||||
|         line = _file.readline() | ||||
|         i = 0 | ||||
|         while line and i < max_index: | ||||
|             if i in line_ids: | ||||
|                 example = json.loads(line) | ||||
|                 article_id = example["article_id"] | ||||
|  | @ -493,15 +494,12 @@ def read_el_docs_golds(nlp, entity_file_path, dev, line_ids, kb, labels_discard= | |||
|                 if dev != is_dev(article_id) or not is_valid_article(clean_text): | ||||
|                     continue | ||||
| 
 | ||||
|                 texts.append(clean_text) | ||||
|                 entities_list.append(entities) | ||||
| 
 | ||||
|     docs = nlp.pipe(texts, batch_size=50) | ||||
| 
 | ||||
|     for doc, entities in zip(docs, entities_list): | ||||
|                 doc = nlp(clean_text) | ||||
|                 gold = _get_gold_parse(doc, entities, dev=dev, kb=kb, labels_discard=labels_discard) | ||||
|                 if gold and len(gold.links) > 0: | ||||
|                     yield doc, gold | ||||
|             i += 1 | ||||
|             line = _file.readline() | ||||
| 
 | ||||
| 
 | ||||
| def _get_gold_parse(doc, entities, dev, kb, labels_discard): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user