Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 07:57:35 +03:00)
			
		
		
		
	Avoid enumerate to prevent a long wait at 0% (#5159)
This commit is contained in:
		
parent: 2b14997b68
commit: 9cf965c260
					
				|  | @ -479,11 +479,12 @@ def read_el_docs_golds(nlp, entity_file_path, dev, line_ids, kb, labels_discard= | ||||||
|     if not labels_discard: |     if not labels_discard: | ||||||
|         labels_discard = [] |         labels_discard = [] | ||||||
| 
 | 
 | ||||||
|     texts = [] |     max_index = max(line_ids) | ||||||
|     entities_list = [] |  | ||||||
| 
 | 
 | ||||||
|     with entity_file_path.open("r", encoding="utf8") as file: |     with entity_file_path.open("r", encoding="utf8") as _file: | ||||||
|         for i, line in enumerate(file): |         line = _file.readline() | ||||||
|  |         i = 0 | ||||||
|  |         while line and i < max_index: | ||||||
|             if i in line_ids: |             if i in line_ids: | ||||||
|                 example = json.loads(line) |                 example = json.loads(line) | ||||||
|                 article_id = example["article_id"] |                 article_id = example["article_id"] | ||||||
|  | @ -493,15 +494,12 @@ def read_el_docs_golds(nlp, entity_file_path, dev, line_ids, kb, labels_discard= | ||||||
|                 if dev != is_dev(article_id) or not is_valid_article(clean_text): |                 if dev != is_dev(article_id) or not is_valid_article(clean_text): | ||||||
|                     continue |                     continue | ||||||
| 
 | 
 | ||||||
|                 texts.append(clean_text) |                 doc = nlp(clean_text) | ||||||
|                 entities_list.append(entities) |  | ||||||
| 
 |  | ||||||
|     docs = nlp.pipe(texts, batch_size=50) |  | ||||||
| 
 |  | ||||||
|     for doc, entities in zip(docs, entities_list): |  | ||||||
|                 gold = _get_gold_parse(doc, entities, dev=dev, kb=kb, labels_discard=labels_discard) |                 gold = _get_gold_parse(doc, entities, dev=dev, kb=kb, labels_discard=labels_discard) | ||||||
|                 if gold and len(gold.links) > 0: |                 if gold and len(gold.links) > 0: | ||||||
|                     yield doc, gold |                     yield doc, gold | ||||||
|  |             i += 1 | ||||||
|  |             line = _file.readline() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _get_gold_parse(doc, entities, dev, kb, labels_discard): | def _get_gold_parse(doc, entities, dev, kb, labels_discard): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user