Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 16:07:41 +03:00)
			
		
		
		
	custom error and warning messages
This commit is contained in:
		
parent 791327e3c5
commit cc9ae28a52
					
				|  | @ -324,18 +324,19 @@ def read_training(nlp, training_dir, dev, limit): | ||||||
|                                         if 5 < sent_length < 100: |                                         if 5 < sent_length < 100: | ||||||
|                                             ents_by_offset[str(ent.start_char) + "_" + str(ent.end_char)] = ent |                                             ents_by_offset[str(ent.start_char) + "_" + str(ent.end_char)] = ent | ||||||
|                                 else: |                                 else: | ||||||
|                                     skip_articles.add(current_article_id) |                                     skip_articles.add(article_id) | ||||||
|                                     current_doc = None |                                     current_doc = None | ||||||
|                         except Exception as e: |                         except Exception as e: | ||||||
|                             print("Problem parsing article", article_id, e) |                             print("Problem parsing article", article_id, e) | ||||||
|                             skip_articles.add(current_article_id) |                             skip_articles.add(article_id) | ||||||
|  |                             raise e | ||||||
| 
 | 
 | ||||||
|                     # repeat checking this condition in case an exception was thrown |                     # repeat checking this condition in case an exception was thrown | ||||||
|                     if current_doc and (current_article_id == article_id): |                     if current_doc and (current_article_id == article_id): | ||||||
|                         found_ent = ents_by_offset.get(start + "_" + end,  None) |                         found_ent = ents_by_offset.get(start + "_" + end,  None) | ||||||
|                         if found_ent: |                         if found_ent: | ||||||
|                             if found_ent.text != alias: |                             if found_ent.text != alias: | ||||||
|                                 skip_articles.add(current_article_id) |                                 skip_articles.add(article_id) | ||||||
|                                 current_doc = None |                                 current_doc = None | ||||||
|                             else: |                             else: | ||||||
|                                 sent = found_ent.sent.as_doc() |                                 sent = found_ent.sent.as_doc() | ||||||
|  |  | ||||||
|  | @ -399,6 +399,9 @@ class Errors(object): | ||||||
|     E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input includes either the " |     E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input includes either the " | ||||||
|             "`text` or `tokens` key. For more info, see the docs:\n" |             "`text` or `tokens` key. For more info, see the docs:\n" | ||||||
|             "https://spacy.io/api/cli#pretrain-jsonl") |             "https://spacy.io/api/cli#pretrain-jsonl") | ||||||
|  |     E139 = ("Knowledge base for component '{name}' not initialized. Did you forget to call set_kb()?") | ||||||
|  |     E140 = ("The list of entities, prior probabilities and entity vectors should be of equal length.") | ||||||
|  |     E141 = ("Entity vectors should be of length {required} instead of the provided {found}.") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @add_codes | @add_codes | ||||||
|  |  | ||||||
							
								
								
									
										16
									
								
								spacy/kb.pyx
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								spacy/kb.pyx
									
									
									
									
									
								
							|  | @ -106,9 +106,9 @@ cdef class KnowledgeBase: | ||||||
|             user_warning(Warnings.W018.format(entity=entity)) |             user_warning(Warnings.W018.format(entity=entity)) | ||||||
|             return |             return | ||||||
| 
 | 
 | ||||||
|  |         # Raise an error if the provided entity vector is not of the correct length | ||||||
|         if len(entity_vector) != self.entity_vector_length: |         if len(entity_vector) != self.entity_vector_length: | ||||||
|             # TODO: proper error |             raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length)) | ||||||
|             raise ValueError("Entity vector length should have been", self.entity_vector_length) |  | ||||||
| 
 | 
 | ||||||
|         vector_index = self.c_add_vector(entity_vector=entity_vector) |         vector_index = self.c_add_vector(entity_vector=entity_vector) | ||||||
| 
 | 
 | ||||||
|  | @ -121,13 +121,8 @@ cdef class KnowledgeBase: | ||||||
|         return entity_hash |         return entity_hash | ||||||
| 
 | 
 | ||||||
|     cpdef set_entities(self, entity_list, prob_list, vector_list): |     cpdef set_entities(self, entity_list, prob_list, vector_list): | ||||||
|         if len(entity_list) != len(prob_list): |         if len(entity_list) != len(prob_list) or len(entity_list) != len(vector_list): | ||||||
|             # TODO: proper error |             raise ValueError(Errors.E140) | ||||||
|             raise ValueError("Entity list and prob list should have the same length") |  | ||||||
| 
 |  | ||||||
|         if len(entity_list) != len(vector_list): |  | ||||||
|             # TODO: proper error |  | ||||||
|             raise ValueError("Entity list and vector list should have the same length") |  | ||||||
| 
 | 
 | ||||||
|         nr_entities = len(entity_list) |         nr_entities = len(entity_list) | ||||||
|         self._entry_index = PreshMap(nr_entities+1) |         self._entry_index = PreshMap(nr_entities+1) | ||||||
|  | @ -138,8 +133,7 @@ cdef class KnowledgeBase: | ||||||
|         while i < nr_entities: |         while i < nr_entities: | ||||||
|             entity_vector = vector_list[i] |             entity_vector = vector_list[i] | ||||||
|             if len(entity_vector) != self.entity_vector_length: |             if len(entity_vector) != self.entity_vector_length: | ||||||
|                 # TODO: proper error |                 raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length)) | ||||||
|                 raise ValueError("Entity vector is", len(entity_vector), "length but should have been", self.entity_vector_length) |  | ||||||
| 
 | 
 | ||||||
|             entity_hash = self.vocab.strings.add(entity_list[i]) |             entity_hash = self.vocab.strings.add(entity_list[i]) | ||||||
|             entry.entity_hash = entity_hash |             entry.entity_hash = entity_hash | ||||||
|  |  | ||||||
|  | @ -1002,7 +1002,7 @@ cdef class DependencyParser(Parser): | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|     def postprocesses(self): |     def postprocesses(self): | ||||||
|         return [nonproj.deprojectivize, merge_subtokens] |         return [nonproj.deprojectivize]  # , merge_subtokens] | ||||||
| 
 | 
 | ||||||
|     def add_multitask_objective(self, target): |     def add_multitask_objective(self, target): | ||||||
|         if target == "cloze": |         if target == "cloze": | ||||||
|  | @ -1100,8 +1100,7 @@ class EntityLinker(Pipe): | ||||||
|     def require_kb(self): |     def require_kb(self): | ||||||
|         # Raise an error if the knowledge base is not initialized. |         # Raise an error if the knowledge base is not initialized. | ||||||
|         if getattr(self, "kb", None) in (None, True, False): |         if getattr(self, "kb", None) in (None, True, False): | ||||||
|             # TODO: custom error |             raise ValueError(Errors.E139.format(name=self.name)) | ||||||
|             raise ValueError(Errors.E109.format(name=self.name)) |  | ||||||
| 
 | 
 | ||||||
|     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, **kwargs): |     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, **kwargs): | ||||||
|         self.require_kb() |         self.require_kb() | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user