mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Warn in Tagger.begin_training if no lemma tables are available (#4351)
This commit is contained in:
		
parent bc7e7db208
commit 3297a19545
					
				|  | @ -88,6 +88,13 @@ class Warnings(object): | |||
|             "loaded. (Shape: {shape})") | ||||
|     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " | ||||
|             "incorrect. Modify PhraseMatcher._terminal_hash to fix.") | ||||
|     W022 = ("Training a new part-of-speech tagger using a model with no " | ||||
|             "lemmatization rules or data. This means that the trained model " | ||||
|             "may not be able to lemmatize correctly. If this is intentional " | ||||
|             "or the language you're using doesn't have lemmatization data, " | ||||
|             "you can ignore this warning by setting SPACY_WARNING_IGNORE=W022. " | ||||
|             "If this is surprising, make sure you have the spacy-lookups-data " | ||||
|             "package installed.") | ||||
| 
 | ||||
| 
 | ||||
| @add_codes | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier | |||
| from .._ml import build_bow_text_classifier, build_nel_encoder | ||||
| from .._ml import link_vectors_to_models, zero_init, flatten | ||||
| from .._ml import masked_language_model, create_default_optimizer | ||||
| from ..errors import Errors, TempErrors | ||||
| from ..errors import Errors, TempErrors, user_warning, Warnings | ||||
| from .. import util | ||||
| 
 | ||||
| 
 | ||||
|  | @ -501,6 +501,9 @@ class Tagger(Pipe): | |||
| 
 | ||||
|     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, | ||||
|                        **kwargs): | ||||
|         lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"] | ||||
|         if not any(table in self.vocab.lookups for table in lemma_tables): | ||||
|             user_warning(Warnings.W022) | ||||
|         orig_tag_map = dict(self.vocab.morphology.tag_map) | ||||
|         new_tag_map = OrderedDict() | ||||
|         for raw_text, annots_brackets in get_gold_tuples(): | ||||
|  |  | |||
							
								
								
									
22 spacy/tests/pipeline/test_tagger.py Normal file
									
								
							|  | @ -0,0 +1,22 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import pytest | ||||
| from spacy.lang.en import English | ||||
| from spacy.lookups import Lookups | ||||
| 
 | ||||
| 
 | ||||
| def test_tagger_warns_no_lemma_lookups(): | ||||
|     nlp = English() | ||||
|     nlp.vocab.lookups = Lookups() | ||||
|     assert not len(nlp.vocab.lookups) | ||||
|     tagger = nlp.create_pipe("tagger") | ||||
|     with pytest.warns(UserWarning): | ||||
|         tagger.begin_training() | ||||
|     nlp.add_pipe(tagger) | ||||
|     with pytest.warns(UserWarning): | ||||
|         nlp.begin_training() | ||||
|     nlp.vocab.lookups.add_table("lemma_lookup") | ||||
|     with pytest.warns(None) as record: | ||||
|         nlp.begin_training() | ||||
|         assert not record.list | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user