Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 07:57:35 +03:00)
			
		
		
		
	Warn in Tagger.begin_training if no lemma tables are available (#4351)
This commit is contained in:
Parent: bc7e7db208
Commit: 3297a19545
|  | @@ -88,6 +88,13 @@ class Warnings(object): | ||||||
|             "loaded. (Shape: {shape})") |             "loaded. (Shape: {shape})") | ||||||
|     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " |     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " | ||||||
|             "incorrect. Modify PhraseMatcher._terminal_hash to fix.") |             "incorrect. Modify PhraseMatcher._terminal_hash to fix.") | ||||||
|  |     W022 = ("Training a new part-of-speech tagger using a model with no " | ||||||
|  |             "lemmatization rules or data. This means that the trained model " | ||||||
|  |             "may not be able to lemmatize correctly. If this is intentional " | ||||||
|  |             "or the language you're using doesn't have lemmatization data, " | ||||||
|  |             "you can ignore this warning by setting SPACY_WARNING_IGNORE=W022. " | ||||||
|  |             "If this is surprising, make sure you have the spacy-lookups-data " | ||||||
|  |             "package installed.") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @add_codes | @add_codes | ||||||
|  |  | ||||||
|  | @@ -30,7 +30,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier | ||||||
| from .._ml import build_bow_text_classifier, build_nel_encoder | from .._ml import build_bow_text_classifier, build_nel_encoder | ||||||
| from .._ml import link_vectors_to_models, zero_init, flatten | from .._ml import link_vectors_to_models, zero_init, flatten | ||||||
| from .._ml import masked_language_model, create_default_optimizer | from .._ml import masked_language_model, create_default_optimizer | ||||||
| from ..errors import Errors, TempErrors | from ..errors import Errors, TempErrors, user_warning, Warnings | ||||||
| from .. import util | from .. import util | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @@ -501,6 +501,9 @@ class Tagger(Pipe): | ||||||
| 
 | 
 | ||||||
|     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, |     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, | ||||||
|                        **kwargs): |                        **kwargs): | ||||||
|  |         lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"] | ||||||
|  |         if not any(table in self.vocab.lookups for table in lemma_tables): | ||||||
|  |             user_warning(Warnings.W022) | ||||||
|         orig_tag_map = dict(self.vocab.morphology.tag_map) |         orig_tag_map = dict(self.vocab.morphology.tag_map) | ||||||
|         new_tag_map = OrderedDict() |         new_tag_map = OrderedDict() | ||||||
|         for raw_text, annots_brackets in get_gold_tuples(): |         for raw_text, annots_brackets in get_gold_tuples(): | ||||||
|  |  | ||||||
spacy/tests/pipeline/test_tagger.py (Normal file, 22 lines)
|  | @@ -0,0 +1,22 @@ | ||||||
|  | # coding: utf8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | import pytest | ||||||
|  | from spacy.lang.en import English | ||||||
|  | from spacy.lookups import Lookups | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_tagger_warns_no_lemma_lookups(): | ||||||
|  |     nlp = English() | ||||||
|  |     nlp.vocab.lookups = Lookups() | ||||||
|  |     assert not len(nlp.vocab.lookups) | ||||||
|  |     tagger = nlp.create_pipe("tagger") | ||||||
|  |     with pytest.warns(UserWarning): | ||||||
|  |         tagger.begin_training() | ||||||
|  |     nlp.add_pipe(tagger) | ||||||
|  |     with pytest.warns(UserWarning): | ||||||
|  |         nlp.begin_training() | ||||||
|  |     nlp.vocab.lookups.add_table("lemma_lookup") | ||||||
|  |     with pytest.warns(None) as record: | ||||||
|  |         nlp.begin_training() | ||||||
|  |         assert not record.list | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user