mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Use morph hash in lemmatizer cache key (#7690)
Use the morph hash (`token.morph.key`) rather than the `MorphAnalysis` object itself in the lemmatizer cache key, so that the `Lemmatizer` (including its cache) can be pickled.
This commit is contained in:
		
							parent
							
								
									3e5bd5055e
								
							
						
					
					
						commit
						8008e2f75b
					
				| 
						 | 
					@ -175,7 +175,7 @@ class Lemmatizer(Pipe):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
 | 
					        DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        cache_key = (token.orth, token.pos, token.morph)
 | 
					        cache_key = (token.orth, token.pos, token.morph.key)
 | 
				
			||||||
        if cache_key in self.cache:
 | 
					        if cache_key in self.cache:
 | 
				
			||||||
            return self.cache[cache_key]
 | 
					            return self.cache[cache_key]
 | 
				
			||||||
        string = token.text
 | 
					        string = token.text
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,7 @@
 | 
				
			||||||
import pytest
 | 
					import pytest
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import mock
 | 
					import mock
 | 
				
			||||||
 | 
					import pickle
 | 
				
			||||||
from spacy import util, registry
 | 
					from spacy import util, registry
 | 
				
			||||||
from spacy.lang.en import English
 | 
					from spacy.lang.en import English
 | 
				
			||||||
from spacy.lookups import Lookups
 | 
					from spacy.lookups import Lookups
 | 
				
			||||||
| 
						 | 
					@ -106,6 +107,9 @@ def test_lemmatizer_serialize(nlp):
 | 
				
			||||||
    doc2 = nlp2.make_doc("coping")
 | 
					    doc2 = nlp2.make_doc("coping")
 | 
				
			||||||
    doc2[0].pos_ = "VERB"
 | 
					    doc2[0].pos_ = "VERB"
 | 
				
			||||||
    assert doc2[0].lemma_ == ""
 | 
					    assert doc2[0].lemma_ == ""
 | 
				
			||||||
    doc2 = lemmatizer(doc2)
 | 
					    doc2 = lemmatizer2(doc2)
 | 
				
			||||||
    assert doc2[0].text == "coping"
 | 
					    assert doc2[0].text == "coping"
 | 
				
			||||||
    assert doc2[0].lemma_ == "cope"
 | 
					    assert doc2[0].lemma_ == "cope"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Make sure that lemmatizer cache can be pickled
 | 
				
			||||||
 | 
					    b = pickle.dumps(lemmatizer2)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user