import pytest
from spacy import util, registry
from spacy.lang.en import English
from spacy.lookups import Lookups

from ..util import make_tempdir


@pytest.fixture
def nlp():
    return English()


@pytest.fixture
def lemmatizer(nlp):
    @registry.assets("cope_lookups")
    def cope_lookups():
        lookups = Lookups()
        lookups.add_table("lemma_lookup", {"cope": "cope"})
        lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
        lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
        return lookups

    lemmatizer = nlp.add_pipe(
        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
    )
    return lemmatizer


def test_lemmatizer_init(nlp):
    @registry.assets("cope_lookups")
    def cope_lookups():
        lookups = Lookups()
        lookups.add_table("lemma_lookup", {"cope": "cope"})
        lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
        lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
        return lookups

    lemmatizer = nlp.add_pipe(
        "lemmatizer", config={"mode": "lookup", "lookups": {"@assets": "cope_lookups"}}
    )
    assert isinstance(lemmatizer.lookups, Lookups)
    assert lemmatizer.mode == "lookup"
    # replace any tables from spacy-lookups-data
    lemmatizer.lookups = Lookups()
    doc = nlp("coping")
    # lookup with no tables sets text as lemma
    assert doc[0].lemma_ == "coping"

    nlp.remove_pipe("lemmatizer")

    @registry.assets("empty_lookups")
    def empty_lookups():
        return Lookups()

    with pytest.raises(ValueError):
        nlp.add_pipe(
            "lemmatizer",
            config={"mode": "lookup", "lookups": {"@assets": "empty_lookups"}},
        )


def test_lemmatizer_config(nlp, lemmatizer):
    doc = nlp.make_doc("coping")
    doc[0].pos_ = "VERB"
    assert doc[0].lemma_ == ""
    doc = lemmatizer(doc)
    assert doc[0].text == "coping"
    assert doc[0].lemma_ == "cope"

    doc = nlp.make_doc("coping")
    doc[0].pos_ = "VERB"
    assert doc[0].lemma_ == ""
    doc = lemmatizer(doc)
    assert doc[0].text == "coping"
    assert doc[0].lemma_ == "cope"


def test_lemmatizer_serialize(nlp, lemmatizer):
    @registry.assets("cope_lookups")
    def cope_lookups():
        lookups = Lookups()
        lookups.add_table("lemma_lookup", {"cope": "cope"})
        lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
        lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
        return lookups

    nlp2 = English()
    lemmatizer2 = nlp2.add_pipe(
        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
    )
    lemmatizer2.from_bytes(lemmatizer.to_bytes())
    assert lemmatizer.to_bytes() == lemmatizer2.to_bytes()
    assert lemmatizer.lookups.tables == lemmatizer2.lookups.tables

    # Also test the results are still the same after IO
    with make_tempdir() as tmp_dir:
        nlp.to_disk(tmp_dir)
        nlp2 = util.load_model_from_path(tmp_dir)
        doc2 = nlp2.make_doc("coping")
        doc2[0].pos_ = "VERB"
        assert doc2[0].lemma_ == ""
        doc2 = lemmatizer(doc2)
        assert doc2[0].text == "coping"
        assert doc2[0].lemma_ == "cope"