mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	* pymorphy2 issues #11620, #11626, #11625: - #11620: pymorphy2_lookup - #11626: handle multiple forms pointing to the same normal form + handling empty POS tag - #11625: matching DET that are labelled as PRON by pymorphy2 * Move lemmatizer algorithm changes back into RussianLemmatizer * Fix uk pymorphy3_lookup mode init * Move and update tests for ru/uk lookup lemmatizer modes * Fix typo * Remove traces of previous behavior for uninflected POS * Refactor to private generic-looking pymorphy methods * Remove xfailed uk lemmatizer cases * Update spacy/lang/ru/lemmatizer.py Co-authored-by: Richard Hudson <richard@explosion.ai> Co-authored-by: Dmytro S Lituiev <d.lituiev@gmail.com> Co-authored-by: Richard Hudson <richard@explosion.ai>
		
			
				
	
	
		
			28 lines
		
	
	
		
			837 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			28 lines
		
	
	
		
			837 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| from spacy.tokens import Doc
 | |
| 
 | |
| 
 | |
| pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")  # module-wide mark: ignore DeprecationWarning in every test of this file
 | |
| 
 | |
| 
 | |
def test_uk_lemmatizer(uk_lemmatizer):
    """Smoke-test the default uk lemmatizer: it must run without raising."""
    placeholder_words = ["a", "b", "c"]
    sample = Doc(uk_lemmatizer.vocab, words=placeholder_words)
    assert uk_lemmatizer.mode == "pymorphy3"
    # The return value is ignored; lemmas are read back from the same doc below.
    uk_lemmatizer(sample)
    lemmas = [tok.lemma for tok in sample]
    # Passes as long as the collected list is non-empty.
    assert lemmas
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize(
    "word,lemma",
    [
        ("якийсь", "якийсь"),
        ("розповідають", "розповідати"),
        ("розповіси", "розповісти"),
    ],
)
def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer, word, lemma):
    """Check that the lookup-mode uk lemmatizer maps each word to its expected lemma."""
    assert uk_lookup_lemmatizer.mode == "pymorphy3_lookup"
    # Build a one-token doc so the token at index 0 is the word under test.
    single_word_doc = Doc(uk_lookup_lemmatizer.vocab, words=[word])
    processed = uk_lookup_lemmatizer(single_word_doc)
    first_token = processed[0]
    assert first_token.lemma_ == lemma
 |