mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 05:01:02 +03:00 
			
		
		
		
	* pymorphy2 issues #11620, #11626, #11625: - #11620: pymorphy2_lookup - #11626: handle multiple forms pointing to the same normal form + handling empty POS tag - #11625: matching DET that are labelled as PRON by pymorphy2 * Move lemmatizer algorithm changes back into RussianLemmatizer * Fix uk pymorphy3_lookup mode init * Move and update tests for ru/uk lookup lemmatizer modes * Fix typo * Remove traces of previous behavior for uninflected POS * Refactor to private generic-looking pymorphy methods * Remove xfailed uk lemmatizer cases * Update spacy/lang/ru/lemmatizer.py Co-authored-by: Richard Hudson <richard@explosion.ai> Co-authored-by: Dmytro S Lituiev <d.lituiev@gmail.com> Co-authored-by: Richard Hudson <richard@explosion.ai>
		
			
				
	
	
		
			46 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			46 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Optional, Callable
 | |
| 
 | |
| from thinc.api import Model
 | |
| 
 | |
| from ..ru.lemmatizer import RussianLemmatizer
 | |
| from ...pipeline.lemmatizer import lemmatizer_score
 | |
| from ...vocab import Vocab
 | |
| 
 | |
| 
 | |
class UkrainianLemmatizer(RussianLemmatizer):
    """RussianLemmatizer subclass that uses a ``lang="uk"`` morph analyzer.

    The constructor prepares ``self._morph`` for the pymorphy-based modes and
    then forwards every argument to ``RussianLemmatizer.__init__``.
    """

    def __init__(
        self,
        vocab: Vocab,
        model: Optional[Model],
        name: str = "lemmatizer",
        *,
        mode: str = "pymorphy3",
        overwrite: bool = False,
        scorer: Optional[Callable] = lemmatizer_score,
    ) -> None:
        """Set up the Ukrainian analyzer, then run the parent initializer.

        vocab: Passed through to the parent initializer.
        model: Passed through to the parent initializer.
        name: Passed through to the parent initializer.
        mode: Lemmatizer mode. "pymorphy2"/"pymorphy2_lookup" import
            pymorphy2, "pymorphy3"/"pymorphy3_lookup" import pymorphy3; any
            other value skips analyzer setup here.
        overwrite: Passed through to the parent initializer.
        scorer: Passed through to the parent initializer.

        Raises ImportError if the pymorphy package for the requested mode
        cannot be imported.
        """
        # Stays None for modes that are not handled by pymorphy here.
        analyzer_cls = None

        if mode in {"pymorphy2", "pymorphy2_lookup"}:
            try:
                from pymorphy2 import MorphAnalyzer as analyzer_cls
            except ImportError:
                raise ImportError(
                    "The Ukrainian lemmatizer mode 'pymorphy2' requires the "
                    "pymorphy2 library and dictionaries. Install them with: "
                    "pip install pymorphy2 pymorphy2-dicts-uk"
                ) from None
        elif mode in {"pymorphy3", "pymorphy3_lookup"}:
            try:
                from pymorphy3 import MorphAnalyzer as analyzer_cls
            except ImportError:
                raise ImportError(
                    "The Ukrainian lemmatizer mode 'pymorphy3' requires the "
                    "pymorphy3 library and dictionaries. Install them with: "
                    "pip install pymorphy3 pymorphy3-dicts-uk"
                ) from None

        # Only build an analyzer when a pymorphy mode was selected and no
        # analyzer is already attached to this instance.
        wants_analyzer = analyzer_cls is not None
        if wants_analyzer and getattr(self, "_morph", None) is None:
            self._morph = analyzer_cls(lang="uk")

        # The analyzer is assigned before delegating, so it is already in
        # place when the parent initializer runs.
        super().__init__(
            vocab,
            model,
            name,
            mode=mode,
            overwrite=overwrite,
            scorer=scorer,
        )
 |