mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-03 03:40:24 +03:00
Merge 3cc6c50688
into dfe27516d9
This commit is contained in:
commit
afa9707674
|
@ -1,3 +1,4 @@
|
||||||
|
import re
|
||||||
from typing import Callable, Dict, List, Optional, Tuple
|
from typing import Callable, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from thinc.api import Model
|
from thinc.api import Model
|
||||||
|
@ -7,6 +8,7 @@ from ...pipeline.lemmatizer import lemmatizer_score
|
||||||
from ...symbols import POS
|
from ...symbols import POS
|
||||||
from ...tokens import Token
|
from ...tokens import Token
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
|
from ..char_classes import COMBINING_DIACRITICS
|
||||||
|
|
||||||
PUNCT_RULES = {"«": '"', "»": '"'}
|
PUNCT_RULES = {"«": '"', "»": '"'}
|
||||||
|
|
||||||
|
@ -51,9 +53,10 @@ class RussianLemmatizer(Lemmatizer):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer
|
vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer
|
||||||
)
|
)
|
||||||
|
self._diacritics_re = re.compile(f"[{COMBINING_DIACRITICS}]")
|
||||||
|
|
||||||
def _pymorphy_lemmatize(self, token: Token) -> List[str]:
|
def _pymorphy_lemmatize(self, token: Token) -> List[str]:
|
||||||
string = token.text
|
string = self._diacritics_re.sub("", token.text)
|
||||||
univ_pos = token.pos_
|
univ_pos = token.pos_
|
||||||
morphology = token.morph.to_dict()
|
morphology = token.morph.to_dict()
|
||||||
if univ_pos == "PUNCT":
|
if univ_pos == "PUNCT":
|
||||||
|
|
|
@ -65,6 +65,8 @@ def test_ru_lemmatizer_works_with_different_pos_homonyms(
|
||||||
("гвоздики", "Gender=Masc", "гвоздик"),
|
("гвоздики", "Gender=Masc", "гвоздик"),
|
||||||
("вина", "Gender=Fem", "вина"),
|
("вина", "Gender=Fem", "вина"),
|
||||||
("вина", "Gender=Neut", "вино"),
|
("вина", "Gender=Neut", "вино"),
|
||||||
|
("жену", "Gender=Fem", "жена"),
|
||||||
|
("жену́", "Gender=Fem", "жена"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_ru_lemmatizer_works_with_noun_homonyms(ru_lemmatizer, text, morph, lemma):
|
def test_ru_lemmatizer_works_with_noun_homonyms(ru_lemmatizer, text, morph, lemma):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user