Merge pull request #6199 from adrianeboyd/bugfix/ru-uk-lemmatizer-init

Update ru/uk lemmatizers for new nlp.initialize
This commit is contained in:
Ines Montani 2020-10-05 11:48:47 +02:00 committed by GitHub
commit 2acbec2d2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 14 additions and 11 deletions

View File

@@ -25,8 +25,14 @@ class Russian(Language):
default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
return RussianLemmatizer(nlp.vocab, model, name, mode=mode)
def make_lemmatizer(
nlp: Language,
model: Optional[Model],
name: str,
mode: str,
overwrite: bool = False,
):
return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Russian"]

View File

@@ -2,7 +2,6 @@ from typing import Optional, List, Dict, Tuple
from thinc.api import Model
from ...lookups import Lookups
from ...pipeline import Lemmatizer
from ...symbols import POS
from ...tokens import Token
@@ -22,9 +21,9 @@ class RussianLemmatizer(Lemmatizer):
name: str = "lemmatizer",
*,
mode: str = "pymorphy2",
lookups: Optional[Lookups] = None,
overwrite: bool = False,
) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups)
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try:
from pymorphy2 import MorphAnalyzer

View File

@@ -26,8 +26,8 @@ class Ukrainian(Language):
default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode)
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Ukrainian"]

View File

@@ -3,7 +3,6 @@ from typing import Optional
from thinc.api import Model
from ..ru.lemmatizer import RussianLemmatizer
from ...lookups import Lookups
from ...vocab import Vocab
@@ -15,9 +14,9 @@ class UkrainianLemmatizer(RussianLemmatizer):
name: str = "lemmatizer",
*,
mode: str = "pymorphy2",
lookups: Optional[Lookups] = None,
overwrite: bool = False,
) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups)
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try:
from pymorphy2 import MorphAnalyzer
except ImportError:

View File

@@ -248,7 +248,6 @@ def tt_tokenizer():
@pytest.fixture(scope="session")
def uk_tokenizer():
pytest.importorskip("pymorphy2")
pytest.importorskip("pymorphy2.lang")
return get_lang_class("uk")().tokenizer