Update ru/uk lemmatizers for new nlp.initialize

This commit is contained in:
Adriane Boyd 2020-10-05 09:26:43 +02:00
parent 549758f67d
commit b0b93854cb
5 changed files with 14 additions and 11 deletions

View File

@ -25,8 +25,14 @@ class Russian(Language):
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return RussianLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language,
model: Optional[Model],
name: str,
mode: str,
overwrite: bool = False,
):
return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Russian"] __all__ = ["Russian"]

View File

@ -2,7 +2,6 @@ from typing import Optional, List, Dict, Tuple
from thinc.api import Model from thinc.api import Model
from ...lookups import Lookups
from ...pipeline import Lemmatizer from ...pipeline import Lemmatizer
from ...symbols import POS from ...symbols import POS
from ...tokens import Token from ...tokens import Token
@ -22,9 +21,9 @@ class RussianLemmatizer(Lemmatizer):
name: str = "lemmatizer", name: str = "lemmatizer",
*, *,
mode: str = "pymorphy2", mode: str = "pymorphy2",
lookups: Optional[Lookups] = None, overwrite: bool = False,
) -> None: ) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups) super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try: try:
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer

View File

@ -26,8 +26,8 @@ class Ukrainian(Language):
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode) return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Ukrainian"] __all__ = ["Ukrainian"]

View File

@ -3,7 +3,6 @@ from typing import Optional
from thinc.api import Model from thinc.api import Model
from ..ru.lemmatizer import RussianLemmatizer from ..ru.lemmatizer import RussianLemmatizer
from ...lookups import Lookups
from ...vocab import Vocab from ...vocab import Vocab
@ -15,9 +14,9 @@ class UkrainianLemmatizer(RussianLemmatizer):
name: str = "lemmatizer", name: str = "lemmatizer",
*, *,
mode: str = "pymorphy2", mode: str = "pymorphy2",
lookups: Optional[Lookups] = None, overwrite: bool = False,
) -> None: ) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups) super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try: try:
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer
except ImportError: except ImportError:

View File

@ -248,7 +248,6 @@ def tt_tokenizer():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def uk_tokenizer(): def uk_tokenizer():
pytest.importorskip("pymorphy2") pytest.importorskip("pymorphy2")
pytest.importorskip("pymorphy2.lang")
return get_lang_class("uk")().tokenizer return get_lang_class("uk")().tokenizer