Merge pull request #6199 from adrianeboyd/bugfix/ru-uk-lemmatizer-init

Update ru/uk lemmatizers for new nlp.initialize
This commit is contained in:
Ines Montani 2020-10-05 11:48:47 +02:00 committed by GitHub
commit 2acbec2d2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 14 additions and 11 deletions

View File

@@ -25,8 +25,14 @@ class Russian(Language):
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return RussianLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language,
model: Optional[Model],
name: str,
mode: str,
overwrite: bool = False,
):
return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Russian"] __all__ = ["Russian"]

View File

@@ -2,7 +2,6 @@ from typing import Optional, List, Dict, Tuple
from thinc.api import Model from thinc.api import Model
from ...lookups import Lookups
from ...pipeline import Lemmatizer from ...pipeline import Lemmatizer
from ...symbols import POS from ...symbols import POS
from ...tokens import Token from ...tokens import Token
@@ -22,9 +21,9 @@ class RussianLemmatizer(Lemmatizer):
name: str = "lemmatizer", name: str = "lemmatizer",
*, *,
mode: str = "pymorphy2", mode: str = "pymorphy2",
lookups: Optional[Lookups] = None, overwrite: bool = False,
) -> None: ) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups) super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try: try:
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer

View File

@@ -26,8 +26,8 @@ class Ukrainian(Language):
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode) return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Ukrainian"] __all__ = ["Ukrainian"]

View File

@@ -3,7 +3,6 @@ from typing import Optional
from thinc.api import Model from thinc.api import Model
from ..ru.lemmatizer import RussianLemmatizer from ..ru.lemmatizer import RussianLemmatizer
from ...lookups import Lookups
from ...vocab import Vocab from ...vocab import Vocab
@@ -15,9 +14,9 @@ class UkrainianLemmatizer(RussianLemmatizer):
name: str = "lemmatizer", name: str = "lemmatizer",
*, *,
mode: str = "pymorphy2", mode: str = "pymorphy2",
lookups: Optional[Lookups] = None, overwrite: bool = False,
) -> None: ) -> None:
super().__init__(vocab, model, name, mode=mode, lookups=lookups) super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
try: try:
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer
except ImportError: except ImportError:

View File

@@ -248,7 +248,6 @@ def tt_tokenizer():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def uk_tokenizer(): def uk_tokenizer():
pytest.importorskip("pymorphy2") pytest.importorskip("pymorphy2")
pytest.importorskip("pymorphy2.lang")
return get_lang_class("uk")().tokenizer return get_lang_class("uk")().tokenizer