Implement overwrite param for all custom lemmatizers (#6794)

This commit is contained in:
Adriane Boyd 2021-01-26 04:53:43 +01:00 committed by GitHub
parent 2263bc7b28
commit 71a6350744
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 54 additions and 34 deletions

View File

@@ -23,11 +23,13 @@ class Bengali(Language):
@Bengali.factory( @Bengali.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return Lemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Bengali"] __all__ = ["Bengali"]

View File

@@ -28,11 +28,13 @@ class Greek(Language):
@Greek.factory( @Greek.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return GreekLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return GreekLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Greek"] __all__ = ["Greek"]

View File

@@ -26,11 +26,13 @@ class English(Language):
@English.factory( @English.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return EnglishLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return EnglishLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["English"] __all__ = ["English"]

View File

@@ -26,11 +26,13 @@ class Persian(Language):
@Persian.factory( @Persian.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return Lemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Persian"] __all__ = ["Persian"]

View File

@@ -31,11 +31,13 @@ class French(Language):
@French.factory( @French.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return FrenchLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return FrenchLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["French"] __all__ = ["French"]

View File

@@ -38,11 +38,13 @@ class Macedonian(Language):
@Macedonian.factory( @Macedonian.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return MacedonianLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return MacedonianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Macedonian"] __all__ = ["Macedonian"]

View File

@@ -26,11 +26,13 @@ class Norwegian(Language):
@Norwegian.factory( @Norwegian.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return Lemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Norwegian"] __all__ = ["Norwegian"]

View File

@@ -27,11 +27,13 @@ class Dutch(Language):
@Dutch.factory( @Dutch.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return DutchLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return DutchLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Dutch"] __all__ = ["Dutch"]

View File

@@ -33,11 +33,13 @@ class Polish(Language):
@Polish.factory( @Polish.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "pos_lookup"}, default_config={"model": None, "mode": "pos_lookup", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return PolishLemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return PolishLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Polish"] __all__ = ["Polish"]

View File

@@ -22,7 +22,7 @@ class Russian(Language):
@Russian.factory( @Russian.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer( def make_lemmatizer(
@@ -30,7 +30,7 @@ def make_lemmatizer(
model: Optional[Model], model: Optional[Model],
name: str, name: str,
mode: str, mode: str,
overwrite: bool = False, overwrite: bool,
): ):
return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)

View File

@@ -29,11 +29,13 @@ class Swedish(Language):
@Swedish.factory( @Swedish.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "rule"}, default_config={"model": None, "mode": "rule", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): def make_lemmatizer(
return Lemmatizer(nlp.vocab, model, name, mode=mode) nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
):
return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
__all__ = ["Swedish"] __all__ = ["Swedish"]

View File

@@ -23,11 +23,11 @@ class Ukrainian(Language):
@Ukrainian.factory( @Ukrainian.factory(
"lemmatizer", "lemmatizer",
assigns=["token.lemma"], assigns=["token.lemma"],
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2", "overwrite": False},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer( def make_lemmatizer(
nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool
): ):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)