From 71a635074486a8721ef30319765cb732bb26e444 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 26 Jan 2021 04:53:43 +0100 Subject: [PATCH] Implement overwrite param for all custom lemmatizers (#6794) --- spacy/lang/bn/__init__.py | 8 +++++--- spacy/lang/el/__init__.py | 8 +++++--- spacy/lang/en/__init__.py | 8 +++++--- spacy/lang/fa/__init__.py | 8 +++++--- spacy/lang/fr/__init__.py | 8 +++++--- spacy/lang/mk/__init__.py | 8 +++++--- spacy/lang/nb/__init__.py | 8 +++++--- spacy/lang/nl/__init__.py | 8 +++++--- spacy/lang/pl/__init__.py | 8 +++++--- spacy/lang/ru/__init__.py | 4 ++-- spacy/lang/sv/__init__.py | 8 +++++--- spacy/lang/uk/__init__.py | 4 ++-- 12 files changed, 54 insertions(+), 34 deletions(-) diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py index 879229888..23c3ff485 100644 --- a/spacy/lang/bn/__init__.py +++ b/spacy/lang/bn/__init__.py @@ -23,11 +23,13 @@ class Bengali(Language): @Bengali.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return Lemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Bengali"] diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py index 53069334e..be59a3500 100644 --- a/spacy/lang/el/__init__.py +++ b/spacy/lang/el/__init__.py @@ -28,11 +28,13 @@ class Greek(Language): @Greek.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return GreekLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return GreekLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Greek"] diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py index 3a3ebeefd..eea522908 100644 --- a/spacy/lang/en/__init__.py +++ b/spacy/lang/en/__init__.py @@ -26,11 +26,13 @@ class English(Language): @English.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return EnglishLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return EnglishLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["English"] diff --git a/spacy/lang/fa/__init__.py b/spacy/lang/fa/__init__.py index 77ee3bca3..77a0a28b9 100644 --- a/spacy/lang/fa/__init__.py +++ b/spacy/lang/fa/__init__.py @@ -26,11 +26,13 @@ class Persian(Language): @Persian.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return Lemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Persian"] diff --git a/spacy/lang/fr/__init__.py b/spacy/lang/fr/__init__.py index 1e0011fba..d69a5a718 100644 --- a/spacy/lang/fr/__init__.py +++ b/spacy/lang/fr/__init__.py @@ -31,11 +31,13 @@ class French(Language): @French.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return FrenchLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return FrenchLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["French"] diff --git a/spacy/lang/mk/__init__.py b/spacy/lang/mk/__init__.py index ef2670b4a..2f6097f16 100644 --- a/spacy/lang/mk/__init__.py +++ b/spacy/lang/mk/__init__.py @@ -38,11 +38,13 @@ class Macedonian(Language): @Macedonian.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return MacedonianLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return MacedonianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Macedonian"] diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py index 62d7707f3..0bfde7d28 100644 --- a/spacy/lang/nb/__init__.py +++ b/spacy/lang/nb/__init__.py @@ -26,11 +26,13 @@ class Norwegian(Language): @Norwegian.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return Lemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Norwegian"] diff --git a/spacy/lang/nl/__init__.py b/spacy/lang/nl/__init__.py index a3591f1bf..7fff3c3d2 100644 --- a/spacy/lang/nl/__init__.py +++ b/spacy/lang/nl/__init__.py @@ -27,11 +27,13 @@ class Dutch(Language): @Dutch.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return DutchLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return DutchLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Dutch"] diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py index 9e7303e83..585e08c60 100644 --- a/spacy/lang/pl/__init__.py +++ b/spacy/lang/pl/__init__.py @@ -33,11 +33,13 @@ class Polish(Language): @Polish.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "pos_lookup"}, + default_config={"model": None, "mode": "pos_lookup", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return PolishLemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return PolishLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Polish"] diff --git a/spacy/lang/ru/__init__.py b/spacy/lang/ru/__init__.py index 2f3965fcc..4287cc288 100644 --- a/spacy/lang/ru/__init__.py +++ b/spacy/lang/ru/__init__.py @@ -22,7 +22,7 @@ class Russian(Language): @Russian.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "pymorphy2"}, + default_config={"model": None, "mode": "pymorphy2", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) def make_lemmatizer( @@ -30,7 +30,7 @@ def make_lemmatizer( model: Optional[Model], name: str, mode: str, - overwrite: bool = False, + overwrite: bool, ): return RussianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) diff --git a/spacy/lang/sv/__init__.py b/spacy/lang/sv/__init__.py index 2490eb9ec..1b1b69fac 100644 --- a/spacy/lang/sv/__init__.py +++ b/spacy/lang/sv/__init__.py @@ -29,11 +29,13 @@ class Swedish(Language): @Swedish.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "rule"}, + default_config={"model": None, "mode": "rule", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) -def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str): - return Lemmatizer(nlp.vocab, model, name, mode=mode) +def make_lemmatizer( + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool +): + return Lemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) __all__ = ["Swedish"] diff --git a/spacy/lang/uk/__init__.py b/spacy/lang/uk/__init__.py index 24c88e5a7..677281ec6 100644 --- a/spacy/lang/uk/__init__.py +++ b/spacy/lang/uk/__init__.py @@ -23,11 +23,11 @@ class Ukrainian(Language): @Ukrainian.factory( "lemmatizer", assigns=["token.lemma"], - default_config={"model": None, "mode": "pymorphy2"}, + default_config={"model": None, "mode": "pymorphy2", "overwrite": False}, default_score_weights={"lemma_acc": 1.0}, ) def make_lemmatizer( - nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False + nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool ): return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)