From 7c4ab7e82c5eba0133dee880f5e79d86ec083b13 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 3 Oct 2020 17:16:10 +0200 Subject: [PATCH] Fix Lemmatizer.get_lookups_config --- spacy/lang/fr/lemmatizer.py | 15 ++++----------- spacy/lang/nl/lemmatizer.py | 14 ++++---------- spacy/lang/pl/lemmatizer.py | 25 ++++++++++--------------- spacy/tests/lang/test_lemmatizers.py | 11 +++++++++-- website/docs/api/lemmatizer.md | 21 ++++----------------- 5 files changed, 31 insertions(+), 55 deletions(-) diff --git a/spacy/lang/fr/lemmatizer.py b/spacy/lang/fr/lemmatizer.py index 0dd782cc4..bb5a270ab 100644 --- a/spacy/lang/fr/lemmatizer.py +++ b/spacy/lang/fr/lemmatizer.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Tuple from ...pipeline import Lemmatizer from ...tokens import Token @@ -15,17 +15,10 @@ class FrenchLemmatizer(Lemmatizer): """ @classmethod - def get_lookups_config(cls, mode: str) -> Dict: + def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]: if mode == "rule": - return { - "required_tables": [ - "lemma_lookup", - "lemma_rules", - "lemma_exc", - "lemma_index", - ], - "optional_tables": [], - } + required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"] + return (required, []) else: return super().get_lookups_config(mode) diff --git a/spacy/lang/nl/lemmatizer.py b/spacy/lang/nl/lemmatizer.py index 42b97a862..6c025dcf6 100644 --- a/spacy/lang/nl/lemmatizer.py +++ b/spacy/lang/nl/lemmatizer.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Tuple from ...pipeline import Lemmatizer from ...tokens import Token @@ -6,16 +6,10 @@ from ...tokens import Token class DutchLemmatizer(Lemmatizer): @classmethod - def get_lookups_config(cls, mode: str) -> Dict: + def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]: if mode == "rule": - return { - "required_tables": [ - "lemma_lookup", - "lemma_rules", - "lemma_exc", - "lemma_index", - ], - } + required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"] + return (required, []) else: return super().get_lookups_config(mode) diff --git a/spacy/lang/pl/lemmatizer.py b/spacy/lang/pl/lemmatizer.py index 406ef9e4a..059d0609a 100644 --- a/spacy/lang/pl/lemmatizer.py +++ b/spacy/lang/pl/lemmatizer.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Tuple from ...pipeline import Lemmatizer from ...tokens import Token @@ -11,21 +11,16 @@ class PolishLemmatizer(Lemmatizer): # lemmatization, as well as case-sensitive lemmatization for nouns. @classmethod - def get_lookups_config(cls, mode: str) -> Dict: + def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]: if mode == "pos_lookup": - return { - "required_tables": [ - "lemma_lookup_adj", - "lemma_lookup_adp", - "lemma_lookup_adv", - "lemma_lookup_aux", - "lemma_lookup_noun", - "lemma_lookup_num", - "lemma_lookup_part", - "lemma_lookup_pron", - "lemma_lookup_verb", - ] - } + # fmt: off + required = [ + "lemma_lookup_adj", "lemma_lookup_adp", "lemma_lookup_adv", + "lemma_lookup_aux", "lemma_lookup_noun", "lemma_lookup_num", + "lemma_lookup_part", "lemma_lookup_pron", "lemma_lookup_verb" + ] + # fmt: on + return (required, []) else: return super().get_lookups_config(mode) diff --git a/spacy/tests/lang/test_lemmatizers.py b/spacy/tests/lang/test_lemmatizers.py index 5f45664eb..a49d70d6b 100644 --- a/spacy/tests/lang/test_lemmatizers.py +++ b/spacy/tests/lang/test_lemmatizers.py @@ -23,8 +23,9 @@ def test_lemmatizer_initialize(lang, capfd): lookups.add_table("lemma_rules", {"verb": [["ing", ""]]}) return lookups + lang_cls = get_lang_class(lang) # Test that languages can be initialized - nlp = get_lang_class(lang)() + nlp = lang_cls() lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"}) assert not lemmatizer.lookups.tables nlp.config["initialize"]["components"]["lemmatizer"] = { @@ -41,7 +42,13 @@ def test_lemmatizer_initialize(lang, capfd): assert doc[0].lemma_ == "y" # Test initialization by calling .initialize() directly - nlp = get_lang_class(lang)() + nlp = lang_cls() lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"}) lemmatizer.initialize(lookups=lemmatizer_init_lookups()) assert nlp("x")[0].lemma_ == "y" + + # Test lookups config format + for mode in ("rule", "lookup", "pos_lookup"): + required, optional = lemmatizer.get_lookups_config(mode) + assert isinstance(required, list) + assert isinstance(optional, list) diff --git a/website/docs/api/lemmatizer.md b/website/docs/api/lemmatizer.md index 27ea04432..e838c75b2 100644 --- a/website/docs/api/lemmatizer.md +++ b/website/docs/api/lemmatizer.md @@ -190,23 +190,10 @@ lemmatization entirely. Returns the lookups configuration settings for a given mode for use in [`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups). -| Name | Description | -| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `mode` | The lemmatizer mode. ~~str~~ | -| **RETURNS** | The lookups configuration settings for this mode. Includes the keys `"required_tables"` and `"optional_tables"`, mapped to a list of table string names. ~~Dict[str, List[str]]~~ | - -## Lemmatizer.load_lookups {#load_lookups tag="classmethod"} - -Load and validate lookups tables. If the provided lookups is `None`, load the -default lookups tables according to the language and mode settings. Confirm that -all required tables for the language and mode are present. - -| Name | Description | -| ----------- | -------------------------------------------------------------------------------------------------- | -| `lang` | The language. ~~str~~ | -| `mode` | The lemmatizer mode. ~~str~~ | -| `lookups` | The provided lookups, may be `None` if the default lookups should be loaded. ~~Optional[Lookups]~~ | -| **RETURNS** | The lookups. ~~Lookups~~ | +| Name | Description | +| ----------- | -------------------------------------------------------------------------------------- | +| `mode` | The lemmatizer mode. ~~str~~ | +| **RETURNS** | The required table names and the optional table names. ~~Tuple[List[str], List[str]]~~ | ## Lemmatizer.to_disk {#to_disk tag="method"}