Fix Lemmatizer.get_lookups_config

This commit is contained in:
Ines Montani 2020-10-03 17:16:10 +02:00
parent dd542ec6a4
commit 7c4ab7e82c
5 changed files with 31 additions and 55 deletions

View File

@@ -1,4 +1,4 @@
from typing import List, Dict
from typing import List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
@@ -15,17 +15,10 @@ class FrenchLemmatizer(Lemmatizer):
"""
@classmethod
def get_lookups_config(cls, mode: str) -> Dict:
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
if mode == "rule":
return {
"required_tables": [
"lemma_lookup",
"lemma_rules",
"lemma_exc",
"lemma_index",
],
"optional_tables": [],
}
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
return (required, [])
else:
return super().get_lookups_config(mode)

View File

@@ -1,4 +1,4 @@
from typing import List, Dict
from typing import List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
@@ -6,16 +6,10 @@ from ...tokens import Token
class DutchLemmatizer(Lemmatizer):
@classmethod
def get_lookups_config(cls, mode: str) -> Dict:
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
if mode == "rule":
return {
"required_tables": [
"lemma_lookup",
"lemma_rules",
"lemma_exc",
"lemma_index",
],
}
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
return (required, [])
else:
return super().get_lookups_config(mode)

View File

@@ -1,4 +1,4 @@
from typing import List, Dict
from typing import List, Dict, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
@@ -11,21 +11,16 @@ class PolishLemmatizer(Lemmatizer):
# lemmatization, as well as case-sensitive lemmatization for nouns.
@classmethod
def get_lookups_config(cls, mode: str) -> Dict:
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
if mode == "pos_lookup":
return {
"required_tables": [
"lemma_lookup_adj",
"lemma_lookup_adp",
"lemma_lookup_adv",
"lemma_lookup_aux",
"lemma_lookup_noun",
"lemma_lookup_num",
"lemma_lookup_part",
"lemma_lookup_pron",
"lemma_lookup_verb",
]
}
# fmt: off
required = [
"lemma_lookup_adj", "lemma_lookup_adp", "lemma_lookup_adv",
"lemma_lookup_aux", "lemma_lookup_noun", "lemma_lookup_num",
"lemma_lookup_part", "lemma_lookup_pron", "lemma_lookup_verb"
]
# fmt: on
return (required, [])
else:
return super().get_lookups_config(mode)

View File

@@ -23,8 +23,9 @@ def test_lemmatizer_initialize(lang, capfd):
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
return lookups
lang_cls = get_lang_class(lang)
# Test that languages can be initialized
nlp = get_lang_class(lang)()
nlp = lang_cls()
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
assert not lemmatizer.lookups.tables
nlp.config["initialize"]["components"]["lemmatizer"] = {
@@ -41,7 +42,13 @@ def test_lemmatizer_initialize(lang, capfd):
assert doc[0].lemma_ == "y"
# Test initialization by calling .initialize() directly
nlp = get_lang_class(lang)()
nlp = lang_cls()
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
lemmatizer.initialize(lookups=lemmatizer_init_lookups())
assert nlp("x")[0].lemma_ == "y"
# Test lookups config format
for mode in ("rule", "lookup", "pos_lookup"):
required, optional = lemmatizer.get_lookups_config(mode)
assert isinstance(required, list)
assert isinstance(optional, list)

View File

@@ -190,23 +190,10 @@ lemmatization entirely.
Returns the lookups configuration settings for a given mode for use in
[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `mode` | The lemmatizer mode. ~~str~~ |
| **RETURNS** | The lookups configuration settings for this mode. Includes the keys `"required_tables"` and `"optional_tables"`, mapped to a list of table string names. ~~Dict[str, List[str]]~~ |
## Lemmatizer.load_lookups {#load_lookups tag="classmethod"}
Load and validate lookups tables. If the provided lookups is `None`, load the
default lookups tables according to the language and mode settings. Confirm that
all required tables for the language and mode are present.
| Name | Description |
| ----------- | -------------------------------------------------------------------------------------------------- |
| `lang` | The language. ~~str~~ |
| `mode` | The lemmatizer mode. ~~str~~ |
| `lookups` | The provided lookups, may be `None` if the default lookups should be loaded. ~~Optional[Lookups]~~ |
| **RETURNS** | The lookups. ~~Lookups~~ |
| Name | Description |
| ----------- | -------------------------------------------------------------------------------------- |
| `mode` | The lemmatizer mode. ~~str~~ |
| **RETURNS** | The required table names and the optional table names. ~~Tuple[List[str], List[str]]~~ |
## Lemmatizer.to_disk {#to_disk tag="method"}