mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Fix Lemmatizer.get_lookups_config
This commit is contained in:
parent
dd542ec6a4
commit
7c4ab7e82c
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Dict
|
||||
from typing import List, Tuple
|
||||
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
@ -15,17 +15,10 @@ class FrenchLemmatizer(Lemmatizer):
|
|||
"""
|
||||
|
||||
@classmethod
|
||||
def get_lookups_config(cls, mode: str) -> Dict:
|
||||
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||
if mode == "rule":
|
||||
return {
|
||||
"required_tables": [
|
||||
"lemma_lookup",
|
||||
"lemma_rules",
|
||||
"lemma_exc",
|
||||
"lemma_index",
|
||||
],
|
||||
"optional_tables": [],
|
||||
}
|
||||
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
|
||||
return (required, [])
|
||||
else:
|
||||
return super().get_lookups_config(mode)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Dict
|
||||
from typing import List, Tuple
|
||||
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
@ -6,16 +6,10 @@ from ...tokens import Token
|
|||
|
||||
class DutchLemmatizer(Lemmatizer):
|
||||
@classmethod
|
||||
def get_lookups_config(cls, mode: str) -> Dict:
|
||||
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||
if mode == "rule":
|
||||
return {
|
||||
"required_tables": [
|
||||
"lemma_lookup",
|
||||
"lemma_rules",
|
||||
"lemma_exc",
|
||||
"lemma_index",
|
||||
],
|
||||
}
|
||||
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
|
||||
return (required, [])
|
||||
else:
|
||||
return super().get_lookups_config(mode)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Dict
|
||||
from typing import List, Dict, Tuple
|
||||
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
@ -11,21 +11,16 @@ class PolishLemmatizer(Lemmatizer):
|
|||
# lemmatization, as well as case-sensitive lemmatization for nouns.
|
||||
|
||||
@classmethod
|
||||
def get_lookups_config(cls, mode: str) -> Dict:
|
||||
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||
if mode == "pos_lookup":
|
||||
return {
|
||||
"required_tables": [
|
||||
"lemma_lookup_adj",
|
||||
"lemma_lookup_adp",
|
||||
"lemma_lookup_adv",
|
||||
"lemma_lookup_aux",
|
||||
"lemma_lookup_noun",
|
||||
"lemma_lookup_num",
|
||||
"lemma_lookup_part",
|
||||
"lemma_lookup_pron",
|
||||
"lemma_lookup_verb",
|
||||
# fmt: off
|
||||
required = [
|
||||
"lemma_lookup_adj", "lemma_lookup_adp", "lemma_lookup_adv",
|
||||
"lemma_lookup_aux", "lemma_lookup_noun", "lemma_lookup_num",
|
||||
"lemma_lookup_part", "lemma_lookup_pron", "lemma_lookup_verb"
|
||||
]
|
||||
}
|
||||
# fmt: on
|
||||
return (required, [])
|
||||
else:
|
||||
return super().get_lookups_config(mode)
|
||||
|
||||
|
|
|
@ -23,8 +23,9 @@ def test_lemmatizer_initialize(lang, capfd):
|
|||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
return lookups
|
||||
|
||||
lang_cls = get_lang_class(lang)
|
||||
# Test that languages can be initialized
|
||||
nlp = get_lang_class(lang)()
|
||||
nlp = lang_cls()
|
||||
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
||||
assert not lemmatizer.lookups.tables
|
||||
nlp.config["initialize"]["components"]["lemmatizer"] = {
|
||||
|
@ -41,7 +42,13 @@ def test_lemmatizer_initialize(lang, capfd):
|
|||
assert doc[0].lemma_ == "y"
|
||||
|
||||
# Test initialization by calling .initialize() directly
|
||||
nlp = get_lang_class(lang)()
|
||||
nlp = lang_cls()
|
||||
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
||||
lemmatizer.initialize(lookups=lemmatizer_init_lookups())
|
||||
assert nlp("x")[0].lemma_ == "y"
|
||||
|
||||
# Test lookups config format
|
||||
for mode in ("rule", "lookup", "pos_lookup"):
|
||||
required, optional = lemmatizer.get_lookups_config(mode)
|
||||
assert isinstance(required, list)
|
||||
assert isinstance(optional, list)
|
||||
|
|
|
@ -191,22 +191,9 @@ Returns the lookups configuration settings for a given mode for use in
|
|||
[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| ----------- | -------------------------------------------------------------------------------------- |
|
||||
| `mode` | The lemmatizer mode. ~~str~~ |
|
||||
| **RETURNS** | The lookups configuration settings for this mode. Includes the keys `"required_tables"` and `"optional_tables"`, mapped to a list of table string names. ~~Dict[str, List[str]]~~ |
|
||||
|
||||
## Lemmatizer.load_lookups {#load_lookups tag="classmethod"}
|
||||
|
||||
Load and validate lookups tables. If the provided lookups is `None`, load the
|
||||
default lookups tables according to the language and mode settings. Confirm that
|
||||
all required tables for the language and mode are present.
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------------------------------------- |
|
||||
| `lang` | The language. ~~str~~ |
|
||||
| `mode` | The lemmatizer mode. ~~str~~ |
|
||||
| `lookups` | The provided lookups, may be `None` if the default lookups should be loaded. ~~Optional[Lookups]~~ |
|
||||
| **RETURNS** | The lookups. ~~Lookups~~ |
|
||||
| **RETURNS** | The required table names and the optional table names. ~~Tuple[List[str], List[str]]~~ |
|
||||
|
||||
## Lemmatizer.to_disk {#to_disk tag="method"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user