mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Fix Lemmatizer.get_lookups_config
This commit is contained in:
parent
dd542ec6a4
commit
7c4ab7e82c
|
@@ -1,4 +1,4 @@
|
||||||
from typing import List, Dict
|
from typing import List, Tuple
|
||||||
|
|
||||||
from ...pipeline import Lemmatizer
|
from ...pipeline import Lemmatizer
|
||||||
from ...tokens import Token
|
from ...tokens import Token
|
||||||
|
@@ -15,17 +15,10 @@ class FrenchLemmatizer(Lemmatizer):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_lookups_config(cls, mode: str) -> Dict:
|
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||||
if mode == "rule":
|
if mode == "rule":
|
||||||
return {
|
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
|
||||||
"required_tables": [
|
return (required, [])
|
||||||
"lemma_lookup",
|
|
||||||
"lemma_rules",
|
|
||||||
"lemma_exc",
|
|
||||||
"lemma_index",
|
|
||||||
],
|
|
||||||
"optional_tables": [],
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
return super().get_lookups_config(mode)
|
return super().get_lookups_config(mode)
|
||||||
|
|
||||||
|
|
|
@@ -1,4 +1,4 @@
|
||||||
from typing import List, Dict
|
from typing import List, Tuple
|
||||||
|
|
||||||
from ...pipeline import Lemmatizer
|
from ...pipeline import Lemmatizer
|
||||||
from ...tokens import Token
|
from ...tokens import Token
|
||||||
|
@@ -6,16 +6,10 @@ from ...tokens import Token
|
||||||
|
|
||||||
class DutchLemmatizer(Lemmatizer):
|
class DutchLemmatizer(Lemmatizer):
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_lookups_config(cls, mode: str) -> Dict:
|
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||||
if mode == "rule":
|
if mode == "rule":
|
||||||
return {
|
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
|
||||||
"required_tables": [
|
return (required, [])
|
||||||
"lemma_lookup",
|
|
||||||
"lemma_rules",
|
|
||||||
"lemma_exc",
|
|
||||||
"lemma_index",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
return super().get_lookups_config(mode)
|
return super().get_lookups_config(mode)
|
||||||
|
|
||||||
|
|
|
@@ -1,4 +1,4 @@
|
||||||
from typing import List, Dict
|
from typing import List, Dict, Tuple
|
||||||
|
|
||||||
from ...pipeline import Lemmatizer
|
from ...pipeline import Lemmatizer
|
||||||
from ...tokens import Token
|
from ...tokens import Token
|
||||||
|
@@ -11,21 +11,16 @@ class PolishLemmatizer(Lemmatizer):
|
||||||
# lemmatization, as well as case-sensitive lemmatization for nouns.
|
# lemmatization, as well as case-sensitive lemmatization for nouns.
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_lookups_config(cls, mode: str) -> Dict:
|
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
|
||||||
if mode == "pos_lookup":
|
if mode == "pos_lookup":
|
||||||
return {
|
# fmt: off
|
||||||
"required_tables": [
|
required = [
|
||||||
"lemma_lookup_adj",
|
"lemma_lookup_adj", "lemma_lookup_adp", "lemma_lookup_adv",
|
||||||
"lemma_lookup_adp",
|
"lemma_lookup_aux", "lemma_lookup_noun", "lemma_lookup_num",
|
||||||
"lemma_lookup_adv",
|
"lemma_lookup_part", "lemma_lookup_pron", "lemma_lookup_verb"
|
||||||
"lemma_lookup_aux",
|
|
||||||
"lemma_lookup_noun",
|
|
||||||
"lemma_lookup_num",
|
|
||||||
"lemma_lookup_part",
|
|
||||||
"lemma_lookup_pron",
|
|
||||||
"lemma_lookup_verb",
|
|
||||||
]
|
]
|
||||||
}
|
# fmt: on
|
||||||
|
return (required, [])
|
||||||
else:
|
else:
|
||||||
return super().get_lookups_config(mode)
|
return super().get_lookups_config(mode)
|
||||||
|
|
||||||
|
|
|
@@ -23,8 +23,9 @@ def test_lemmatizer_initialize(lang, capfd):
|
||||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||||
return lookups
|
return lookups
|
||||||
|
|
||||||
|
lang_cls = get_lang_class(lang)
|
||||||
# Test that languages can be initialized
|
# Test that languages can be initialized
|
||||||
nlp = get_lang_class(lang)()
|
nlp = lang_cls()
|
||||||
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
||||||
assert not lemmatizer.lookups.tables
|
assert not lemmatizer.lookups.tables
|
||||||
nlp.config["initialize"]["components"]["lemmatizer"] = {
|
nlp.config["initialize"]["components"]["lemmatizer"] = {
|
||||||
|
@@ -41,7 +42,13 @@ def test_lemmatizer_initialize(lang, capfd):
|
||||||
assert doc[0].lemma_ == "y"
|
assert doc[0].lemma_ == "y"
|
||||||
|
|
||||||
# Test initialization by calling .initialize() directly
|
# Test initialization by calling .initialize() directly
|
||||||
nlp = get_lang_class(lang)()
|
nlp = lang_cls()
|
||||||
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
|
||||||
lemmatizer.initialize(lookups=lemmatizer_init_lookups())
|
lemmatizer.initialize(lookups=lemmatizer_init_lookups())
|
||||||
assert nlp("x")[0].lemma_ == "y"
|
assert nlp("x")[0].lemma_ == "y"
|
||||||
|
|
||||||
|
# Test lookups config format
|
||||||
|
for mode in ("rule", "lookup", "pos_lookup"):
|
||||||
|
required, optional = lemmatizer.get_lookups_config(mode)
|
||||||
|
assert isinstance(required, list)
|
||||||
|
assert isinstance(optional, list)
|
||||||
|
|
|
@@ -191,22 +191,9 @@ Returns the lookups configuration settings for a given mode for use in
|
||||||
[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
|
[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------- | -------------------------------------------------------------------------------------- |
|
||||||
| `mode` | The lemmatizer mode. ~~str~~ |
|
| `mode` | The lemmatizer mode. ~~str~~ |
|
||||||
| **RETURNS** | The lookups configuration settings for this mode. Includes the keys `"required_tables"` and `"optional_tables"`, mapped to a list of table string names. ~~Dict[str, List[str]]~~ |
|
| **RETURNS** | The required table names and the optional table names. ~~Tuple[List[str], List[str]]~~ |
|
||||||
|
|
||||||
## Lemmatizer.load_lookups {#load_lookups tag="classmethod"}
|
|
||||||
|
|
||||||
Load and validate lookups tables. If the provided lookups is `None`, load the
|
|
||||||
default lookups tables according to the language and mode settings. Confirm that
|
|
||||||
all required tables for the language and mode are present.
|
|
||||||
|
|
||||||
| Name | Description |
|
|
||||||
| ----------- | -------------------------------------------------------------------------------------------------- |
|
|
||||||
| `lang` | The language. ~~str~~ |
|
|
||||||
| `mode` | The lemmatizer mode. ~~str~~ |
|
|
||||||
| `lookups` | The provided lookups, may be `None` if the default lookups should be loaded. ~~Optional[Lookups]~~ |
|
|
||||||
| **RETURNS** | The lookups. ~~Lookups~~ |
|
|
||||||
|
|
||||||
## Lemmatizer.to_disk {#to_disk tag="method"}
|
## Lemmatizer.to_disk {#to_disk tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user