mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Fix Lemmatizer.get_lookups_config
This commit is contained in:
		
							parent
							
								
									dd542ec6a4
								
							
						
					
					
						commit
						7c4ab7e82c
					
				| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
from typing import List, Dict
 | 
					from typing import List, Tuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ...pipeline import Lemmatizer
 | 
					from ...pipeline import Lemmatizer
 | 
				
			||||||
from ...tokens import Token
 | 
					from ...tokens import Token
 | 
				
			||||||
| 
						 | 
					@ -15,17 +15,10 @@ class FrenchLemmatizer(Lemmatizer):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def get_lookups_config(cls, mode: str) -> Dict:
 | 
					    def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
 | 
				
			||||||
        if mode == "rule":
 | 
					        if mode == "rule":
 | 
				
			||||||
            return {
 | 
					            required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
 | 
				
			||||||
                "required_tables": [
 | 
					            return (required, [])
 | 
				
			||||||
                    "lemma_lookup",
 | 
					 | 
				
			||||||
                    "lemma_rules",
 | 
					 | 
				
			||||||
                    "lemma_exc",
 | 
					 | 
				
			||||||
                    "lemma_index",
 | 
					 | 
				
			||||||
                ],
 | 
					 | 
				
			||||||
                "optional_tables": [],
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            return super().get_lookups_config(mode)
 | 
					            return super().get_lookups_config(mode)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
from typing import List, Dict
 | 
					from typing import List, Tuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ...pipeline import Lemmatizer
 | 
					from ...pipeline import Lemmatizer
 | 
				
			||||||
from ...tokens import Token
 | 
					from ...tokens import Token
 | 
				
			||||||
| 
						 | 
					@ -6,16 +6,10 @@ from ...tokens import Token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DutchLemmatizer(Lemmatizer):
 | 
					class DutchLemmatizer(Lemmatizer):
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def get_lookups_config(cls, mode: str) -> Dict:
 | 
					    def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
 | 
				
			||||||
        if mode == "rule":
 | 
					        if mode == "rule":
 | 
				
			||||||
            return {
 | 
					            required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
 | 
				
			||||||
                "required_tables": [
 | 
					            return (required, [])
 | 
				
			||||||
                    "lemma_lookup",
 | 
					 | 
				
			||||||
                    "lemma_rules",
 | 
					 | 
				
			||||||
                    "lemma_exc",
 | 
					 | 
				
			||||||
                    "lemma_index",
 | 
					 | 
				
			||||||
                ],
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            return super().get_lookups_config(mode)
 | 
					            return super().get_lookups_config(mode)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
from typing import List, Dict
 | 
					from typing import List, Dict, Tuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ...pipeline import Lemmatizer
 | 
					from ...pipeline import Lemmatizer
 | 
				
			||||||
from ...tokens import Token
 | 
					from ...tokens import Token
 | 
				
			||||||
| 
						 | 
					@ -11,21 +11,16 @@ class PolishLemmatizer(Lemmatizer):
 | 
				
			||||||
    # lemmatization, as well as case-sensitive lemmatization for nouns.
 | 
					    # lemmatization, as well as case-sensitive lemmatization for nouns.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def get_lookups_config(cls, mode: str) -> Dict:
 | 
					    def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
 | 
				
			||||||
        if mode == "pos_lookup":
 | 
					        if mode == "pos_lookup":
 | 
				
			||||||
            return {
 | 
					            # fmt: off
 | 
				
			||||||
                "required_tables": [
 | 
					            required = [
 | 
				
			||||||
                    "lemma_lookup_adj",
 | 
					                "lemma_lookup_adj", "lemma_lookup_adp", "lemma_lookup_adv",
 | 
				
			||||||
                    "lemma_lookup_adp",
 | 
					                "lemma_lookup_aux", "lemma_lookup_noun", "lemma_lookup_num",
 | 
				
			||||||
                    "lemma_lookup_adv",
 | 
					                "lemma_lookup_part", "lemma_lookup_pron", "lemma_lookup_verb"
 | 
				
			||||||
                    "lemma_lookup_aux",
 | 
					 | 
				
			||||||
                    "lemma_lookup_noun",
 | 
					 | 
				
			||||||
                    "lemma_lookup_num",
 | 
					 | 
				
			||||||
                    "lemma_lookup_part",
 | 
					 | 
				
			||||||
                    "lemma_lookup_pron",
 | 
					 | 
				
			||||||
                    "lemma_lookup_verb",
 | 
					 | 
				
			||||||
            ]
 | 
					            ]
 | 
				
			||||||
            }
 | 
					            # fmt: on
 | 
				
			||||||
 | 
					            return (required, [])
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            return super().get_lookups_config(mode)
 | 
					            return super().get_lookups_config(mode)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,8 +23,9 @@ def test_lemmatizer_initialize(lang, capfd):
 | 
				
			||||||
        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
 | 
					        lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
 | 
				
			||||||
        return lookups
 | 
					        return lookups
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    lang_cls = get_lang_class(lang)
 | 
				
			||||||
    # Test that languages can be initialized
 | 
					    # Test that languages can be initialized
 | 
				
			||||||
    nlp = get_lang_class(lang)()
 | 
					    nlp = lang_cls()
 | 
				
			||||||
    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 | 
					    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 | 
				
			||||||
    assert not lemmatizer.lookups.tables
 | 
					    assert not lemmatizer.lookups.tables
 | 
				
			||||||
    nlp.config["initialize"]["components"]["lemmatizer"] = {
 | 
					    nlp.config["initialize"]["components"]["lemmatizer"] = {
 | 
				
			||||||
| 
						 | 
					@ -41,7 +42,13 @@ def test_lemmatizer_initialize(lang, capfd):
 | 
				
			||||||
    assert doc[0].lemma_ == "y"
 | 
					    assert doc[0].lemma_ == "y"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Test initialization by calling .initialize() directly
 | 
					    # Test initialization by calling .initialize() directly
 | 
				
			||||||
    nlp = get_lang_class(lang)()
 | 
					    nlp = lang_cls()
 | 
				
			||||||
    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 | 
					    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 | 
				
			||||||
    lemmatizer.initialize(lookups=lemmatizer_init_lookups())
 | 
					    lemmatizer.initialize(lookups=lemmatizer_init_lookups())
 | 
				
			||||||
    assert nlp("x")[0].lemma_ == "y"
 | 
					    assert nlp("x")[0].lemma_ == "y"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Test lookups config format
 | 
				
			||||||
 | 
					    for mode in ("rule", "lookup", "pos_lookup"):
 | 
				
			||||||
 | 
					        required, optional = lemmatizer.get_lookups_config(mode)
 | 
				
			||||||
 | 
					        assert isinstance(required, list)
 | 
				
			||||||
 | 
					        assert isinstance(optional, list)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -191,22 +191,9 @@ Returns the lookups configuration settings for a given mode for use in
 | 
				
			||||||
[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
 | 
					[`Lemmatizer.load_lookups`](/api/lemmatizer#load_lookups).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| Name        | Description                                                                            |
 | 
					| Name        | Description                                                                            |
 | 
				
			||||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 
					| ----------- | -------------------------------------------------------------------------------------- |
 | 
				
			||||||
| `mode`      | The lemmatizer mode. ~~str~~                                                           |
 | 
					| `mode`      | The lemmatizer mode. ~~str~~                                                           |
 | 
				
			||||||
| **RETURNS** | The lookups configuration settings for this mode. Includes the keys `"required_tables"` and `"optional_tables"`, mapped to a list of table string names. ~~Dict[str, List[str]]~~ |
 | 
					| **RETURNS** | The required table names and the optional table names. ~~Tuple[List[str], List[str]]~~ |
 | 
				
			||||||
 | 
					 | 
				
			||||||
## Lemmatizer.load_lookups {#load_lookups tag="classmethod"}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Load and validate lookups tables. If the provided lookups is `None`, load the
 | 
					 | 
				
			||||||
default lookups tables according to the language and mode settings. Confirm that
 | 
					 | 
				
			||||||
all required tables for the language and mode are present.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
| Name        | Description                                                                                        |
 | 
					 | 
				
			||||||
| ----------- | -------------------------------------------------------------------------------------------------- |
 | 
					 | 
				
			||||||
| `lang`      | The language. ~~str~~                                                                              |
 | 
					 | 
				
			||||||
| `mode`      | The lemmatizer mode. ~~str~~                                                                       |
 | 
					 | 
				
			||||||
| `lookups`   | The provided lookups, may be `None` if the default lookups should be loaded. ~~Optional[Lookups]~~ |
 | 
					 | 
				
			||||||
| **RETURNS** | The lookups. ~~Lookups~~                                                                           |
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Lemmatizer.to_disk {#to_disk tag="method"}
 | 
					## Lemmatizer.to_disk {#to_disk tag="method"}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user