# spaCy/spacy/lang/es/lemmatizer.py

from typing import List, Optional, Tuple
import re

from ...pipeline import Lemmatizer
from ...tokens import Token


class SpanishLemmatizer(Lemmatizer):
    """
    Spanish rule-based lemmatizer with morph-based rule selection.
    """

    @classmethod
    def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
        if mode == "rule":
            required = ["lemma_rules", "lemma_rules_groups", "lemma_index", "lemma_exc"]
            return (required, [])
        else:
            return super().get_lookups_config(mode)

    def rule_lemmatize(self, token: Token) -> List[str]:
        cache_key = (token.orth, token.pos, str(token.morph))
        if cache_key in self.cache:
            return self.cache[cache_key]
        string = token.text
        pos = token.pos_.lower()
        features = set(token.morph)
        if pos in ("", "eol", "space"):
            return [string.lower()]
        if pos in (
            "adp",
            "cconj",
            "intj",
            "part",
            "propn",
            "punct",
            "sconj",
            "sym",
            "x",
        ):
            if token.is_sent_start and pos != "propn":
                return [string.lower()]
            else:
                return [string]

        string = string.lower()
        exc = self.lookups.get_table("lemma_exc").get(pos, {}).get(string)
        if exc is not None:
            lemmas = list(exc)
        else:
            if pos == "aux":
                rule_pos = "verb"
            else:
                rule_pos = pos
            rule = self.select_rule(rule_pos, features)
            index = self.lookups.get_table("lemma_index").get(rule_pos, [])
            lemmas = getattr(self, "lemmatize_" + rule_pos)(
                string, features, rule, index
            )
            # Remove duplicates but preserve the ordering
            lemmas = list(dict.fromkeys(lemmas))

        self.cache[cache_key] = lemmas
        return lemmas

    def select_rule(self, pos: str, features: List[str]) -> Optional[str]:
        groups = self.lookups.get_table("lemma_rules_groups")
        if pos in groups:
            for group in groups[pos]:
                if set(group[1]).issubset(features):
                    return group[0]
        return None

    def lemmatize_adj(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize an adjective.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # Initialize empty lists for the generated lemmas
        possible_lemmas = []
        selected_lemmas = []

        # Apply lemmatization rules
        for old, new in self.lookups.get_table("lemma_rules").get(rule, []):
            possible_lemma = re.sub(old + "$", new, word)
            if possible_lemma != word:
                possible_lemmas.append(possible_lemma)

        # Additional rule for plurals that go from esdrújula to grave and end in
        # 'n' or 's', e.g., jóvenes -> joven
        additional_lemmas = []
        if "Number=Plur" in features:
            for possible_lemma in possible_lemmas:
                if possible_lemma.endswith("n") or possible_lemma.endswith("s"):
                    for old, new in self.lookups.get_table("lemma_rules").get(
                        "accents", []
                    ):
                        additional_lemmas.append(re.sub(old, new, possible_lemma))
        possible_lemmas.extend(additional_lemmas)

        for lemma in possible_lemmas:
            if lemma in index:
                selected_lemmas.append(lemma)
        # If one or more of the created possible lemmas are in the lookup list,
        # return all of them
        if len(selected_lemmas) > 0:
            return selected_lemmas
        elif len(possible_lemmas) > 0:
            return possible_lemmas
        else:
            return [word]

    def lemmatize_adv(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize an adverb.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # Apply lemmatization rules
        for old, new in self.lookups.get_table("lemma_rules").get("adverbs", []):
            if word == old:
                return [new]

        # If none of the rules applies, return the original word
        return [word]

    def lemmatize_det(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize a determiner.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # Initialize empty lists for the generated lemmas
        possible_lemmas = []
        selected_lemmas = []

        # First, search in rules specific to determiners
        for old, new in self.lookups.get_table("lemma_rules").get("det", []):
            if word == old:
                return [new]
        # If none of the specfic rules apply, search in the common rules for
        # determiners and pronouns that follow a unique pattern for
        # lemmatization. If the word is in the list, return the corresponding
        # lemma.
        for old, new in self.lookups.get_table("lemma_rules").get(
            "det_and_pron_fixed", []
        ):
            if word == old:
                return [new]
        # If the word is not in the list of unique determiners and pronouns,
        # apply general rules of lemmatization. Include the original word in the        # list of possible lemmas.
        for old, new in self.lookups.get_table("lemma_rules").get(
            "det_and_pron_general", []
        ):
            possible_lemma = re.sub(old + "$", new, word)
            possible_lemmas.append(possible_lemma)
        possible_lemmas.append(word)

        if len(possible_lemmas) == 1:
            return possible_lemmas
        elif len(possible_lemmas) > 1:
            for lemma in possible_lemmas:
                if lemma in index:
                    selected_lemmas.append(lemma)
            if len(selected_lemmas) >= 1:
                return selected_lemmas
            else:
                return possible_lemmas

    def lemmatize_noun(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize a noun.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # Initialize empty lists for the generated lemmas
        possible_lemmas = []
        selected_lemmas = []

        # Apply lemmatization rules
        for old, new in self.lookups.get_table("lemma_rules").get(rule, []):
            possible_lemma = re.sub(old + "$", new, word)
            if possible_lemma != word:
                possible_lemmas.append(possible_lemma)

        # Additional rule for plurals that go from esdrújula to grave and end in
        # 'n' or 's', e.g., órdenes -> orden, exámenes -> examen
        additional_lemmas = []
        if "Number=Plur" in features:
            for possible_lemma in possible_lemmas:
                if possible_lemma.endswith("n") or possible_lemma.endswith("s"):
                    for old, new in self.lookups.get_table("lemma_rules").get(
                        "accents", []
                    ):
                        additional_lemmas.append(re.sub(old, new, possible_lemma))
        possible_lemmas.extend(additional_lemmas)

        for lemma in possible_lemmas:
            if lemma in index:
                selected_lemmas.append(lemma)
        # If one or more of the created possible lemmas are in the lookup list,
        # return all of them
        if len(selected_lemmas) > 0:
            return selected_lemmas
        elif len(possible_lemmas) > 0:
            return possible_lemmas
        else:
            return [word]

    def lemmatize_num(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize a numeral.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # If the word is in the list of rules for numerals, return the
        # corresponding lemma
        for old, new in self.lookups.get_table("lemma_rules").get("num", []):
            if word == old:
                return [new]

        # Normalize punctuation
        splitted_word = word.split(",")
        if re.search(r"(\.)([0-9]{3})$", splitted_word[0]):
            word = re.sub(r"\.", r"", word)
        word = re.sub(r",", r".", word)
        return [word]

    def lemmatize_pron(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize a pronoun.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """

        # Initialize empty lists for the generated lemmas
        possible_lemmas = []
        selected_lemmas = []

        # First, search in rules specific to pronouns
        for old, new in self.lookups.get_table("lemma_rules").get("pron", []):
            if word == old:
                return [new]
        # If none of the specfic rules apply, search in the common rules for
        # determiners and pronouns that follow a unique pattern for
        # lemmatization. If the word is in the list, return the corresponding
        # lemma.
        for old, new in self.lookups.get_table("lemma_rules").get(
            "det_and_pron_fixed", []
        ):
            if word == old:
                return [new]
        # If the word is not in the list of unique determiners and pronouns,
        # apply general rules of lemmatization. Include the original word in the
        # list of possible lemmas.
        for old, new in self.lookups.get_table("lemma_rules").get(
            "det_and_pron_general", []
        ):
            possible_lemma = re.sub(old + "$", new, word)
            if possible_lemma != word:
                possible_lemmas.append(possible_lemma)
        possible_lemmas.append(word)

        if len(possible_lemmas) == 1:
            return possible_lemmas
        elif len(possible_lemmas) > 1:
            for lemma in possible_lemmas:
                if lemma in index:
                    selected_lemmas.append(lemma)
            if len(selected_lemmas) >= 1:
                return selected_lemmas
            else:
                return possible_lemmas

    def lemmatize_verb(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        """
        Lemmatize a verb.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        """
        # Exceptions for verb+pronoun(s)
        if "PronType=Prs" in features:
            return self.lemmatize_verb_pron(word, features, rule, index)

        # Initialize empty lists for the generated lemmas
        possible_lemmas = []
        selected_lemmas = []

        # Apply lemmatization rules
        for old, new in self.lookups.get_table("lemma_rules").get(rule, []):
            possible_lemma = re.sub(old + "$", new, word)
            if possible_lemma != word:
                possible_lemmas.append(possible_lemma)

        for lemma in possible_lemmas:
            if lemma in index:
                selected_lemmas.append(lemma)
        if len(selected_lemmas) == 0:
            # If none of the possible lemmas are in the lookup list,
            # apply vocalic alternation rules and search in the lookup list
            # again
            for lemma in possible_lemmas:
                for old, new in self.lookups.get_table("lemma_rules").get(
                    "voc_alt_1", []
                ):
                    if old in lemma:
                        for i, char in enumerate(lemma):
                            if char == old:
                                voc_alt_lemma = lemma[:i] + new + lemma[i + 1 :]
                                if voc_alt_lemma in index:
                                    selected_lemmas.append(voc_alt_lemma)
                for old, new in self.lookups.get_table("lemma_rules").get(
                    "voc_alt_2", []
                ):
                    if old in lemma:
                        voc_alt_lemma = lemma.replace(old, new, 1)
                        if voc_alt_lemma in index:
                            selected_lemmas.append(voc_alt_lemma)
        # Additional rule for verbs that lose the accent mark when lemmatized,
        # e.g., amplían -> ampliar
        additional_lemmas = []
        for possible_lemma in possible_lemmas:
            for old, new in self.lookups.get_table("lemma_rules").get("accents", []):
                additional_lemmas.append(re.sub(old, new, possible_lemma))
        possible_lemmas.extend(additional_lemmas)

        # If one or more of the created possible lemmas are in the lookup list,
        # return all of them
        if len(selected_lemmas) > 0:
            return selected_lemmas
        elif len(possible_lemmas) > 0:
            return possible_lemmas
        else:
            return [word]

    def lemmatize_verb_pron(
        self, word: str, features: List[str], rule: str, index: List[str]
    ) -> List[str]:
        # Strip and collect pronouns
        pron_patt = "^(.*?)([mts]e|l[aeo]s?|n?os)$"
        prons = []
        verb = word
        m = re.search(pron_patt, verb)
        while m is not None and len(prons) <= 3:
            verb = re.sub(m.group(2) + "$", "", verb)
            prons = [m.group(2)] + prons
            m = re.search(pron_patt, verb)
        # Strip accents from verb form
        for old, new in self.lookups.get_table("lemma_rules").get("accents", []):
            verb = re.sub(old, new, verb)
        # Lemmatize the verb and pronouns, checking for exceptions
        exc = self.lookups.get_table("lemma_exc").get("verb", {}).get(verb)
        if exc is not None:
            verb_lemma = exc[0]
        else:
            rule = self.select_rule("verb", features)
            verb_lemma = self.lemmatize_verb(
                verb, features - {"PronType=Prs"}, rule, index
            )[0]
        pron_lemmas = []
        for pron in prons:
            exc = self.lookups.get_table("lemma_exc").get("pron", {}).get(pron)
            if exc is not None:
                pron_lemmas.append(exc[0])
            else:
                rule = self.select_rule("pron", features)
                pron_lemmas.append(self.lemmatize_pron(pron, features, rule, index)[0])
        return [verb_lemma + " " + " ".join(pron_lemmas)]
Add Spanish rule-based lemmatizer (#6833)
* Initial Spanish lemmatizer
* Handle merged verb+pron(s) multi-word tokens
* Use VERB for AUX rule lookup
* Add morph to lemma cache key
* Fix aux lookups, minor refactoring
* Improve verb+pron handling
* Move verb+pron handling into its own method
* Check for exceptions (primarily for se)
* Collect pronouns in the same (not reversed) order
* Only add modified possible lemmas
2021-01-27 14:21:35 +03:00
			`from typing import List, Optional, Tuple`
			`import re`

			`from ...pipeline import Lemmatizer`
			`from ...tokens import Token`


			`class SpanishLemmatizer(Lemmatizer):`
			`"""`
			`Spanish rule-based lemmatizer with morph-based rule selection.`
			`"""`

			`@classmethod`
			`def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:`
			`if mode == "rule":`
			`required = ["lemma_rules", "lemma_rules_groups", "lemma_index", "lemma_exc"]`
			`return (required, [])`
			`else:`
			`return super().get_lookups_config(mode)`

			`def rule_lemmatize(self, token: Token) -> List[str]:`
			`cache_key = (token.orth, token.pos, str(token.morph))`
			`if cache_key in self.cache:`
			`return self.cache[cache_key]`
			`string = token.text`
			`pos = token.pos_.lower()`
			`features = set(token.morph)`
			`if pos in ("", "eol", "space"):`
			`return [string.lower()]`
			`if pos in (`
			`"adp",`
			`"cconj",`
			`"intj",`
			`"part",`
			`"propn",`
			`"punct",`
			`"sconj",`
			`"sym",`
			`"x",`
			`):`
			`if token.is_sent_start and pos != "propn":`
			`return [string.lower()]`
			`else:`
			`return [string]`

			`string = string.lower()`
			`exc = self.lookups.get_table("lemma_exc").get(pos, {}).get(string)`
			`if exc is not None:`
			`lemmas = list(exc)`
			`else:`
			`if pos == "aux":`
			`rule_pos = "verb"`
			`else:`
			`rule_pos = pos`
			`rule = self.select_rule(rule_pos, features)`
			`index = self.lookups.get_table("lemma_index").get(rule_pos, [])`
			`lemmas = getattr(self, "lemmatize_" + rule_pos)(`
			`string, features, rule, index`
			`)`
			`# Remove duplicates but preserve the ordering`
			`lemmas = list(dict.fromkeys(lemmas))`

			`self.cache[cache_key] = lemmas`
			`return lemmas`

			`def select_rule(self, pos: str, features: List[str]) -> Optional[str]:`
			`groups = self.lookups.get_table("lemma_rules_groups")`
			`if pos in groups:`
			`for group in groups[pos]:`
			`if set(group[1]).issubset(features):`
			`return group[0]`
			`return None`

			`def lemmatize_adj(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize an adjective.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# Initialize empty lists for the generated lemmas`
			`possible_lemmas = []`
			`selected_lemmas = []`

			`# Apply lemmatization rules`
			`for old, new in self.lookups.get_table("lemma_rules").get(rule, []):`
			`possible_lemma = re.sub(old + "$", new, word)`
			`if possible_lemma != word:`
			`possible_lemmas.append(possible_lemma)`

			`# Additional rule for plurals that go from esdrújula to grave and end in`
			`# 'n' or 's', e.g., jóvenes -> joven`
			`additional_lemmas = []`
			`if "Number=Plur" in features:`
			`for possible_lemma in possible_lemmas:`
			`if possible_lemma.endswith("n") or possible_lemma.endswith("s"):`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"accents", []`
			`):`
			`additional_lemmas.append(re.sub(old, new, possible_lemma))`
			`possible_lemmas.extend(additional_lemmas)`

			`for lemma in possible_lemmas:`
			`if lemma in index:`
			`selected_lemmas.append(lemma)`
			`# If one or more of the created possible lemmas are in the lookup list,`
			`# return all of them`
			`if len(selected_lemmas) > 0:`
			`return selected_lemmas`
			`elif len(possible_lemmas) > 0:`
			`return possible_lemmas`
			`else:`
			`return [word]`

			`def lemmatize_adv(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize an adverb.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# Apply lemmatization rules`
			`for old, new in self.lookups.get_table("lemma_rules").get("adverbs", []):`
			`if word == old:`
			`return [new]`

			`# If none of the rules applies, return the original word`
			`return [word]`

			`def lemmatize_det(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize a determiner.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# Initialize empty lists for the generated lemmas`
			`possible_lemmas = []`
			`selected_lemmas = []`

			`# First, search in rules specific to determiners`
			`for old, new in self.lookups.get_table("lemma_rules").get("det", []):`
			`if word == old:`
			`return [new]`
			`# If none of the specfic rules apply, search in the common rules for`
			`# determiners and pronouns that follow a unique pattern for`
			`# lemmatization. If the word is in the list, return the corresponding`
			`# lemma.`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"det_and_pron_fixed", []`
			`):`
			`if word == old:`
			`return [new]`
			`# If the word is not in the list of unique determiners and pronouns,`
			`# apply general rules of lemmatization. Include the original word in the # list of possible lemmas.`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"det_and_pron_general", []`
			`):`
			`possible_lemma = re.sub(old + "$", new, word)`
			`possible_lemmas.append(possible_lemma)`
			`possible_lemmas.append(word)`

			`if len(possible_lemmas) == 1:`
			`return possible_lemmas`
			`elif len(possible_lemmas) > 1:`
			`for lemma in possible_lemmas:`
			`if lemma in index:`
			`selected_lemmas.append(lemma)`
			`if len(selected_lemmas) >= 1:`
			`return selected_lemmas`
			`else:`
			`return possible_lemmas`

			`def lemmatize_noun(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize a noun.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# Initialize empty lists for the generated lemmas`
			`possible_lemmas = []`
			`selected_lemmas = []`

			`# Apply lemmatization rules`
			`for old, new in self.lookups.get_table("lemma_rules").get(rule, []):`
			`possible_lemma = re.sub(old + "$", new, word)`
			`if possible_lemma != word:`
			`possible_lemmas.append(possible_lemma)`

			`# Additional rule for plurals that go from esdrújula to grave and end in`
			`# 'n' or 's', e.g., órdenes -> orden, exámenes -> examen`
			`additional_lemmas = []`
			`if "Number=Plur" in features:`
			`for possible_lemma in possible_lemmas:`
			`if possible_lemma.endswith("n") or possible_lemma.endswith("s"):`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"accents", []`
			`):`
			`additional_lemmas.append(re.sub(old, new, possible_lemma))`
			`possible_lemmas.extend(additional_lemmas)`

			`for lemma in possible_lemmas:`
			`if lemma in index:`
			`selected_lemmas.append(lemma)`
			`# If one or more of the created possible lemmas are in the lookup list,`
			`# return all of them`
			`if len(selected_lemmas) > 0:`
			`return selected_lemmas`
			`elif len(possible_lemmas) > 0:`
			`return possible_lemmas`
			`else:`
			`return [word]`

			`def lemmatize_num(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize a numeral.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# If the word is in the list of rules for numerals, return the`
			`# corresponding lemma`
			`for old, new in self.lookups.get_table("lemma_rules").get("num", []):`
			`if word == old:`
			`return [new]`

			`# Normalize punctuation`
			`splitted_word = word.split(",")`
			`if re.search(r"(\.)([0-9]{3})$", splitted_word[0]):`
			`word = re.sub(r"\.", r"", word)`
			`word = re.sub(r",", r".", word)`
			`return [word]`

			`def lemmatize_pron(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize a pronoun.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`

			`# Initialize empty lists for the generated lemmas`
			`possible_lemmas = []`
			`selected_lemmas = []`

			`# First, search in rules specific to pronouns`
			`for old, new in self.lookups.get_table("lemma_rules").get("pron", []):`
			`if word == old:`
			`return [new]`
			`# If none of the specfic rules apply, search in the common rules for`
			`# determiners and pronouns that follow a unique pattern for`
			`# lemmatization. If the word is in the list, return the corresponding`
			`# lemma.`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"det_and_pron_fixed", []`
			`):`
			`if word == old:`
			`return [new]`
			`# If the word is not in the list of unique determiners and pronouns,`
			`# apply general rules of lemmatization. Include the original word in the`
			`# list of possible lemmas.`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"det_and_pron_general", []`
			`):`
			`possible_lemma = re.sub(old + "$", new, word)`
			`if possible_lemma != word:`
			`possible_lemmas.append(possible_lemma)`
			`possible_lemmas.append(word)`

			`if len(possible_lemmas) == 1:`
			`return possible_lemmas`
			`elif len(possible_lemmas) > 1:`
			`for lemma in possible_lemmas:`
			`if lemma in index:`
			`selected_lemmas.append(lemma)`
			`if len(selected_lemmas) >= 1:`
			`return selected_lemmas`
			`else:`
			`return possible_lemmas`

			`def lemmatize_verb(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`"""`
			`Lemmatize a verb.`

			`word (str): The word to lemmatize.`
			`features (List[str]): The morphological features as a list of Feat=Val`
			`pairs.`
			`index (List[str]): The POS-specific lookup list.`

			`RETURNS (List[str]): The list of lemmas.`
			`"""`
			`# Exceptions for verb+pronoun(s)`
			`if "PronType=Prs" in features:`
			`return self.lemmatize_verb_pron(word, features, rule, index)`

			`# Initialize empty lists for the generated lemmas`
			`possible_lemmas = []`
			`selected_lemmas = []`

			`# Apply lemmatization rules`
			`for old, new in self.lookups.get_table("lemma_rules").get(rule, []):`
			`possible_lemma = re.sub(old + "$", new, word)`
			`if possible_lemma != word:`
			`possible_lemmas.append(possible_lemma)`

			`for lemma in possible_lemmas:`
			`if lemma in index:`
			`selected_lemmas.append(lemma)`
			`if len(selected_lemmas) == 0:`
			`# If none of the possible lemmas are in the lookup list,`
			`# apply vocalic alternation rules and search in the lookup list`
			`# again`
			`for lemma in possible_lemmas:`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"voc_alt_1", []`
			`):`
			`if old in lemma:`
			`for i, char in enumerate(lemma):`
			`if char == old:`
			`voc_alt_lemma = lemma[:i] + new + lemma[i + 1 :]`
			`if voc_alt_lemma in index:`
			`selected_lemmas.append(voc_alt_lemma)`
			`for old, new in self.lookups.get_table("lemma_rules").get(`
			`"voc_alt_2", []`
			`):`
			`if old in lemma:`
			`voc_alt_lemma = lemma.replace(old, new, 1)`
			`if voc_alt_lemma in index:`
			`selected_lemmas.append(voc_alt_lemma)`
			`# Additional rule for verbs that lose the accent mark when lemmatized,`
			`# e.g., amplían -> ampliar`
			`additional_lemmas = []`
			`for possible_lemma in possible_lemmas:`
			`for old, new in self.lookups.get_table("lemma_rules").get("accents", []):`
			`additional_lemmas.append(re.sub(old, new, possible_lemma))`
			`possible_lemmas.extend(additional_lemmas)`

			`# If one or more of the created possible lemmas are in the lookup list,`
			`# return all of them`
			`if len(selected_lemmas) > 0:`
			`return selected_lemmas`
			`elif len(possible_lemmas) > 0:`
			`return possible_lemmas`
			`else:`
			`return [word]`

			`def lemmatize_verb_pron(`
			`self, word: str, features: List[str], rule: str, index: List[str]`
			`) -> List[str]:`
			`# Strip and collect pronouns`
			`pron_patt = "^(.*?)([mts]e\|l[aeo]s?\|n?os)$"`
			`prons = []`
			`verb = word`
			`m = re.search(pron_patt, verb)`
			`while m is not None and len(prons) <= 3:`
			`verb = re.sub(m.group(2) + "$", "", verb)`
			`prons = [m.group(2)] + prons`
			`m = re.search(pron_patt, verb)`
			`# Strip accents from verb form`
			`for old, new in self.lookups.get_table("lemma_rules").get("accents", []):`
			`verb = re.sub(old, new, verb)`
			`# Lemmatize the verb and pronouns, checking for exceptions`
			`exc = self.lookups.get_table("lemma_exc").get("verb", {}).get(verb)`
			`if exc is not None:`
			`verb_lemma = exc[0]`
			`else:`
			`rule = self.select_rule("verb", features)`
			`verb_lemma = self.lemmatize_verb(`
			`verb, features - {"PronType=Prs"}, rule, index`
			`)[0]`
			`pron_lemmas = []`
			`for pron in prons:`
			`exc = self.lookups.get_table("lemma_exc").get("pron", {}).get(pron)`
			`if exc is not None:`
			`pron_lemmas.append(exc[0])`
			`else:`
			`rule = self.select_rule("pron", features)`
			`pron_lemmas.append(self.lemmatize_pron(pron, features, rule, index)[0])`
			`return [verb_lemma + " " + " ".join(pron_lemmas)]`