//- 💫 DOCS > API > LEMMATIZER

include ../_includes/_mixins

p
    |  The #[code Lemmatizer] supports simple part-of-speech-sensitive suffix
    |  rules and lookup tables.

+h(2, "init") Lemmatizer.__init__
    +tag method

p Create a #[code Lemmatizer].

+aside-code("Example").
    from spacy.lemmatizer import Lemmatizer
    lemmatizer = Lemmatizer()

+table(["Name", "Type", "Description"])
    +row
        +cell #[code index]
        +cell dict / #[code None]
        +cell Inventory of lemmas in the language.

    +row
        +cell #[code exceptions]
        +cell dict / #[code None]
        +cell Mapping of string forms to lemmas that bypass the #[code rules].

    +row
        +cell #[code rules]
        +cell dict / #[code None]
        +cell List of suffix rewrite rules.

    +row
        +cell #[code lookup]
        +cell dict / #[code None]
        +cell Lookup table mapping strings to their lemmas.

    +row("foot")
        +cell returns
        +cell #[code Lemmatizer]
        +cell The newly created object.

+h(2, "call") Lemmatizer.__call__
    +tag method

p Lemmatize a string.

+aside-code("Example").
    from spacy.lemmatizer import Lemmatizer
    from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES
    lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
    lemmas = lemmatizer(u'ducks', u'NOUN')
    assert lemmas == [u'duck']

+table(["Name", "Type", "Description"])
    +row
        +cell #[code string]
        +cell unicode
        +cell The string to lemmatize, e.g. the token text.

    +row
        +cell #[code univ_pos]
        +cell unicode / int
        +cell The token's universal part-of-speech tag.

    +row
        +cell #[code morphology]
        +cell dict / #[code None]
        +cell
            |  Morphological features following the
            |  #[+a("http://universaldependencies.org/") Universal Dependencies]
            |  scheme.

    +row("foot")
        +cell returns
        +cell list
        +cell The available lemmas for the string.

+h(2, "lookup") Lemmatizer.lookup
    +tag method
    +tag-new(2)

p
    |  Look up a lemma in the lookup table, if available. If no lemma is found,
    |  the original string is returned. Languages can provide a
    |  #[+a("/usage/adding-languages#lemmatizer") lookup table] via the
    |  #[code lemma_lookup] variable, set on the individual #[code Language]
    |  class.

+aside-code("Example").
    lookup = {u'going': u'go'}
    lemmatizer = Lemmatizer(lookup=lookup)
    assert lemmatizer.lookup(u'going') == u'go'

+table(["Name", "Type", "Description"])
    +row
        +cell #[code string]
        +cell unicode
        +cell The string to look up.

    +row("foot")
        +cell returns
        +cell unicode
        +cell The lemma if the string was found, otherwise the original string.

+h(2, "is_base_form") Lemmatizer.is_base_form
    +tag method

p
    |  Check whether we're dealing with an uninflected paradigm, so we can
    |  avoid lemmatization entirely.

+aside-code("Example").
    pos = 'verb'
    morph = {'VerbForm': 'inf'}
    is_base_form = lemmatizer.is_base_form(pos, morph)
    assert is_base_form == True

+table(["Name", "Type", "Description"])
    +row
        +cell #[code univ_pos]
        +cell unicode / int
        +cell The token's universal part-of-speech tag.

    +row
        +cell #[code morphology]
        +cell dict
        +cell The token's morphological features.

    +row("foot")
        +cell returns
        +cell bool
        +cell
            |  Whether the token's part-of-speech tag and morphological
            |  features describe a base form.

+h(2, "attributes") Attributes

+table(["Name", "Type", "Description"])
    +row
        +cell #[code index]
        +cell dict / #[code None]
        +cell Inventory of lemmas in the language.

    +row
        +cell #[code exc]
        +cell dict / #[code None]
        +cell Mapping of string forms to lemmas that bypass the #[code rules].

    +row
        +cell #[code rules]
        +cell dict / #[code None]
        +cell List of suffix rewrite rules.

    +row
        +cell #[code lookup_table]
            +tag-new(2)
        +cell dict / #[code None]
        +cell The lemma lookup table, if available.
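
p
    |  The #[code index], #[code exc] and #[code rules] attributes hold the
    |  data the suffix rules run on. As a minimal sketch of their expected
    |  shapes: the hand-written values below are hypothetical, modelled on
    |  the English lemma data in #[code spacy.lang.en], where each rule is a
    |  two-item list of old and new suffix and the index lists known lemmas,
    |  all keyed by the lowercase universal part-of-speech tag.

+aside-code("Example").
    from spacy.lemmatizer import Lemmatizer
    # Hypothetical data, modelled on the English lemma data
    index = {'noun': [u'duck']}        # known lemmas, keyed by pos
    exc = {'noun': {}}                 # no exceptions
    rules = {'noun': [[u's', u'']]}    # rewrite suffix "s" to ""
    lemmatizer = Lemmatizer(index, exc, rules)
    assert lemmatizer(u'ducks', u'NOUN') == [u'duck']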
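
p
    |  Note that two constructor arguments are stored under different
    |  attribute names: #[code exceptions] becomes #[code exc] and
    |  #[code lookup] becomes #[code lookup_table]. A short sketch, reusing
    |  the lookup table from the #[code Lemmatizer.lookup] example above:

+aside-code("Example").
    lookup = {u'going': u'go'}
    lemmatizer = Lemmatizer(lookup=lookup)
    # The table passed as "lookup" is exposed as "lookup_table"
    assert lemmatizer.lookup_table == lookup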