//- 💫 DOCS > API > LEXEME include ../_includes/_mixins p | An entry in the vocabulary. A #[code Lexeme] has no string context – it's | a word type, as opposed to a word token. It therefore has no | part-of-speech tag, dependency parse, or lemma (if lemmatization depends | on the part-of-speech tag). +h(2, "init") Lexeme.__init__ +tag method p Create a #[code Lexeme] object. +table(["Name", "Type", "Description"]) +row +cell #[code vocab] +cell #[code Vocab] +cell The parent vocabulary. +row +cell #[code orth] +cell int +cell The orth id of the lexeme. +row("foot") +cell returns +cell #[code Lexeme] +cell The newly constructed object. +h(2, "set_flag") Lexeme.set_flag +tag method p Change the value of a boolean flag. +aside-code("Example"). COOL_FLAG = nlp.vocab.add_flag(lambda text: False) nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True) +table(["Name", "Type", "Description"]) +row +cell #[code flag_id] +cell int +cell The attribute ID of the flag to set. +row +cell #[code value] +cell bool +cell The new value of the flag. +h(2, "check_flag") Lexeme.check_flag +tag method p Check the value of a boolean flag. +aside-code("Example"). is_my_library = lambda text: text in ['spaCy', 'Thinc'] MY_LIBRARY = nlp.vocab.add_flag(is_my_library) assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True +table(["Name", "Type", "Description"]) +row +cell #[code flag_id] +cell int +cell The attribute ID of the flag to query. +row("foot") +cell returns +cell bool +cell The value of the flag. +h(2, "similarity") Lexeme.similarity +tag method +tag-model("vectors") p Compute a semantic similarity estimate. Defaults to cosine over vectors. +aside-code("Example"). apple = nlp.vocab[u'apple'] orange = nlp.vocab[u'orange'] apple_orange = apple.similarity(orange) orange_apple = orange.similarity(apple) assert apple_orange == orange_apple +table(["Name", "Type", "Description"]) +row +cell other +cell - +cell | The object to compare with. 
By default, accepts #[code Doc], | #[code Span], #[code Token] and #[code Lexeme] objects. +row("foot") +cell returns +cell float +cell A scalar similarity score. Higher is more similar. +h(2, "has_vector") Lexeme.has_vector +tag property +tag-model("vectors") p | A boolean value indicating whether a word vector is associated with the | lexeme. +aside-code("Example"). apple = nlp.vocab[u'apple'] assert apple.has_vector +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell bool +cell Whether the lexeme has vector data attached. +h(2, "vector") Lexeme.vector +tag property +tag-model("vectors") p A real-valued meaning representation. +aside-code("Example"). apple = nlp.vocab[u'apple'] assert apple.vector.dtype == 'float32' assert apple.vector.shape == (300,) +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] +cell A 1D numpy array representing the lexeme's semantics. +h(2, "vector_norm") Lexeme.vector_norm +tag property +tag-model("vectors") p The L2 norm of the lexeme's vector representation. +aside-code("Example"). apple = nlp.vocab[u'apple'] pasta = nlp.vocab[u'pasta'] apple.vector_norm # 7.1346845626831055 pasta.vector_norm # 7.759851932525635 assert apple.vector_norm != pasta.vector_norm +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell float +cell The L2 norm of the vector representation. +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) +row +cell #[code vocab] +cell #[code Vocab] +cell The lexeme's vocabulary. +row +cell #[code text] +cell unicode +cell Verbatim text content. +row +cell #[code orth] +cell int +cell ID of the verbatim text content. +row +cell #[code orth_] +cell unicode +cell | Verbatim text content (identical to #[code Lexeme.text]). Exists | mostly for consistency with the other attributes. +row +cell #[code lex_id] +cell int +cell ID of the lexeme's lexical type. 
+row +cell #[code rank] +cell int +cell | Sequential ID of the lexeme's lexical type, used to index into | tables, e.g. for word vectors. +row +cell #[code flags] +cell int +cell Container of the lexeme's binary flags. +row +cell #[code norm] +cell int +cell The lexeme's norm, i.e. a normalised form of the lexeme text. +row +cell #[code norm_] +cell unicode +cell The lexeme's norm, i.e. a normalised form of the lexeme text. +row +cell #[code lower] +cell int +cell Lowercase form of the word. +row +cell #[code lower_] +cell unicode +cell Lowercase form of the word. +row +cell #[code shape] +cell int +cell Transform of the word's string, to show orthographic features. +row +cell #[code shape_] +cell unicode +cell Transform of the word's string, to show orthographic features. +row +cell #[code prefix] +cell int +cell | Length-N substring from the start of the word. Defaults to | #[code N=1]. +row +cell #[code prefix_] +cell unicode +cell | Length-N substring from the start of the word. Defaults to | #[code N=1]. +row +cell #[code suffix] +cell int +cell | Length-N substring from the end of the word. Defaults to | #[code N=3]. +row +cell #[code suffix_] +cell unicode +cell | Length-N substring from the end of the word. Defaults to | #[code N=3]. +row +cell #[code is_alpha] +cell bool +cell | Does the lexeme consist of alphabetic characters? Equivalent to | #[code lexeme.text.isalpha()]. +row +cell #[code is_ascii] +cell bool +cell | Does the lexeme consist of ASCII characters? Equivalent to | #[code all(ord(c) < 128 for c in lexeme.text)]. +row +cell #[code is_digit] +cell bool +cell | Does the lexeme consist of digits? Equivalent to | #[code lexeme.text.isdigit()]. +row +cell #[code is_lower] +cell bool +cell | Is the lexeme in lowercase? Equivalent to | #[code lexeme.text.islower()]. +row +cell #[code is_upper] +cell bool +cell | Is the lexeme in uppercase? Equivalent to | #[code lexeme.text.isupper()]. 
+row +cell #[code is_title] +cell bool +cell | Is the lexeme in titlecase? Equivalent to | #[code lexeme.text.istitle()]. +row +cell #[code is_punct] +cell bool +cell Is the lexeme punctuation? +row +cell #[code is_left_punct] +cell bool +cell Is the lexeme a left punctuation mark, e.g. #[code (]? +row +cell #[code is_right_punct] +cell bool +cell Is the lexeme a right punctuation mark, e.g. #[code )]? +row +cell #[code is_space] +cell bool +cell | Does the lexeme consist of whitespace characters? Equivalent to | #[code lexeme.text.isspace()]. +row +cell #[code is_bracket] +cell bool +cell Is the lexeme a bracket? +row +cell #[code is_quote] +cell bool +cell Is the lexeme a quotation mark? +row +cell #[code like_url] +cell bool +cell Does the lexeme resemble a URL? +row +cell #[code like_num] +cell bool +cell Does the lexeme represent a number? e.g. "10.9", "10", "ten", etc. +row +cell #[code like_email] +cell bool +cell Does the lexeme resemble an email address? +row +cell #[code is_oov] +cell bool +cell Is the lexeme out-of-vocabulary? +row +cell #[code is_stop] +cell bool +cell Is the lexeme part of a "stop list"? +row +cell #[code lang] +cell int +cell Language of the parent vocabulary. +row +cell #[code lang_] +cell unicode +cell Language of the parent vocabulary. +row +cell #[code prob] +cell float +cell Smoothed log probability estimate of the lexeme's type. +row +cell #[code cluster] +cell int +cell Brown cluster ID. +row +cell #[code sentiment] +cell float +cell | A scalar value indicating the positivity or negativity of the | lexeme.