//- 💫 DOCS > API > TOKEN include ../../_includes/_mixins p An individual token — i.e. a word, punctuation symbol, whitespace, etc. +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) +row +cell #[code vocab] +cell #[code Vocab] +cell The vocab object of the parent #[code Doc]. +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code i] +cell int +cell The index of the token within the parent document. +row +cell #[code ent_type] +cell int +cell Named entity type. +row +cell #[code ent_type_] +cell unicode +cell Named entity type. +row +cell #[code ent_iob] +cell int +cell | IOB code of named entity tag. | #[code 1="I", 2="O", 3="B"]. #[code 0] means no tag is assigned. +row +cell #[code ent_iob_] +cell unicode +cell | IOB code of named entity tag. #[code "B"] | means the token begins an entity, #[code "I"] means it is inside an | entity, #[code "O"] means it is outside an entity, and | #[code ""] means no entity tag is set. +row +cell #[code ent_id] +cell int +cell ID of the entity the token is an instance of, if any. +row +cell #[code ent_id_] +cell unicode +cell ID of the entity the token is an instance of, if any. +row +cell #[code lemma] +cell int +cell | Base form of the word, with no inflectional suffixes. +row +cell #[code lemma_] +cell unicode +cell Base form of the word, with no inflectional suffixes. +row +cell #[code lower] +cell int +cell Lower-case form of the word. +row +cell #[code lower_] +cell unicode +cell Lower-case form of the word. +row +cell #[code shape] +cell int +cell A transform of the word's string, to show orthographic features. +row +cell #[code shape_] +cell unicode +cell A transform of the word's string, to show orthographic features. +row +cell #[code prefix] +cell int +cell Integer ID of a length-N substring from the start of the | word. Defaults to #[code N=1]. +row +cell #[code prefix_] +cell unicode +cell | A length-N substring from the start of the word. Defaults to | #[code N=1]. 
+row +cell #[code suffix] +cell int +cell | Length-N substring from the end of the word. Defaults to #[code N=3]. +row +cell #[code suffix_] +cell unicode +cell Length-N substring from the end of the word. Defaults to #[code N=3]. +row +cell #[code is_alpha] +cell bool +cell Equivalent to #[code word.orth_.isalpha()]. +row +cell #[code is_ascii] +cell bool +cell Equivalent to #[code all(ord(c) < 128 for c in word.orth_)]. +row +cell #[code is_digit] +cell bool +cell Equivalent to #[code word.orth_.isdigit()]. +row +cell #[code is_lower] +cell bool +cell Equivalent to #[code word.orth_.islower()]. +row +cell #[code is_title] +cell bool +cell Equivalent to #[code word.orth_.istitle()]. +row +cell #[code is_punct] +cell bool +cell Equivalent to #[code word.orth_.ispunct()]. +row +cell #[code is_space] +cell bool +cell Equivalent to #[code word.orth_.isspace()]. +row +cell #[code like_url] +cell bool +cell Does the word resemble a URL? +row +cell #[code like_num] +cell bool +cell Does the word represent a number? e.g. “10.9”, “10”, “ten”, etc. +row +cell #[code like_email] +cell bool +cell Does the word resemble an email address? +row +cell #[code is_oov] +cell bool +cell Is the word out-of-vocabulary? +row +cell #[code is_stop] +cell bool +cell Is the word part of a "stop list"? +row +cell #[code pos] +cell int +cell Coarse-grained part-of-speech. +row +cell #[code pos_] +cell unicode +cell Coarse-grained part-of-speech. +row +cell #[code tag] +cell int +cell Fine-grained part-of-speech. +row +cell #[code tag_] +cell unicode +cell Fine-grained part-of-speech. +row +cell #[code dep] +cell int +cell Syntactic dependency relation. +row +cell #[code dep_] +cell unicode +cell Syntactic dependency relation. +row +cell #[code lang] +cell int +cell Language of the parent document's vocabulary. +row +cell #[code lang_] +cell unicode +cell Language of the parent document's vocabulary. 
+row +cell #[code prob] +cell float +cell Smoothed log probability estimate of token's type. +row +cell #[code idx] +cell int +cell The character offset of the token within the parent document. +row +cell #[code sentiment] +cell float +cell A scalar value indicating the positivity or negativity of the token. +row +cell #[code lex_id] +cell int +cell ID of the token's lexical type. +row +cell #[code text] +cell unicode +cell Verbatim text content. +row +cell #[code text_with_ws] +cell unicode +cell Text content, with trailing space character if present. +row +cell #[code whitespace] +cell int +cell Trailing space character if present. +row +cell #[code whitespace_] +cell unicode +cell Trailing space character if present. +h(2, "init") Token.__init__ +tag method p Construct a #[code Token] object. +table(["Name", "Type", "Description"]) +row +cell #[code vocab] +cell #[code Vocab] +cell A storage container for lexical types. +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code offset] +cell int +cell The index of the token within the document. +footrow +cell return +cell #[code Token] +cell The newly constructed object. +h(2, "len") Token.__len__ +tag method p Get the number of unicode characters in the token. +table(["Name", "Type", "Description"]) +footrow +cell return +cell int +cell The number of unicode characters in the token. +h(2, "check_flag") Token.check_flag +tag method p Check the value of a boolean flag. +table(["Name", "Type", "Description"]) +row +cell #[code flag_id] +cell int +cell The attribute ID of the flag to check. +footrow +cell return +cell bool +cell Whether the flag is set. +h(2, "nbor") Token.nbor +tag method p Get a neighboring token. +table(["Name", "Type", "Description"]) +row +cell #[code i] +cell int +cell The relative position of the token to get. Defaults to #[code 1]. 
+footrow +cell return +cell #[code Token] +cell The token at position #[code self.doc[self.i+i]]. +h(2, "similarity") Token.similarity +tag method p Compute a semantic similarity estimate. Defaults to cosine over vectors. +table(["Name", "Type", "Description"]) +row +cell other +cell - +cell | The object to compare with. By default, accepts #[code Doc], | #[code Span], #[code Token] and #[code Lexeme] objects. +footrow +cell return +cell float +cell A scalar similarity score. Higher is more similar. +h(2, "is_ancestor") Token.is_ancestor +tag method p | Check whether this token is a parent, grandparent, etc. of another | in the dependency tree. +table(["Name", "Type", "Description"]) +row +cell descendant +cell #[code Token] +cell Another token. +footrow +cell return +cell bool +cell Whether this token is the ancestor of the descendant. +h(2, "vector") Token.vector +tag property p A real-valued meaning representation. +table(["Name", "Type", "Description"]) +footrow +cell return +cell #[code numpy.ndarray[ndim=1, dtype='float32']] +cell A 1D numpy array representing the token's semantics. +h(2, "has_vector") Token.has_vector +tag property p | A boolean value indicating whether a word vector is associated with the | object. +table(["Name", "Type", "Description"]) +footrow +cell return +cell bool +cell Whether the token has vector data attached. +h(2, "head") Token.head +tag property p The syntactic parent, or "governor", of this token. +table(["Name", "Type", "Description"]) +footrow +cell return +cell #[code Token] +cell The head. +h(2, "conjuncts") Token.conjuncts +tag property p A sequence of coordinated tokens, including the token itself. +table(["Name", "Type", "Description"]) +footrow +cell yield +cell #[code Token] +cell A coordinated token. +h(2, "children") Token.children +tag property p A sequence of the token's immediate syntactic children. 
+table(["Name", "Type", "Description"]) +footrow +cell yield +cell #[code Token] +cell A child token such that #[code child.head==self]. +h(2, "subtree") Token.subtree +tag property p A sequence of all the token's syntactic descendants. +table(["Name", "Type", "Description"]) +footrow +cell yield +cell #[code Token] +cell A descendant token such that #[code self.is_ancestor(descendant)]. +h(2, "left_edge") Token.left_edge +tag property p The leftmost token of this token's syntactic descendants. +table(["Name", "Type", "Description"]) +footrow +cell return +cell #[code Token] +cell The first token such that #[code self.is_ancestor(token)]. +h(2, "right_edge") Token.right_edge +tag property p The rightmost token of this token's syntactic descendants. +table(["Name", "Type", "Description"]) +footrow +cell return +cell #[code Token] +cell The last token such that #[code self.is_ancestor(token)]. +h(2, "ancestors") Token.ancestors +tag property p A sequence of this token's syntactic ancestors (parent, grandparent, etc.). +table(["Name", "Type", "Description"]) +footrow +cell yield +cell #[code Token] +cell | A sequence of ancestor tokens such that | #[code ancestor.is_ancestor(self)].