//- 💫 DOCS > API > TOKEN include ../../_includes/_mixins p An individual token — i.e. a word, punctuation symbol, whitespace, etc. +h(2, "init") Token.__init__ +tag method p Construct a #[code Token] object. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') token = doc[0] assert token.text == u'Give' +table(["Name", "Type", "Description"]) +row +cell #[code vocab] +cell #[code Vocab] +cell A storage container for lexical types. +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code offset] +cell int +cell The index of the token within the document. +footrow +cell returns +cell #[code Token] +cell The newly constructed object. +h(2, "len") Token.__len__ +tag method p The number of unicode characters in the token, i.e. #[code token.text]. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') token = doc[0] assert len(token) == 4 +table(["Name", "Type", "Description"]) +footrow +cell returns +cell int +cell The number of unicode characters in the token. +h(2, "check_flag") Token.check_flag +tag method p Check the value of a boolean flag. +aside-code("Example"). from spacy.attrs import IS_TITLE doc = nlp(u'Give it back! He pleaded.') token = doc[0] assert token.check_flag(IS_TITLE) == True +table(["Name", "Type", "Description"]) +row +cell #[code flag_id] +cell int +cell The attribute ID of the flag to check. +footrow +cell returns +cell bool +cell Whether the flag is set. +h(2, "similarity") Token.similarity +tag method +tag-model("vectors") p Compute a semantic similarity estimate. Defaults to cosine over vectors. +aside-code("Example"). apples, _, oranges = nlp(u'apples and oranges') apples_oranges = apples.similarity(oranges) oranges_apples = oranges.similarity(apples) assert apples_oranges == oranges_apples +table(["Name", "Type", "Description"]) +row +cell other +cell - +cell | The object to compare with. By default, accepts #[code Doc], | #[code Span], #[code Token] and #[code Lexeme] objects. 
+footrow +cell returns +cell float +cell A scalar similarity score. Higher is more similar. +h(2, "nbor") Token.nbor +tag method p Get a neighboring token. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') give_nbor = doc[0].nbor() assert give_nbor.text == u'it' +table(["Name", "Type", "Description"]) +row +cell #[code i] +cell int +cell The relative position of the token to get. Defaults to #[code 1]. +footrow +cell returns +cell #[code Token] +cell The token at position #[code self.doc[self.i+i]]. +h(2, "is_ancestor") Token.is_ancestor +tag method +tag-model("parse") p | Check whether this token is a parent, grandparent, etc. of another | in the dependency tree. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') give = doc[0] it = doc[1] assert give.is_ancestor(it) +table(["Name", "Type", "Description"]) +row +cell descendant +cell #[code Token] +cell Another token. +footrow +cell returns +cell bool +cell Whether this token is the ancestor of the descendant. +h(2, "ancestors") Token.ancestors +tag property +tag-model("parse") p A sequence of this token's syntactic ancestors (parents, grandparents, etc.). +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') it_ancestors = doc[1].ancestors assert [t.text for t in it_ancestors] == [u'Give'] he_ancestors = doc[4].ancestors assert [t.text for t in he_ancestors] == [u'pleaded'] +table(["Name", "Type", "Description"]) +footrow +cell yields +cell #[code Token] +cell | A sequence of ancestor tokens such that | #[code ancestor.is_ancestor(self)]. +h(2, "conjuncts") Token.conjuncts +tag property +tag-model("parse") p A sequence of coordinated tokens, not including the token itself. +aside-code("Example"). doc = nlp(u'I like apples and oranges') apples_conjuncts = doc[2].conjuncts assert [t.text for t in apples_conjuncts] == [u'oranges'] +table(["Name", "Type", "Description"]) +footrow +cell yields +cell #[code Token] +cell A coordinated token. 
+h(2, "children") Token.children +tag property +tag-model("parse") p A sequence of the token's immediate syntactic children. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') give_children = doc[0].children assert [t.text for t in give_children] == [u'it', u'back', u'!'] +table(["Name", "Type", "Description"]) +footrow +cell yields +cell #[code Token] +cell A child token such that #[code child.head==self]. +h(2, "subtree") Token.subtree +tag property +tag-model("parse") p A sequence of all the token's syntactic descendants. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') give_subtree = doc[0].subtree assert [t.text for t in give_subtree] == [u'Give', u'it', u'back', u'!'] +table(["Name", "Type", "Description"]) +footrow +cell yields +cell #[code Token] +cell A descendant token such that #[code self.is_ancestor(descendant)]. +h(2, "has_vector") Token.has_vector +tag property +tag-model("vectors") p | A boolean value indicating whether a word vector is associated with the | token. +aside-code("Example"). doc = nlp(u'I like apples') apples = doc[2] assert apples.has_vector +table(["Name", "Type", "Description"]) +footrow +cell returns +cell bool +cell Whether the token has vector data attached. +h(2, "vector") Token.vector +tag property +tag-model("vectors") p A real-valued meaning representation. +aside-code("Example"). doc = nlp(u'I like apples') apples = doc[2] assert apples.vector.dtype == 'float32' assert apples.vector.shape == (300,) +table(["Name", "Type", "Description"]) +footrow +cell returns +cell #[code numpy.ndarray[ndim=1, dtype='float32']] +cell A 1D numpy array representing the token's semantics. +h(2, "vector_norm") Token.vector_norm +tag property +tag-model("vectors") p The L2 norm of the token's vector representation. +aside-code("Example"). 
doc = nlp(u'I like apples and pasta') apples = doc[2] pasta = doc[4] apples.vector_norm # 6.89589786529541 pasta.vector_norm # 7.759851932525635 assert apples.vector_norm != pasta.vector_norm +table(["Name", "Type", "Description"]) +footrow +cell returns +cell float +cell The L2 norm of the vector representation. +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) +row +cell #[code text] +cell unicode +cell Verbatim text content. +row +cell #[code text_with_ws] +cell unicode +cell Text content, with trailing space character if present. +row +cell #[code whitespace] +cell int +cell Trailing space character if present. +row +cell #[code whitespace_] +cell unicode +cell Trailing space character if present. +row +cell #[code vocab] +cell #[code Vocab] +cell The vocab object of the parent #[code Doc]. +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code head] +cell #[code Token] +cell The syntactic parent, or "governor", of this token. +row +cell #[code left_edge] +cell #[code Token] +cell The leftmost token of this token's syntactic descendants. +row +cell #[code right_edge] +cell #[code Token] +cell The rightmost token of this token's syntactic descendants. +row +cell #[code i] +cell int +cell The index of the token within the parent document. +row +cell #[code ent_type] +cell int +cell Named entity type. +row +cell #[code ent_type_] +cell unicode +cell Named entity type. +row +cell #[code ent_iob] +cell int +cell | IOB code of named entity tag. | #[code 1="I", 2="O", 3="B"]. #[code 0] means no tag is assigned. +row +cell #[code ent_iob_] +cell unicode +cell | IOB code of named entity tag. #[code "B"] | means the token begins an entity, #[code "I"] means it is inside | an entity, #[code "O"] means it is outside an entity, and | #[code ""] means no entity tag is set. +row +cell #[code ent_id] +cell int +cell | ID of the entity the token is an instance of, if any. Usually | assigned by patterns in the Matcher. 
+row +cell #[code ent_id_] +cell unicode +cell | ID of the entity the token is an instance of, if any. Usually | assigned by patterns in the Matcher. +row +cell #[code lemma] +cell int +cell | Base form of the word, with no inflectional suffixes. +row +cell #[code lemma_] +cell unicode +cell Base form of the word, with no inflectional suffixes. +row +cell #[code lower] +cell int +cell Lower-case form of the word. +row +cell #[code lower_] +cell unicode +cell Lower-case form of the word. +row +cell #[code shape] +cell int +cell Transform of the word's string, to show orthographic features. +row +cell #[code shape_] +cell unicode +cell A transform of the word's string, to show orthographic features. +row +cell #[code prefix] +cell int +cell Integer ID of a length-N substring from the start of the | word. Defaults to #[code N=1]. +row +cell #[code prefix_] +cell unicode +cell | A length-N substring from the start of the word. Defaults to | #[code N=1]. +row +cell #[code suffix] +cell int +cell | Length-N substring from the end of the word. Defaults to #[code N=3]. +row +cell #[code suffix_] +cell unicode +cell Length-N substring from the end of the word. Defaults to #[code N=3]. +row +cell #[code is_alpha] +cell bool +cell Equivalent to #[code word.orth_.isalpha()]. +row +cell #[code is_ascii] +cell bool +cell Equivalent to #[code not any(ord(c) >= 128 for c in word.orth_)]. +row +cell #[code is_digit] +cell bool +cell Equivalent to #[code word.orth_.isdigit()]. +row +cell #[code is_lower] +cell bool +cell Equivalent to #[code word.orth_.islower()]. +row +cell #[code is_title] +cell bool +cell Equivalent to #[code word.orth_.istitle()]. +row +cell #[code is_punct] +cell bool +cell Is the word punctuation? +row +cell #[code is_space] +cell bool +cell Equivalent to #[code word.orth_.isspace()]. +row +cell #[code like_url] +cell bool +cell Does the word resemble a URL? +row +cell #[code like_num] +cell bool +cell Does the word represent a number? e.g. 
“10.9”, “10”, “ten”, etc. +row +cell #[code like_email] +cell bool +cell Does the word resemble an email address? +row +cell #[code is_oov] +cell bool +cell Is the word out-of-vocabulary? +row +cell #[code is_stop] +cell bool +cell Is the word part of a "stop list"? +row +cell #[code pos] +cell int +cell Coarse-grained part-of-speech. +row +cell #[code pos_] +cell unicode +cell Coarse-grained part-of-speech. +row +cell #[code tag] +cell int +cell Fine-grained part-of-speech. +row +cell #[code tag_] +cell unicode +cell Fine-grained part-of-speech. +row +cell #[code dep] +cell int +cell Syntactic dependency relation. +row +cell #[code dep_] +cell unicode +cell Syntactic dependency relation. +row +cell #[code lang] +cell int +cell Language of the parent document's vocabulary. +row +cell #[code lang_] +cell unicode +cell Language of the parent document's vocabulary. +row +cell #[code prob] +cell float +cell Smoothed log probability estimate of token's type. +row +cell #[code idx] +cell int +cell The character offset of the token within the parent document. +row +cell #[code sentiment] +cell float +cell A scalar value indicating the positivity or negativity of the token. +row +cell #[code lex_id] +cell int +cell ID of the token's lexical type.