//- 💫 DOCS > API > SPAN include ../_includes/_mixins p A slice from a #[+api("doc") #[code Doc]] object. +h(2, "init") Span.__init__ +tag method p Create a Span object from the #[code slice doc[start : end]]. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') span = doc[1:4] assert [t.text for t in span] == [u'it', u'back', u'!'] +table(["Name", "Type", "Description"]) +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code start] +cell int +cell The index of the first token of the span. +row +cell #[code end] +cell int +cell The index of the first token after the span. +row +cell #[code label] +cell int +cell A label to attach to the span, e.g. for named entities. +row +cell #[code vector] +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] +cell A meaning representation of the span. +row("foot") +cell returns +cell #[code Span] +cell The newly constructed object. +h(2, "getitem") Span.__getitem__ +tag method p Get a #[code Token] object. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') span = doc[1:4] assert span[1].text == 'back' +table(["Name", "Type", "Description"]) +row +cell #[code i] +cell int +cell The index of the token within the span. +row("foot") +cell returns +cell #[code Token] +cell The token at #[code span[i]]. p Get a #[code Span] object. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') span = doc[1:4] assert span[1:3].text == 'back!' +table(["Name", "Type", "Description"]) +row +cell #[code start_end] +cell tuple +cell The slice of the span to get. +row("foot") +cell returns +cell #[code Span] +cell The span at #[code span[start : end]]. +h(2, "iter") Span.__iter__ +tag method p Iterate over #[code Token] objects. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') span = doc[1:4] assert [t.text for t in span] == ['it', 'back', '!'] +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code Token] +cell A #[code Token] object. 
+h(2, "len") Span.__len__ +tag method p Get the number of tokens in the span. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') span = doc[1:4] assert len(span) == 3 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The number of tokens in the span. +h(2, "set_extension") Span.set_extension +tag classmethod +tag-new(2) p | Define a custom attribute on the #[code Span] which becomes available via | #[code Span._]. For details, see the documentation on | #[+a("/usage/processing-pipelines#custom-components-attributes") custom attributes]. +aside-code("Example"). from spacy.tokens import Span city_getter = lambda span: any(city in span.text for city in ('New York', 'Paris', 'Berlin')) Span.set_extension('has_city', getter=city_getter) doc = nlp(u'I like New York in Autumn') assert doc[1:4]._.has_city +table(["Name", "Type", "Description"]) +row +cell #[code name] +cell unicode +cell | Name of the attribute to set by the extension. For example, | #[code 'my_attr'] will be available as #[code span._.my_attr]. +row +cell #[code default] +cell - +cell | Optional default value of the attribute if no getter or method | is defined. +row +cell #[code method] +cell callable +cell | Set a custom method on the object, for example | #[code span._.compare(other_span)]. +row +cell #[code getter] +cell callable +cell | Getter function that takes the object and returns an attribute | value. Is called when the user accesses the #[code ._] attribute. +row +cell #[code setter] +cell callable +cell | Setter function that takes the #[code Span] and a value, and | modifies the object. Is called when the user writes to the | #[code Span._] attribute. +h(2, "get_extension") Span.get_extension +tag classmethod +tag-new(2) p | Look up a previously registered extension by name. Returns a 4-tuple | #[code.u-break (default, method, getter, setter)] if the extension is | registered. Raises a #[code KeyError] otherwise. +aside-code("Example"). 
from spacy.tokens import Span Span.set_extension('is_city', default=False) extension = Span.get_extension('is_city') assert extension == (False, None, None, None) +table(["Name", "Type", "Description"]) +row +cell #[code name] +cell unicode +cell Name of the extension. +row("foot") +cell returns +cell tuple +cell | A #[code.u-break (default, method, getter, setter)] tuple of the | extension. +h(2, "has_extension") Span.has_extension +tag classmethod +tag-new(2) p Check whether an extension has been registered on the #[code Span] class. +aside-code("Example"). from spacy.tokens import Span Span.set_extension('is_city', default=False) assert Span.has_extension('is_city') +table(["Name", "Type", "Description"]) +row +cell #[code name] +cell unicode +cell Name of the extension to check. +row("foot") +cell returns +cell bool +cell Whether the extension has been registered. +h(2, "remove_extension") Span.remove_extension +tag classmethod +tag-new("2.0.12") p Remove a previously registered extension. +aside-code("Example"). from spacy.tokens import Span Span.set_extension('is_city', default=False) removed = Span.remove_extension('is_city') assert not Span.has_extension('is_city') +table(["Name", "Type", "Description"]) +row +cell #[code name] +cell unicode +cell Name of the extension. +row("foot") +cell returns +cell tuple +cell | A #[code.u-break (default, method, getter, setter)] tuple of the | removed extension. +h(2, "similarity") Span.similarity +tag method +tag-model("vectors") p | Make a semantic similarity estimate. The default estimate is cosine | similarity using an average of word vectors. +aside-code("Example"). doc = nlp(u'green apples and red oranges') green_apples = doc[:2] red_oranges = doc[3:] apples_oranges = green_apples.similarity(red_oranges) oranges_apples = red_oranges.similarity(green_apples) assert apples_oranges == oranges_apples +table(["Name", "Type", "Description"]) +row +cell #[code other] +cell - +cell | The object to compare with. 
By default, accepts #[code Doc], | #[code Span], #[code Token] and #[code Lexeme] objects. +row("foot") +cell returns +cell float +cell A scalar similarity score. Higher is more similar. +h(2, "get_lca_matrix") Span.get_lca_matrix +tag method p | Calculates the lowest common ancestor matrix for a given #[code Span]. | Returns LCA matrix containing the integer index of the ancestor, or | #[code -1] if no common ancestor is found, e.g. if span excludes a | necessary ancestor. +aside-code("Example"). doc = nlp(u'I like New York in Autumn') span = doc[1:4] matrix = span.get_lca_matrix() # array([[0, 0, 0], [0, 1, 2], [0, 2, 2]], dtype=int32) +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']] +cell The lowest common ancestor matrix of the #[code Span]. +h(2, "to_array") Span.to_array +tag method +tag-new(2) p | Given a list of #[code M] attribute IDs, export the tokens to a numpy | #[code ndarray] of shape #[code (N, M)], where #[code N] is the length of | the span. The values will be 32-bit integers. +aside-code("Example"). from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA doc = nlp(u'I like New York in Autumn.') span = doc[2:3] # All strings mapped to integers, for easy export to numpy np_array = span.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA]) +table(["Name", "Type", "Description"]) +row +cell #[code attr_ids] +cell list +cell A list of attribute ID ints. +row("foot") +cell returns +cell #[code.u-break numpy.ndarray[long, ndim=2]] +cell | A feature matrix, with one row per word, and one column per | attribute indicated in the input #[code attr_ids]. +h(2, "merge") Span.merge +tag method p Retokenize the document, such that the span is merged into a single token. +aside-code("Example").
doc = nlp(u'I like New York in Autumn.') span = doc[2:4] span.merge() assert len(doc) == 6 assert doc[2].text == 'New York' +table(["Name", "Type", "Description"]) +row +cell #[code **attributes] +cell - +cell | Attributes to assign to the merged token. By default, attributes | are inherited from the syntactic root token of the span. +row("foot") +cell returns +cell #[code Token] +cell The newly merged token. +h(2, "ents") Span.ents +tag property +tag-model("NER") +tag-new("2.0.12") p | Iterate over the entities in the span. Yields named-entity | #[code Span] objects, if the entity recognizer has been applied to the | parent document. +aside-code("Example"). doc = nlp(u'Mr. Best flew to New York on Saturday morning.') span = doc[0:6] ents = list(span.ents) assert ents[0].label == 346 assert ents[0].label_ == 'PERSON' assert ents[0].text == 'Mr. Best' +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code Span] +cell Entities in the span. +h(2, "as_doc") Span.as_doc +tag method p | Create a new #[code Doc] object corresponding to the #[code Span], with | a copy of the data. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') span = doc[2:4] doc2 = span.as_doc() assert doc2.text == 'New York' +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell #[code Doc] +cell A #[code Doc] object of the #[code Span]'s content. +h(2, "root") Span.root +tag property +tag-model("parse") p | The token within the span that's highest in the parse tree. If there's a | tie, the earliest is preferred. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') i, like, new, york, in_, autumn, dot = range(len(doc)) assert doc[new].head.text == 'York' assert doc[york].head.text == 'like' new_york = doc[new:york+1] assert new_york.root.text == 'York' +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell #[code Token] +cell The root token.
+h(2, "lefts") Span.lefts +tag property +tag-model("parse") p Tokens that are to the left of the span, whose heads are within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') lefts = [t.text for t in doc[3:7].lefts] assert lefts == [u'New'] +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code Token] +cell A left-child of a token of the span. +h(2, "rights") Span.rights +tag property +tag-model("parse") p Tokens that are to the right of the span, whose heads are within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') rights = [t.text for t in doc[2:4].rights] assert rights == [u'in'] +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code Token] +cell A right-child of a token of the span. +h(2, "n_lefts") Span.n_lefts +tag property +tag-model("parse") p | The number of tokens that are to the left of the span, whose heads are | within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') assert doc[3:7].n_lefts == 1 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The number of left-child tokens. +h(2, "n_rights") Span.n_rights +tag property +tag-model("parse") p | The number of tokens that are to the right of the span, whose heads are | within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') assert doc[2:4].n_rights == 1 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The number of right-child tokens. +h(2, "subtree") Span.subtree +tag property +tag-model("parse") p Tokens within the span and tokens which descend from them. +aside-code("Example"). doc = nlp(u'Give it back! He pleaded.') subtree = [t.text for t in doc[:3].subtree] assert subtree == [u'Give', u'it', u'back', u'!'] +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code Token] +cell A token within the span, or a descendant from it. 
+h(2, "has_vector") Span.has_vector +tag property +tag-model("vectors") p | A boolean value indicating whether a word vector is associated with the | object. +aside-code("Example"). doc = nlp(u'I like apples') assert doc[1:].has_vector +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell bool +cell Whether the span has vector data attached. +h(2, "vector") Span.vector +tag property +tag-model("vectors") p | A real-valued meaning representation. Defaults to an average of the | token vectors. +aside-code("Example"). doc = nlp(u'I like apples') assert doc[1:].vector.dtype == 'float32' assert doc[1:].vector.shape == (300,) +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] +cell A 1D numpy array representing the span's semantics. +h(2, "vector_norm") Span.vector_norm +tag property +tag-model("vectors") p | The L2 norm of the span's vector representation. +aside-code("Example"). doc = nlp(u'I like apples') doc[1:].vector_norm # 4.800883928527915 doc[2:].vector_norm # 6.895897646384268 assert doc[1:].vector_norm != doc[2:].vector_norm +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell float +cell The L2 norm of the vector representation. +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) +row +cell #[code doc] +cell #[code Doc] +cell The parent document. +row +cell #[code sent] +cell #[code Span] +cell The sentence span that this span is a part of. +row +cell #[code start] +cell int +cell The token offset for the start of the span. +row +cell #[code end] +cell int +cell The token offset for the end of the span. +row +cell #[code start_char] +cell int +cell The character offset for the start of the span. +row +cell #[code end_char] +cell int +cell The character offset for the end of the span. +row +cell #[code text] +cell unicode +cell A unicode representation of the span text.
+row +cell #[code text_with_ws] +cell unicode +cell | The text content of the span with a trailing whitespace character | if the last token has one. +row +cell #[code orth] +cell int +cell ID of the verbatim text content. +row +cell #[code orth_] +cell unicode +cell | Verbatim text content (identical to #[code Span.text]). Exists | mostly for consistency with the other attributes. +row +cell #[code label] +cell int +cell The span's label, as an integer ID. +row +cell #[code label_] +cell unicode +cell The span's label, as a string. +row +cell #[code lemma_] +cell unicode +cell The span's lemma. +row +cell #[code ent_id] +cell int +cell The hash value of the named entity the token is an instance of. +row +cell #[code ent_id_] +cell unicode +cell The string ID of the named entity the token is an instance of. +row +cell #[code sentiment] +cell float +cell | A scalar value indicating the positivity or negativity of the | span. +row +cell #[code _] +cell #[code Underscore] +cell | User space for adding custom | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions].