mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Update docstrings for Span class
This commit is contained in:
		
							parent
							
								
									b87066ff10
								
							
						
					
					
						commit
						593361ee3c
					
				| 
						 | 
				
			
			@ -20,22 +20,17 @@ from .. import about
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
cdef class Span:
 | 
			
		||||
    """
 | 
			
		||||
    A slice from a Doc object.
 | 
			
		||||
    """
 | 
			
		||||
    """A slice from a Doc object."""
 | 
			
		||||
    def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
 | 
			
		||||
                  vector_norm=None):
 | 
			
		||||
        """
 | 
			
		||||
        Create a Span object from the slice doc[start : end]
 | 
			
		||||
        """Create a `Span` object from the slice `doc[start : end]`.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            doc (Doc): The parent document.
 | 
			
		||||
            start (int): The index of the first token of the span.
 | 
			
		||||
            end (int): The index of the first token after the span.
 | 
			
		||||
            label (int): A label to attach to the Span, e.g. for named entities.
 | 
			
		||||
            vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
 | 
			
		||||
        Returns:
 | 
			
		||||
            Span The newly constructed object.
 | 
			
		||||
        doc (Doc): The parent document.
 | 
			
		||||
        start (int): The index of the first token of the span.
 | 
			
		||||
        end (int): The index of the first token after the span.
 | 
			
		||||
        label (int): A label to attach to the Span, e.g. for named entities.
 | 
			
		||||
        vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
 | 
			
		||||
        RETURNS (Span): The newly constructed object.
 | 
			
		||||
        """
 | 
			
		||||
        if not (0 <= start <= end <= len(doc)):
 | 
			
		||||
            raise IndexError
 | 
			
		||||
| 
						 | 
				
			
			@ -70,7 +65,6 @@ cdef class Span:
 | 
			
		|||
    def __hash__(self):
 | 
			
		||||
        return hash((self.doc, self.label, self.start_char, self.end_char))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        self._recalculate_indices()
 | 
			
		||||
        if self.end < self.start:
 | 
			
		||||
| 
						 | 
				
			
			@ -99,30 +93,21 @@ cdef class Span:
 | 
			
		|||
            yield self.doc[i]
 | 
			
		||||
 | 
			
		||||
    def merge(self, *args, **attributes):
 | 
			
		||||
        """
 | 
			
		||||
        Retokenize the document, such that the span is merged into a single token.
 | 
			
		||||
        """Retokenize the document, such that the span is merged into a single token.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            **attributes:
 | 
			
		||||
                Attributes to assign to the merged token. By default, attributes
 | 
			
		||||
                are inherited from the syntactic root token of the span.
 | 
			
		||||
        Returns:
 | 
			
		||||
            token (Token):
 | 
			
		||||
                The newly merged token.
 | 
			
		||||
        **attributes: Attributes to assign to the merged token. By default,
 | 
			
		||||
            attributes are inherited from the syntactic root token of the span.
 | 
			
		||||
        RETURNS (Token): The newly merged token.
 | 
			
		||||
        """
 | 
			
		||||
        return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
 | 
			
		||||
 | 
			
		||||
    def similarity(self, other):
 | 
			
		||||
        """
 | 
			
		||||
        Make a semantic similarity estimate. The default estimate is cosine
 | 
			
		||||
        """ Make a semantic similarity estimate. The default estimate is cosine
 | 
			
		||||
        similarity using an average of word vectors.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            other (object): The object to compare with. By default, accepts Doc,
 | 
			
		||||
                Span, Token and Lexeme objects.
 | 
			
		||||
 | 
			
		||||
        Return:
 | 
			
		||||
            score (float): A scalar similarity score. Higher is more similar.
 | 
			
		||||
        other (object): The object to compare with. By default, accepts `Doc`,
 | 
			
		||||
            `Span`, `Token` and `Lexeme` objects.
 | 
			
		||||
        RETURNS (float): A scalar similarity score. Higher is more similar.
 | 
			
		||||
        """
 | 
			
		||||
        if 'similarity' in self.doc.user_span_hooks:
 | 
			
		||||
            self.doc.user_span_hooks['similarity'](self, other)
 | 
			
		||||
| 
						 | 
				
			
			@ -145,11 +130,9 @@ cdef class Span:
 | 
			
		|||
            self.end = end + 1
 | 
			
		||||
 | 
			
		||||
    property sent:
 | 
			
		||||
        """
 | 
			
		||||
        The sentence span that this span is a part of.
 | 
			
		||||
        """The sentence span that this span is a part of.
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Span The sentence this is part of.
 | 
			
		||||
        RETURNS (Span): The sentence span that the span is a part of.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if 'sent' in self.doc.user_span_hooks:
 | 
			
		||||
| 
						 | 
				
			
			@ -166,12 +149,14 @@ cdef class Span:
 | 
			
		|||
            return self.doc[root.l_edge : root.r_edge + 1]
 | 
			
		||||
 | 
			
		||||
    property has_vector:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if 'has_vector' in self.doc.user_span_hooks:
 | 
			
		||||
                return self.doc.user_span_hooks['has_vector'](self)
 | 
			
		||||
            return any(token.has_vector for token in self)
 | 
			
		||||
 | 
			
		||||
    property vector:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if 'vector' in self.doc.user_span_hooks:
 | 
			
		||||
                return self.doc.user_span_hooks['vector'](self)
 | 
			
		||||
| 
						 | 
				
			
			@ -180,6 +165,7 @@ cdef class Span:
 | 
			
		|||
            return self._vector
 | 
			
		||||
 | 
			
		||||
    property vector_norm:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if 'vector_norm' in self.doc.user_span_hooks:
 | 
			
		||||
                return self.doc.user_span_hooks['vector'](self)
 | 
			
		||||
| 
						 | 
				
			
			@ -193,6 +179,7 @@ cdef class Span:
 | 
			
		|||
            return self._vector_norm
 | 
			
		||||
 | 
			
		||||
    property sentiment:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if 'sentiment' in self.doc.user_span_hooks:
 | 
			
		||||
                return self.doc.user_span_hooks['sentiment'](self)
 | 
			
		||||
| 
						 | 
				
			
			@ -200,6 +187,7 @@ cdef class Span:
 | 
			
		|||
                return sum([token.sentiment for token in self]) / len(self)
 | 
			
		||||
 | 
			
		||||
    property text:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            text = self.text_with_ws
 | 
			
		||||
            if self[-1].whitespace_:
 | 
			
		||||
| 
						 | 
				
			
			@ -207,16 +195,17 @@ cdef class Span:
 | 
			
		|||
            return text
 | 
			
		||||
 | 
			
		||||
    property text_with_ws:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return u''.join([t.text_with_ws for t in self])
 | 
			
		||||
 | 
			
		||||
    property noun_chunks:
 | 
			
		||||
        """
 | 
			
		||||
        Yields base noun-phrase #[code Span] objects, if the document
 | 
			
		||||
        has been syntactically parsed. A base noun phrase, or
 | 
			
		||||
        'NP chunk', is a noun phrase that does not permit other NPs to
 | 
			
		||||
        be nested within it – so no NP-level coordination, no prepositional
 | 
			
		||||
        phrases, and no relative clauses. For example:
 | 
			
		||||
        """Yields base noun-phrase `Span` objects, if the document has been
 | 
			
		||||
        syntactically parsed. A base noun phrase, or "NP chunk", is a noun
 | 
			
		||||
        phrase that does not permit other NPs to be nested within it – so no
 | 
			
		||||
        NP-level coordination, no prepositional phrases, and no relative clauses.
 | 
			
		||||
 | 
			
		||||
        YIELDS (Span): Base noun-phrase `Span` objects
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            if not self.doc.is_parsed:
 | 
			
		||||
| 
						 | 
				
			
			@ -235,49 +224,47 @@ cdef class Span:
 | 
			
		|||
                yield span
 | 
			
		||||
 | 
			
		||||
    property root:
 | 
			
		||||
        """
 | 
			
		||||
        The token within the span that's highest in the parse tree. If there's a
 | 
			
		||||
        tie, the earlist is prefered.
 | 
			
		||||
        """The token within the span that's highest in the parse tree.
 | 
			
		||||
        If there's a tie, the earliest is preferred.
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Token: The root token.
 | 
			
		||||
        RETURNS (Token): The root token.
 | 
			
		||||
 | 
			
		||||
        i.e. has the shortest path to the root of the sentence (or is the root
 | 
			
		||||
        itself). If multiple words are equally high in the tree, the first word
 | 
			
		||||
        is taken. For example:
 | 
			
		||||
        EXAMPLE: The root token has the shortest path to the root of the sentence
 | 
			
		||||
            (or is the root itself). If multiple words are equally high in the
 | 
			
		||||
            tree, the first word is taken. For example:
 | 
			
		||||
 | 
			
		||||
        >>> toks = nlp(u'I like New York in Autumn.')
 | 
			
		||||
            >>> toks = nlp(u'I like New York in Autumn.')
 | 
			
		||||
 | 
			
		||||
        Let's name the indices --- easier than writing "toks[4]" etc.
 | 
			
		||||
            Let's name the indices – easier than writing `toks[4]` etc.
 | 
			
		||||
 | 
			
		||||
        >>> i, like, new, york, in_, autumn, dot = range(len(toks))
 | 
			
		||||
            >>> i, like, new, york, in_, autumn, dot = range(len(toks))
 | 
			
		||||
 | 
			
		||||
        The head of 'new' is 'York', and the head of 'York' is 'like'
 | 
			
		||||
            The head of "new" is "York", and the head of "York" is "like"
 | 
			
		||||
 | 
			
		||||
        >>> toks[new].head.orth_
 | 
			
		||||
        'York'
 | 
			
		||||
        >>> toks[york].head.orth_
 | 
			
		||||
        'like'
 | 
			
		||||
            >>> toks[new].head.orth_
 | 
			
		||||
            'York'
 | 
			
		||||
            >>> toks[york].head.orth_
 | 
			
		||||
            'like'
 | 
			
		||||
 | 
			
		||||
        Create a span for "New York". Its root is "York".
 | 
			
		||||
            Create a span for "New York". Its root is "York".
 | 
			
		||||
 | 
			
		||||
        >>> new_york = toks[new:york+1]
 | 
			
		||||
        >>> new_york.root.orth_
 | 
			
		||||
        'York'
 | 
			
		||||
            >>> new_york = toks[new:york+1]
 | 
			
		||||
            >>> new_york.root.orth_
 | 
			
		||||
            'York'
 | 
			
		||||
 | 
			
		||||
        Here's a more complicated case, raise by Issue #214
 | 
			
		||||
            Here's a more complicated case, raised by issue #214:
 | 
			
		||||
 | 
			
		||||
        >>> toks = nlp(u'to, north and south carolina')
 | 
			
		||||
        >>> to, north, and_, south, carolina = toks
 | 
			
		||||
        >>> south.head.text, carolina.head.text
 | 
			
		||||
        ('north', 'to')
 | 
			
		||||
            >>> toks = nlp(u'to, north and south carolina')
 | 
			
		||||
            >>> to, north, and_, south, carolina = toks
 | 
			
		||||
            >>> south.head.text, carolina.head.text
 | 
			
		||||
            ('north', 'to')
 | 
			
		||||
 | 
			
		||||
        Here 'south' is a child of 'north', which is a child of 'carolina'.
 | 
			
		||||
        Carolina is the root of the span:
 | 
			
		||||
            Here "south" is a child of "north", which is a child of "carolina".
 | 
			
		||||
            Carolina is the root of the span:
 | 
			
		||||
 | 
			
		||||
        >>> south_carolina = toks[-2:]
 | 
			
		||||
        >>> south_carolina.root.text
 | 
			
		||||
        'carolina'
 | 
			
		||||
            >>> south_carolina = toks[-2:]
 | 
			
		||||
            >>> south_carolina.root.text
 | 
			
		||||
            'carolina'
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            self._recalculate_indices()
 | 
			
		||||
| 
						 | 
				
			
			@ -314,10 +301,10 @@ cdef class Span:
 | 
			
		|||
                return self.doc[root]
 | 
			
		||||
 | 
			
		||||
    property lefts:
 | 
			
		||||
        """
 | 
			
		||||
        Tokens that are to the left of the span, whose head is within the Span.
 | 
			
		||||
        """ Tokens that are to the left of the span, whose head is within the
 | 
			
		||||
        `Span`.
 | 
			
		||||
 | 
			
		||||
        Yields: Token A left-child of a token of the span.
 | 
			
		||||
        YIELDS (Token): A left-child of a token of the span.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            for token in reversed(self): # Reverse, so we get the tokens in order
 | 
			
		||||
| 
						 | 
				
			
			@ -326,10 +313,10 @@ cdef class Span:
 | 
			
		|||
                        yield left
 | 
			
		||||
 | 
			
		||||
    property rights:
 | 
			
		||||
        """
 | 
			
		||||
        Tokens that are to the right of the Span, whose head is within the Span.
 | 
			
		||||
        """Tokens that are to the right of the Span, whose head is within the
 | 
			
		||||
        `Span`.
 | 
			
		||||
 | 
			
		||||
        Yields: Token A right-child of a token of the span.
 | 
			
		||||
        YIELDS (Token): A right-child of a token of the span.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            for token in self:
 | 
			
		||||
| 
						 | 
				
			
			@ -338,10 +325,9 @@ cdef class Span:
 | 
			
		|||
                        yield right
 | 
			
		||||
 | 
			
		||||
    property subtree:
 | 
			
		||||
        """
 | 
			
		||||
        Tokens that descend from tokens in the span, but fall outside it.
 | 
			
		||||
        """Tokens that descend from tokens in the span, but fall outside it.
 | 
			
		||||
 | 
			
		||||
        Yields: Token A descendant of a token within the span.
 | 
			
		||||
        YIELDS (Token): A descendant of a token within the span.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            for word in self.lefts:
 | 
			
		||||
| 
						 | 
				
			
			@ -351,8 +337,9 @@ cdef class Span:
 | 
			
		|||
                yield from word.subtree
 | 
			
		||||
 | 
			
		||||
    property ent_id:
 | 
			
		||||
        """
 | 
			
		||||
        An (integer) entity ID. Usually assigned by patterns in the Matcher.
 | 
			
		||||
        """An (integer) entity ID. Usually assigned by patterns in the `Matcher`.
 | 
			
		||||
 | 
			
		||||
        RETURNS (int): The entity ID.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return self.root.ent_id
 | 
			
		||||
| 
						 | 
				
			
			@ -362,9 +349,11 @@ cdef class Span:
 | 
			
		|||
            raise NotImplementedError(
 | 
			
		||||
                "Can't yet set ent_id from Span. Vote for this feature on the issue "
 | 
			
		||||
                "tracker: http://github.com/explosion/spaCy/issues")
 | 
			
		||||
 | 
			
		||||
    property ent_id_:
 | 
			
		||||
        """
 | 
			
		||||
        A (string) entity ID. Usually assigned by patterns in the Matcher.
 | 
			
		||||
        """A (string) entity ID. Usually assigned by patterns in the `Matcher`.
 | 
			
		||||
 | 
			
		||||
        RETURNS (unicode): The entity ID.
 | 
			
		||||
        """
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return self.root.ent_id_
 | 
			
		||||
| 
						 | 
				
			
			@ -376,26 +365,32 @@ cdef class Span:
 | 
			
		|||
                "tracker: http://github.com/explosion/spaCy/issues")
 | 
			
		||||
 | 
			
		||||
    property orth_:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return ''.join([t.string for t in self]).strip()
 | 
			
		||||
 | 
			
		||||
    property lemma_:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return ' '.join([t.lemma_ for t in self]).strip()
 | 
			
		||||
 | 
			
		||||
    property upper_:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return ''.join([t.string.upper() for t in self]).strip()
 | 
			
		||||
 | 
			
		||||
    property lower_:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return ''.join([t.string.lower() for t in self]).strip()
 | 
			
		||||
 | 
			
		||||
    property string:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return ''.join([t.string for t in self])
 | 
			
		||||
 | 
			
		||||
    property label_:
 | 
			
		||||
        # TODO: docstring
 | 
			
		||||
        def __get__(self):
 | 
			
		||||
            return self.doc.vocab.strings[self.label]
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user