Update docstrings for Span class

This commit is contained in:
ines 2017-05-18 22:17:24 +02:00
parent b87066ff10
commit 593361ee3c

View File

@@ -20,22 +20,17 @@ from .. import about
cdef class Span: cdef class Span:
""" """A slice from a Doc object."""
A slice from a Doc object.
"""
def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None, def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
vector_norm=None): vector_norm=None):
""" """Create a `Span` object from the slice `doc[start : end]`.
Create a Span object from the slice doc[start : end]
Arguments: doc (Doc): The parent document.
doc (Doc): The parent document. start (int): The index of the first token of the span.
start (int): The index of the first token of the span. end (int): The index of the first token after the span.
end (int): The index of the first token after the span. label (int): A label to attach to the Span, e.g. for named entities.
label (int): A label to attach to the Span, e.g. for named entities. vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span. RETURNS (Span): The newly constructed object.
Returns:
Span The newly constructed object.
""" """
if not (0 <= start <= end <= len(doc)): if not (0 <= start <= end <= len(doc)):
raise IndexError raise IndexError
@@ -70,7 +65,6 @@ cdef class Span:
def __hash__(self): def __hash__(self):
return hash((self.doc, self.label, self.start_char, self.end_char)) return hash((self.doc, self.label, self.start_char, self.end_char))
def __len__(self): def __len__(self):
self._recalculate_indices() self._recalculate_indices()
if self.end < self.start: if self.end < self.start:
@@ -99,30 +93,21 @@ cdef class Span:
yield self.doc[i] yield self.doc[i]
def merge(self, *args, **attributes): def merge(self, *args, **attributes):
""" """Retokenize the document, such that the span is merged into a single token.
Retokenize the document, such that the span is merged into a single token.
Arguments: **attributes: Attributes to assign to the merged token. By default,
**attributes: attributes are inherited from the syntactic root token of the span.
Attributes to assign to the merged token. By default, attributes RETURNS (Token): The newly merged token.
are inherited from the syntactic root token of the span.
Returns:
token (Token):
The newly merged token.
""" """
return self.doc.merge(self.start_char, self.end_char, *args, **attributes) return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
def similarity(self, other): def similarity(self, other):
""" """ Make a semantic similarity estimate. The default estimate is cosine
Make a semantic similarity estimate. The default estimate is cosine
similarity using an average of word vectors. similarity using an average of word vectors.
Arguments: other (object): The object to compare with. By default, accepts `Doc`,
other (object): The object to compare with. By default, accepts Doc, `Span`, `Token` and `Lexeme` objects.
Span, Token and Lexeme objects. RETURNS (float): A scalar similarity score. Higher is more similar.
Return:
score (float): A scalar similarity score. Higher is more similar.
""" """
if 'similarity' in self.doc.user_span_hooks: if 'similarity' in self.doc.user_span_hooks:
self.doc.user_span_hooks['similarity'](self, other) self.doc.user_span_hooks['similarity'](self, other)
@@ -145,11 +130,9 @@ cdef class Span:
self.end = end + 1 self.end = end + 1
property sent: property sent:
""" """The sentence span that this span is a part of.
The sentence span that this span is a part of.
Returns: RETURNS (Span): The sentence span that the span is a part of.
Span The sentence this is part of.
""" """
def __get__(self): def __get__(self):
if 'sent' in self.doc.user_span_hooks: if 'sent' in self.doc.user_span_hooks:
@@ -166,12 +149,14 @@ cdef class Span:
return self.doc[root.l_edge : root.r_edge + 1] return self.doc[root.l_edge : root.r_edge + 1]
property has_vector: property has_vector:
# TODO: docstring
def __get__(self): def __get__(self):
if 'has_vector' in self.doc.user_span_hooks: if 'has_vector' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['has_vector'](self) return self.doc.user_span_hooks['has_vector'](self)
return any(token.has_vector for token in self) return any(token.has_vector for token in self)
property vector: property vector:
# TODO: docstring
def __get__(self): def __get__(self):
if 'vector' in self.doc.user_span_hooks: if 'vector' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['vector'](self) return self.doc.user_span_hooks['vector'](self)
@@ -180,6 +165,7 @@ cdef class Span:
return self._vector return self._vector
property vector_norm: property vector_norm:
# TODO: docstring
def __get__(self): def __get__(self):
if 'vector_norm' in self.doc.user_span_hooks: if 'vector_norm' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['vector'](self) return self.doc.user_span_hooks['vector'](self)
@@ -193,6 +179,7 @@ cdef class Span:
return self._vector_norm return self._vector_norm
property sentiment: property sentiment:
# TODO: docstring
def __get__(self): def __get__(self):
if 'sentiment' in self.doc.user_span_hooks: if 'sentiment' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['sentiment'](self) return self.doc.user_span_hooks['sentiment'](self)
@@ -200,6 +187,7 @@ cdef class Span:
return sum([token.sentiment for token in self]) / len(self) return sum([token.sentiment for token in self]) / len(self)
property text: property text:
# TODO: docstring
def __get__(self): def __get__(self):
text = self.text_with_ws text = self.text_with_ws
if self[-1].whitespace_: if self[-1].whitespace_:
@@ -207,16 +195,17 @@ cdef class Span:
return text return text
property text_with_ws: property text_with_ws:
# TODO: docstring
def __get__(self): def __get__(self):
return u''.join([t.text_with_ws for t in self]) return u''.join([t.text_with_ws for t in self])
property noun_chunks: property noun_chunks:
""" """Yields base noun-phrase `Span` objects, if the document has been
Yields base noun-phrase #[code Span] objects, if the document syntactically parsed. A base noun phrase, or "NP chunk", is a noun
has been syntactically parsed. A base noun phrase, or phrase that does not permit other NPs to be nested within it so no
'NP chunk', is a noun phrase that does not permit other NPs to NP-level coordination, no prepositional phrases, and no relative clauses.
be nested within it so no NP-level coordination, no prepositional
phrases, and no relative clauses. For example: YIELDS (Span): Base noun-phrase `Span` objects.
""" """
def __get__(self): def __get__(self):
if not self.doc.is_parsed: if not self.doc.is_parsed:
@@ -235,49 +224,47 @@ cdef class Span:
yield span yield span
property root: property root:
""" """The token within the span that's highest in the parse tree.
The token within the span that's highest in the parse tree. If there's a tie, the earliest is preferred.
tie, the earlist is prefered.
Returns: RETURNS (Token): The root token.
Token: The root token.
i.e. has the shortest path to the root of the sentence (or is the root EXAMPLE: The root token has the shortest path to the root of the sentence
itself). If multiple words are equally high in the tree, the first word (or is the root itself). If multiple words are equally high in the
is taken. For example: tree, the first word is taken. For example:
>>> toks = nlp(u'I like New York in Autumn.') >>> toks = nlp(u'I like New York in Autumn.')
Let's name the indices --- easier than writing "toks[4]" etc. Let's name the indices easier than writing `toks[4]` etc.
>>> i, like, new, york, in_, autumn, dot = range(len(toks)) >>> i, like, new, york, in_, autumn, dot = range(len(toks))
The head of 'new' is 'York', and the head of 'York' is 'like' The head of 'new' is 'York', and the head of "York" is "like"
>>> toks[new].head.orth_ >>> toks[new].head.orth_
'York' 'York'
>>> toks[york].head.orth_ >>> toks[york].head.orth_
'like' 'like'
Create a span for "New York". Its root is "York". Create a span for "New York". Its root is "York".
>>> new_york = toks[new:york+1] >>> new_york = toks[new:york+1]
>>> new_york.root.orth_ >>> new_york.root.orth_
'York' 'York'
Here's a more complicated case, raise by Issue #214 Here's a more complicated case, raised by issue #214:
>>> toks = nlp(u'to, north and south carolina') >>> toks = nlp(u'to, north and south carolina')
>>> to, north, and_, south, carolina = toks >>> to, north, and_, south, carolina = toks
>>> south.head.text, carolina.head.text >>> south.head.text, carolina.head.text
('north', 'to') ('north', 'to')
Here 'south' is a child of 'north', which is a child of 'carolina'. Here "south" is a child of "north", which is a child of "carolina".
Carolina is the root of the span: Carolina is the root of the span:
>>> south_carolina = toks[-2:] >>> south_carolina = toks[-2:]
>>> south_carolina.root.text >>> south_carolina.root.text
'carolina' 'carolina'
""" """
def __get__(self): def __get__(self):
self._recalculate_indices() self._recalculate_indices()
@@ -314,10 +301,10 @@ cdef class Span:
return self.doc[root] return self.doc[root]
property lefts: property lefts:
""" """ Tokens that are to the left of the span, whose head is within the
Tokens that are to the left of the span, whose head is within the Span. `Span`.
Yields: Token A left-child of a token of the span. YIELDS (Token): A left-child of a token of the span.
""" """
def __get__(self): def __get__(self):
for token in reversed(self): # Reverse, so we get the tokens in order for token in reversed(self): # Reverse, so we get the tokens in order
@@ -326,10 +313,10 @@ cdef class Span:
yield left yield left
property rights: property rights:
""" """Tokens that are to the right of the Span, whose head is within the
Tokens that are to the right of the Span, whose head is within the Span. `Span`.
Yields: Token A right-child of a token of the span. YIELDS (Token): A right-child of a token of the span.
""" """
def __get__(self): def __get__(self):
for token in self: for token in self:
@@ -338,10 +325,9 @@ cdef class Span:
yield right yield right
property subtree: property subtree:
""" """Tokens that descend from tokens in the span, but fall outside it.
Tokens that descend from tokens in the span, but fall outside it.
Yields: Token A descendant of a token within the span. YIELDS (Token): A descendant of a token within the span.
""" """
def __get__(self): def __get__(self):
for word in self.lefts: for word in self.lefts:
@@ -351,8 +337,9 @@ cdef class Span:
yield from word.subtree yield from word.subtree
property ent_id: property ent_id:
""" """An (integer) entity ID. Usually assigned by patterns in the `Matcher`.
An (integer) entity ID. Usually assigned by patterns in the Matcher.
RETURNS (int): The entity ID.
""" """
def __get__(self): def __get__(self):
return self.root.ent_id return self.root.ent_id
@@ -362,9 +349,11 @@ cdef class Span:
raise NotImplementedError( raise NotImplementedError(
"Can't yet set ent_id from Span. Vote for this feature on the issue " "Can't yet set ent_id from Span. Vote for this feature on the issue "
"tracker: http://github.com/explosion/spaCy/issues") "tracker: http://github.com/explosion/spaCy/issues")
property ent_id_: property ent_id_:
""" """A (string) entity ID. Usually assigned by patterns in the `Matcher`.
A (string) entity ID. Usually assigned by patterns in the Matcher.
RETURNS (unicode): The entity ID.
""" """
def __get__(self): def __get__(self):
return self.root.ent_id_ return self.root.ent_id_
@@ -376,26 +365,32 @@ cdef class Span:
"tracker: http://github.com/explosion/spaCy/issues") "tracker: http://github.com/explosion/spaCy/issues")
property orth_: property orth_:
# TODO: docstring
def __get__(self): def __get__(self):
return ''.join([t.string for t in self]).strip() return ''.join([t.string for t in self]).strip()
property lemma_: property lemma_:
# TODO: docstring
def __get__(self): def __get__(self):
return ' '.join([t.lemma_ for t in self]).strip() return ' '.join([t.lemma_ for t in self]).strip()
property upper_: property upper_:
# TODO: docstring
def __get__(self): def __get__(self):
return ''.join([t.string.upper() for t in self]).strip() return ''.join([t.string.upper() for t in self]).strip()
property lower_: property lower_:
# TODO: docstring
def __get__(self): def __get__(self):
return ''.join([t.string.lower() for t in self]).strip() return ''.join([t.string.lower() for t in self]).strip()
property string: property string:
# TODO: docstring
def __get__(self): def __get__(self):
return ''.join([t.string for t in self]) return ''.join([t.string for t in self])
property label_: property label_:
# TODO: docstring
def __get__(self): def __get__(self):
return self.doc.vocab.strings[self.label] return self.doc.vocab.strings[self.label]