Update docstrings and remove deprecated load classmethod

ines 2017-05-21 13:27:52 +02:00
parent c9f04f3cd0
commit 885e82c9b0


@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
-import ujson
 from collections import defaultdict
 from cymem.cymem cimport Pool
@@ -15,7 +14,6 @@ from .tokens.doc cimport Doc
 from .attrs cimport TAG
 from .gold cimport GoldParse
 from .attrs cimport *
-from . import util

 cpdef enum:
@@ -108,55 +106,15 @@ cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
 cdef class Tagger:
-    """
-    Annotate part-of-speech tags on Doc objects.
-    """
-    @classmethod
-    def load(cls, path, vocab, require=False):
-        """
-        Load the statistical model from the supplied path.
-        Arguments:
-            path (Path):
-                The path to load from.
-            vocab (Vocab):
-                The vocabulary. Must be shared by the documents to be processed.
-            require (bool):
-                Whether to raise an error if the files are not found.
-        Returns (Tagger):
-            The newly created object.
-        """
-        # TODO: Change this to expect config.json when we don't have to
-        # support old data.
-        path = util.ensure_path(path)
-        if (path / 'templates.json').exists():
-            with (path / 'templates.json').open('r', encoding='utf8') as file_:
-                templates = ujson.load(file_)
-        elif require:
-            raise IOError(
-                "Required file %s/templates.json not found when loading Tagger" % str(path))
-        else:
-            templates = cls.feature_templates
-        self = cls(vocab, model=None, feature_templates=templates)
-        if (path / 'model').exists():
-            self.model.load(str(path / 'model'))
-        elif require:
-            raise IOError(
-                "Required file %s/model not found when loading Tagger" % str(path))
-        return self
+    """Annotate part-of-speech tags on Doc objects."""

     def __init__(self, Vocab vocab, TaggerModel model=None, **cfg):
-        """
-        Create a Tagger.
-        Arguments:
-            vocab (Vocab):
-                The vocabulary object. Must be shared with documents to be processed.
-            model (thinc.linear.AveragedPerceptron):
-                The statistical model.
-        Returns (Tagger):
-            The newly constructed object.
-        """
+        """Create a Tagger.
+
+        vocab (Vocab): The vocabulary object. Must be shared with documents to
+            be processed.
+        model (thinc.linear.AveragedPerceptron): The statistical model.
+        RETURNS (Tagger): The newly constructed object.
+        """
         if model is None:
             model = TaggerModel(cfg.get('features', self.feature_templates),
@@ -186,13 +144,9 @@ cdef class Tagger:
         tokens._py_tokens = [None] * tokens.length

     def __call__(self, Doc tokens):
-        """
-        Apply the tagger, setting the POS tags onto the Doc object.
-        Arguments:
-            doc (Doc): The tokens to be tagged.
-        Returns:
-            None
-        """
+        """Apply the tagger, setting the POS tags onto the Doc object.
+
+        doc (Doc): The tokens to be tagged.
+        """
         if tokens.length == 0:
             return 0
@@ -215,34 +169,25 @@ cdef class Tagger:
         tokens._py_tokens = [None] * tokens.length

     def pipe(self, stream, batch_size=1000, n_threads=2):
-        """
-        Tag a stream of documents.
-        Arguments:
-            stream: The sequence of documents to tag.
-            batch_size (int):
-                The number of documents to accumulate into a working set.
-            n_threads (int):
-                The number of threads with which to work on the buffer in parallel,
-                if the Matcher implementation supports multi-threading.
-        Yields:
-            Doc Documents, in order.
-        """
+        """Tag a stream of documents.
+
+        Arguments:
+            stream: The sequence of documents to tag.
+            batch_size (int): The number of documents to accumulate into a working set.
+            n_threads (int): The number of threads with which to work on the buffer
+                in parallel, if the Matcher implementation supports multi-threading.
+        YIELDS (Doc): Documents, in order.
+        """
         for doc in stream:
             self(doc)
             yield doc
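As the updated docstring notes, pipe yields documents in their original order, so it can be dropped into a streaming pipeline. A usage sketch, assuming docs is an iterable of pre-tokenized Doc objects:

    # Tag a stream in batches; documents come back in order.
    for doc in tagger.pipe(docs, batch_size=1000, n_threads=2):
        print([t.tag_ for t in doc])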
     def update(self, Doc tokens, GoldParse gold, itn=0):
-        """
-        Update the statistical model, with tags supplied for the given document.
-        Arguments:
-            doc (Doc):
-                The document to update on.
-            gold (GoldParse):
-                Manager for the gold-standard tags.
-        Returns (int):
-            Number of tags correct.
-        """
+        """Update the statistical model, with tags supplied for the given document.
+
+        doc (Doc): The document to update on.
+        gold (GoldParse): Manager for the gold-standard tags.
+        RETURNS (int): Number of tags predicted correctly.
+        """
         gold_tag_strs = gold.tags
         assert len(tokens) == len(gold_tag_strs)
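Because update returns the number of correctly predicted tags, a training loop can track accuracy directly. A hedged sketch, assuming train_data yields (Doc, tag list) pairs; the loop structure is illustrative, not spaCy's official training recipe:

    from spacy.gold import GoldParse

    # Accumulate per-document correct-tag counts to monitor accuracy.
    for itn in range(10):
        n_correct = n_total = 0
        for doc, tag_strs in train_data:
            gold = GoldParse(doc, tags=tag_strs)
            n_correct += tagger.update(doc, gold, itn=itn)
            n_total += len(doc)
        print(itn, n_correct / float(n_total))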