mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Update docstrings and remove deprecated load classmethod
This commit is contained in:
		
							parent
							
								
									c9f04f3cd0
								
							
						
					
					
						commit
						885e82c9b0
					
				| 
						 | 
				
			
			@ -1,7 +1,6 @@
 | 
			
		|||
# coding: utf8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import ujson
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
 | 
			
		||||
from cymem.cymem cimport Pool
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +14,6 @@ from .tokens.doc cimport Doc
 | 
			
		|||
from .attrs cimport TAG
 | 
			
		||||
from .gold cimport GoldParse
 | 
			
		||||
from .attrs cimport *
 | 
			
		||||
from . import util
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cpdef enum:
 | 
			
		||||
| 
						 | 
				
			
			@ -108,55 +106,15 @@ cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
cdef class Tagger:
 | 
			
		||||
    """
 | 
			
		||||
    Annotate part-of-speech tags on Doc objects.
 | 
			
		||||
    """
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def load(cls, path, vocab, require=False):
 | 
			
		||||
        """
 | 
			
		||||
        Load the statistical model from the supplied path.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            path (Path):
 | 
			
		||||
                The path to load from.
 | 
			
		||||
            vocab (Vocab):
 | 
			
		||||
                The vocabulary. Must be shared by the documents to be processed.
 | 
			
		||||
            require (bool):
 | 
			
		||||
                Whether to raise an error if the files are not found.
 | 
			
		||||
        Returns (Tagger):
 | 
			
		||||
            The newly created object.
 | 
			
		||||
        """
 | 
			
		||||
        # TODO: Change this to expect config.json when we don't have to
 | 
			
		||||
        # support old data.
 | 
			
		||||
        path = util.ensure_path(path)
 | 
			
		||||
        if (path / 'templates.json').exists():
 | 
			
		||||
            with (path / 'templates.json').open('r', encoding='utf8') as file_:
 | 
			
		||||
                templates = ujson.load(file_)
 | 
			
		||||
        elif require:
 | 
			
		||||
            raise IOError(
 | 
			
		||||
                "Required file %s/templates.json not found when loading Tagger" % str(path))
 | 
			
		||||
        else:
 | 
			
		||||
            templates = cls.feature_templates
 | 
			
		||||
        self = cls(vocab, model=None, feature_templates=templates)
 | 
			
		||||
 | 
			
		||||
        if (path / 'model').exists():
 | 
			
		||||
            self.model.load(str(path / 'model'))
 | 
			
		||||
        elif require:
 | 
			
		||||
            raise IOError(
 | 
			
		||||
                "Required file %s/model not found when loading Tagger" % str(path))
 | 
			
		||||
        return self
 | 
			
		||||
    """Annotate part-of-speech tags on Doc objects."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, Vocab vocab, TaggerModel model=None, **cfg):
 | 
			
		||||
        """
 | 
			
		||||
        Create a Tagger.
 | 
			
		||||
        """Create a Tagger.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            vocab (Vocab):
 | 
			
		||||
                The vocabulary object. Must be shared with documents to be processed.
 | 
			
		||||
            model (thinc.linear.AveragedPerceptron):
 | 
			
		||||
                The statistical model.
 | 
			
		||||
        Returns (Tagger):
 | 
			
		||||
            The newly constructed object.
 | 
			
		||||
        vocab (Vocab): The vocabulary object. Must be shared with documents to
 | 
			
		||||
            be processed.
 | 
			
		||||
        model (thinc.linear.AveragedPerceptron): The statistical model.
 | 
			
		||||
        RETURNS (Tagger): The newly constructed object.
 | 
			
		||||
        """
 | 
			
		||||
        if model is None:
 | 
			
		||||
            model = TaggerModel(cfg.get('features', self.feature_templates),
 | 
			
		||||
| 
						 | 
				
			
			@ -186,13 +144,9 @@ cdef class Tagger:
 | 
			
		|||
        tokens._py_tokens = [None] * tokens.length
 | 
			
		||||
 | 
			
		||||
    def __call__(self, Doc tokens):
 | 
			
		||||
        """
 | 
			
		||||
        Apply the tagger, setting the POS tags onto the Doc object.
 | 
			
		||||
        """Apply the tagger, setting the POS tags onto the Doc object.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
        tokens (Doc): The tokens to be tagged.
 | 
			
		||||
        Returns:
 | 
			
		||||
            None
 | 
			
		||||
        """
 | 
			
		||||
        if tokens.length == 0:
 | 
			
		||||
            return 0
 | 
			
		||||
| 
						 | 
				
			
			@ -215,34 +169,25 @@ cdef class Tagger:
 | 
			
		|||
        tokens._py_tokens = [None] * tokens.length
 | 
			
		||||
 | 
			
		||||
    def pipe(self, stream, batch_size=1000, n_threads=2):
 | 
			
		||||
        """
 | 
			
		||||
        Tag a stream of documents.
 | 
			
		||||
        """Tag a stream of documents.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
        stream: The sequence of documents to tag.
 | 
			
		||||
            batch_size (int):
 | 
			
		||||
                The number of documents to accumulate into a working set.
 | 
			
		||||
            n_threads (int):
 | 
			
		||||
                The number of threads with which to work on the buffer in parallel,
 | 
			
		||||
                if the Matcher implementation supports multi-threading.
 | 
			
		||||
        Yields:
 | 
			
		||||
            Doc Documents, in order.
 | 
			
		||||
        batch_size (int): The number of documents to accumulate into a working set.
 | 
			
		||||
        n_threads (int): The number of threads with which to work on the buffer
 | 
			
		||||
            in parallel, if the Tagger implementation supports multi-threading.
 | 
			
		||||
        YIELDS (Doc): Documents, in order.
 | 
			
		||||
        """
 | 
			
		||||
        for doc in stream:
 | 
			
		||||
            self(doc)
 | 
			
		||||
            yield doc
 | 
			
		||||
 | 
			
		||||
    def update(self, Doc tokens, GoldParse gold, itn=0):
 | 
			
		||||
        """
 | 
			
		||||
        Update the statistical model, with tags supplied for the given document.
 | 
			
		||||
        """Update the statistical model, with tags supplied for the given document.
 | 
			
		||||
 | 
			
		||||
        Arguments:
 | 
			
		||||
            doc (Doc):
 | 
			
		||||
                The document to update on.
 | 
			
		||||
            gold (GoldParse):
 | 
			
		||||
                Manager for the gold-standard tags.
 | 
			
		||||
        Returns (int):
 | 
			
		||||
            Number of tags correct.
 | 
			
		||||
        tokens (Doc): The document to update on.
 | 
			
		||||
        gold (GoldParse): Manager for the gold-standard tags.
 | 
			
		||||
        RETURNS (int): Number of tags predicted correctly.
 | 
			
		||||
        """
 | 
			
		||||
        gold_tag_strs = gold.tags
 | 
			
		||||
        assert len(tokens) == len(gold_tag_strs)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user