* Add docstring to English class

Matthew Honnibal 2015-01-27 02:45:21 +11:00
parent 830b9358f8
commit c38c62d4a3


@@ -33,6 +33,8 @@ def get_lex_props(string):
 LOCAL_DATA_DIR = path.join(path.dirname(__file__), 'data')
 
+parse_if_model_present = -1
+
 
 class English(object):
     """The English NLP pipeline.
@@ -43,23 +45,10 @@ class English(object):
         data_dir (unicode): A path to a directory, from which to load the pipeline.
             If None, looks for a directory named "data/" in the same directory as
             the present file, i.e. path.join(path.dirname(__file__, 'data')).
 
-            If path.join(data_dir, 'pos') exists, the tagger is loaded from it.
-            If path.join(data_dir, 'deps') exists, the parser is loaded from it.
-
-            See Pipeline Directory Structure for details.
-
-    Attributes:
-        vocab (spacy.vocab.Vocab): The lexicon.
-
-        strings (spacy.strings.StringStore): Encode/decode strings to/from integer IDs.
-
-        tokenizer (spacy.tokenizer.Tokenizer): The start of the pipeline.
-
-        tagger (spacy.en.pos.EnPosTagger):
-            The part-of-speech tagger, which also performs lemmatization and
-            morphological analysis.
-
-        parser (spacy.syntax.parser.GreedyParser):
-            A greedy shift-reduce dependency parser.
+            If path.join(data_dir, 'pos') exists, the tagger is loaded from there.
+
+            If path.join(data_dir, 'deps') exists, the parser is loaded from there.
     """
 
     def __init__(self, data_dir=LOCAL_DATA_DIR):
         self._data_dir = data_dir
@@ -99,24 +88,51 @@ class English(object):
             self._parser = GreedyParser(path.join(self._data_dir, 'deps'))
         return self._parser
 
-    def __call__(self, text, tag=True, parse=True):
-        """Apply the pipeline to some text.
-
-        The tagger and parser are lazy-loaded the first time they are required.
-        Loading the parser model usually takes 5-10 seconds.
+    def __call__(self, text, tag=True, parse=parse_if_model_present):
+        """Apply the pipeline to some text. The text can span multiple sentences,
+        and can contain arbitrary whitespace. Alignment into the original string
+        is preserved.
 
         Args:
             text (unicode): The text to be processed.
 
         Keyword args:
-            tag (bool): Whether to add part-of-speech tags to the text. This
-                will also set morphological analysis and lemmas.
+            tag (bool): Whether to add part-of-speech tags to the text. Also
+                sets morphological analysis and lemmas.
 
-            parse (bool): Whether to add dependency-heads and labels to the text.
+            parse (True, False, -1): Whether to add labelled syntactic dependencies.
+                -1 (default) is "guess": It will guess True if tag=True and the
+                model has been installed.
 
         Returns:
             tokens (spacy.tokens.Tokens):
+
+        >>> from spacy.en import English
+        >>> nlp = English()
+        >>> tokens = nlp('An example sentence. Another example sentence.')
+        >>> tokens[0].orth_, tokens[0].head.tag_
+        ('An', 'NN')
         """
+        if parse == True and tag == False:
+            msg = ("Incompatible arguments: tag=False, parse=True\n"
+                   "Part-of-speech tags are required for parsing.")
+            raise ValueError(msg)
         tokens = self.tokenizer(text)
-        if tag or parse and self.has_tagger_model:
+        if parse == -1 and tag == False:
+            parse = False
+        elif parse == -1 and not self.has_parser_model:
+            parse = False
+        if tag and self.has_tagger_model:
             self.tagger(tokens)
+        if parse == True and not self.has_parser_model:
+            msg = ("Received parse=True, but parser model not found.\n\n"
+                   "Run:\n"
+                   "$ python -m spacy.en.download\n"
+                   "To install the model.")
+            raise IOError(msg)
         if parse and self.has_parser_model:
             self.parser(tokens)
         return tokens