Auto-format and update URL

2025-07-03 11:23:12 +03:00 · 2020-07-04 14:23:44 +02:00 · 2020-07-04 14:23:44 +02:00 · abd173937f
commit abd173937f
parent 99aff16d60
1 changed files with 8 additions and 11 deletions
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -8,7 +8,7 @@ class Corpus:
    """An annotated corpus, reading train and dev datasets from
    the DocBin (.spacy) format.
-    DOCS: https://spacy.io/api/goldcorpus
+    DOCS: https://spacy.io/api/corpus
    """
    def __init__(self, train_loc, dev_loc, limit=0):
@@ -49,15 +49,12 @@ class Corpus:
                Doc(
                    nlp.vocab,
                    words=[word.text for word in reference],
-                    spaces=[bool(word.whitespace_) for word in reference]
+                    spaces=[bool(word.whitespace_) for word in reference],
                ),
-                reference
+                reference,
            )
        else:
-            return Example(
+            return Example(nlp.make_doc(reference.text), reference)
-                nlp.make_doc(reference.text),
-                reference
-            )
    def make_examples(self, nlp, reference_docs, max_length=0):
        for reference in reference_docs:
@@ -72,7 +69,6 @@ class Corpus:
                    elif max_length == 0 or len(ref_sent) < max_length:
                        yield self._make_example(nlp, ref_sent.as_doc(), False)
    def make_examples_gold_preproc(self, nlp, reference_docs):
        for reference in reference_docs:
            if reference.is_sentenced:
@@ -111,8 +107,9 @@ class Corpus:
            i += 1
        return n
-    def train_dataset(self, nlp, *, shuffle=True, gold_preproc=False,
+    def train_dataset(
-            max_length=0, **kwargs):
+        self, nlp, *, shuffle=True, gold_preproc=False, max_length=0, **kwargs
+    ):
        ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
        if gold_preproc:
            examples = self.make_examples_gold_preproc(nlp, ref_docs)