Auto-format and update URL

This commit is contained in:
Ines Montani 2020-07-04 14:23:44 +02:00
parent 99aff16d60
commit abd173937f

View File

@@ -8,7 +8,7 @@ class Corpus:
"""An annotated corpus, reading train and dev datasets from """An annotated corpus, reading train and dev datasets from
the DocBin (.spacy) format. the DocBin (.spacy) format.
DOCS: https://spacy.io/api/goldcorpus DOCS: https://spacy.io/api/corpus
""" """
def __init__(self, train_loc, dev_loc, limit=0): def __init__(self, train_loc, dev_loc, limit=0):
@@ -49,15 +49,12 @@ class Corpus:
Doc( Doc(
nlp.vocab, nlp.vocab,
words=[word.text for word in reference], words=[word.text for word in reference],
spaces=[bool(word.whitespace_) for word in reference] spaces=[bool(word.whitespace_) for word in reference],
), ),
reference reference,
) )
else: else:
return Example( return Example(nlp.make_doc(reference.text), reference)
nlp.make_doc(reference.text),
reference
)
def make_examples(self, nlp, reference_docs, max_length=0): def make_examples(self, nlp, reference_docs, max_length=0):
for reference in reference_docs: for reference in reference_docs:
@@ -72,7 +69,6 @@ class Corpus:
elif max_length == 0 or len(ref_sent) < max_length: elif max_length == 0 or len(ref_sent) < max_length:
yield self._make_example(nlp, ref_sent.as_doc(), False) yield self._make_example(nlp, ref_sent.as_doc(), False)
def make_examples_gold_preproc(self, nlp, reference_docs): def make_examples_gold_preproc(self, nlp, reference_docs):
for reference in reference_docs: for reference in reference_docs:
if reference.is_sentenced: if reference.is_sentenced:
@@ -111,8 +107,9 @@ class Corpus:
i += 1 i += 1
return n return n
def train_dataset(self, nlp, *, shuffle=True, gold_preproc=False, def train_dataset(
max_length=0, **kwargs): self, nlp, *, shuffle=True, gold_preproc=False, max_length=0, **kwargs
):
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc)) ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
if gold_preproc: if gold_preproc:
examples = self.make_examples_gold_preproc(nlp, ref_docs) examples = self.make_examples_gold_preproc(nlp, ref_docs)