mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Auto-format and update URL
This commit is contained in:
parent
99aff16d60
commit
abd173937f
|
@ -8,7 +8,7 @@ class Corpus:
|
||||||
"""An annotated corpus, reading train and dev datasets from
|
"""An annotated corpus, reading train and dev datasets from
|
||||||
the DocBin (.spacy) format.
|
the DocBin (.spacy) format.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/goldcorpus
|
DOCS: https://spacy.io/api/corpus
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, train_loc, dev_loc, limit=0):
|
def __init__(self, train_loc, dev_loc, limit=0):
|
||||||
|
@ -49,15 +49,12 @@ class Corpus:
|
||||||
Doc(
|
Doc(
|
||||||
nlp.vocab,
|
nlp.vocab,
|
||||||
words=[word.text for word in reference],
|
words=[word.text for word in reference],
|
||||||
spaces=[bool(word.whitespace_) for word in reference]
|
spaces=[bool(word.whitespace_) for word in reference],
|
||||||
),
|
),
|
||||||
reference
|
reference,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return Example(
|
return Example(nlp.make_doc(reference.text), reference)
|
||||||
nlp.make_doc(reference.text),
|
|
||||||
reference
|
|
||||||
)
|
|
||||||
|
|
||||||
def make_examples(self, nlp, reference_docs, max_length=0):
|
def make_examples(self, nlp, reference_docs, max_length=0):
|
||||||
for reference in reference_docs:
|
for reference in reference_docs:
|
||||||
|
@ -72,7 +69,6 @@ class Corpus:
|
||||||
elif max_length == 0 or len(ref_sent) < max_length:
|
elif max_length == 0 or len(ref_sent) < max_length:
|
||||||
yield self._make_example(nlp, ref_sent.as_doc(), False)
|
yield self._make_example(nlp, ref_sent.as_doc(), False)
|
||||||
|
|
||||||
|
|
||||||
def make_examples_gold_preproc(self, nlp, reference_docs):
|
def make_examples_gold_preproc(self, nlp, reference_docs):
|
||||||
for reference in reference_docs:
|
for reference in reference_docs:
|
||||||
if reference.is_sentenced:
|
if reference.is_sentenced:
|
||||||
|
@ -111,8 +107,9 @@ class Corpus:
|
||||||
i += 1
|
i += 1
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def train_dataset(self, nlp, *, shuffle=True, gold_preproc=False,
|
def train_dataset(
|
||||||
max_length=0, **kwargs):
|
self, nlp, *, shuffle=True, gold_preproc=False, max_length=0, **kwargs
|
||||||
|
):
|
||||||
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
|
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
|
||||||
if gold_preproc:
|
if gold_preproc:
|
||||||
examples = self.make_examples_gold_preproc(nlp, ref_docs)
|
examples = self.make_examples_gold_preproc(nlp, ref_docs)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user