mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Filter out 0-length examples in Corpus
This commit is contained in:
parent
52338a07bb
commit
f5532757a3
|
@ -48,15 +48,19 @@ class Corpus:
|
||||||
if len(reference) >= max_length >= 1:
|
if len(reference) >= max_length >= 1:
|
||||||
if reference.is_sentenced:
|
if reference.is_sentenced:
|
||||||
for ref_sent in reference.sents:
|
for ref_sent in reference.sents:
|
||||||
yield Example(
|
eg = Example(
|
||||||
nlp.make_doc(ref_sent.text),
|
nlp.make_doc(ref_sent.text),
|
||||||
ref_sent.as_doc()
|
ref_sent.as_doc()
|
||||||
)
|
)
|
||||||
|
if len(eg.x):
|
||||||
|
yield eg
|
||||||
else:
|
else:
|
||||||
yield Example(
|
eg = Example(
|
||||||
nlp.make_doc(reference.text),
|
nlp.make_doc(reference.text),
|
||||||
reference
|
reference
|
||||||
)
|
)
|
||||||
|
if len(eg.x):
|
||||||
|
yield eg
|
||||||
|
|
||||||
def make_examples_gold_preproc(self, nlp, reference_docs):
|
def make_examples_gold_preproc(self, nlp, reference_docs):
|
||||||
for reference in reference_docs:
|
for reference in reference_docs:
|
||||||
|
@ -65,7 +69,7 @@ class Corpus:
|
||||||
else:
|
else:
|
||||||
ref_sents = [reference]
|
ref_sents = [reference]
|
||||||
for ref_sent in ref_sents:
|
for ref_sent in ref_sents:
|
||||||
yield Example(
|
eg = Example(
|
||||||
Doc(
|
Doc(
|
||||||
nlp.vocab,
|
nlp.vocab,
|
||||||
words=[w.text for w in ref_sent],
|
words=[w.text for w in ref_sent],
|
||||||
|
@ -73,6 +77,8 @@ class Corpus:
|
||||||
),
|
),
|
||||||
ref_sent
|
ref_sent
|
||||||
)
|
)
|
||||||
|
if len(eg.x):
|
||||||
|
yield eg
|
||||||
|
|
||||||
def read_docbin(self, vocab, locs):
|
def read_docbin(self, vocab, locs):
|
||||||
""" Yield training examples as example dicts """
|
""" Yield training examples as example dicts """
|
||||||
|
|
Loading…
Reference in New Issue
Block a user