mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Raise if empty examples (#6052)
* raise error if no valid Example objects were found during initialization
* fix max_length parameter
* remove commit from other branch

Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com>
This commit is contained in:
parent
24e138b8ac
commit
e92e850c72
|
@ -1166,14 +1166,20 @@ class Language:
|
|||
if not hasattr(get_examples, "__call__"):
|
||||
err = Errors.E930.format(name="Language", obj=type(get_examples))
|
||||
raise ValueError(err)
|
||||
valid_examples = False
|
||||
for example in get_examples():
|
||||
if not isinstance(example, Example):
|
||||
err = Errors.E978.format(
|
||||
name="Language.begin_training", types=type(example)
|
||||
)
|
||||
raise ValueError(err)
|
||||
else:
|
||||
valid_examples = True
|
||||
for word in [t.text for t in example.reference]:
|
||||
_ = self.vocab[word] # noqa: F841
|
||||
if not valid_examples:
|
||||
err = Errors.E930.format(name="Language", obj="empty list")
|
||||
raise ValueError(err)
|
||||
if device >= 0: # TODO: do we need this here?
|
||||
require_gpu(device)
|
||||
if self.vocab.vectors.data.shape[1] >= 1:
|
||||
|
|
|
@ -47,7 +47,7 @@ class Corpus:
|
|||
*,
|
||||
limit: int = 0,
|
||||
gold_preproc: bool = False,
|
||||
max_length: bool = False,
|
||||
max_length: int = 0,
|
||||
) -> None:
|
||||
self.path = util.ensure_path(path)
|
||||
self.gold_preproc = gold_preproc
|
||||
|
@ -89,7 +89,7 @@ class Corpus:
|
|||
if self.gold_preproc:
|
||||
examples = self.make_examples_gold_preproc(nlp, ref_docs)
|
||||
else:
|
||||
examples = self.make_examples(nlp, ref_docs, self.max_length)
|
||||
examples = self.make_examples(nlp, ref_docs)
|
||||
yield from examples
|
||||
|
||||
def _make_example(
|
||||
|
@ -108,18 +108,18 @@ class Corpus:
|
|||
return Example(nlp.make_doc(reference.text), reference)
|
||||
|
||||
def make_examples(
|
||||
self, nlp: "Language", reference_docs: Iterable[Doc], max_length: int = 0
|
||||
self, nlp: "Language", reference_docs: Iterable[Doc]
|
||||
) -> Iterator[Example]:
|
||||
for reference in reference_docs:
|
||||
if len(reference) == 0:
|
||||
continue
|
||||
elif max_length == 0 or len(reference) < max_length:
|
||||
elif self.max_length == 0 or len(reference) < self.max_length:
|
||||
yield self._make_example(nlp, reference, False)
|
||||
elif reference.is_sentenced:
|
||||
for ref_sent in reference.sents:
|
||||
if len(ref_sent) == 0:
|
||||
continue
|
||||
elif max_length == 0 or len(ref_sent) < max_length:
|
||||
elif self.max_length == 0 or len(ref_sent) < self.max_length:
|
||||
yield self._make_example(nlp, ref_sent.as_doc(), False)
|
||||
|
||||
def make_examples_gold_preproc(
|
||||
|
|
Loading…
Reference in New Issue
Block a user