Merge pull request #7491 from adrianeboyd/bugfix/corpus-depr-props

Update deprecated doc.is_sentenced in Corpus
This commit is contained in:
Ines Montani 2021-03-21 02:17:24 +01:00 committed by GitHub
commit 66ebd5c69e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -155,7 +155,7 @@ class Corpus:
continue continue
elif self.max_length == 0 or len(reference) < self.max_length: elif self.max_length == 0 or len(reference) < self.max_length:
yield self._make_example(nlp, reference, False) yield self._make_example(nlp, reference, False)
elif reference.is_sentenced: elif reference.has_annotation("SENT_START"):
for ref_sent in reference.sents: for ref_sent in reference.sents:
if len(ref_sent) == 0: if len(ref_sent) == 0:
continue continue
@@ -166,7 +166,7 @@ class Corpus:
self, nlp: "Language", reference_docs: Iterable[Doc] self, nlp: "Language", reference_docs: Iterable[Doc]
) -> Iterator[Example]: ) -> Iterator[Example]:
for reference in reference_docs: for reference in reference_docs:
if reference.is_sentenced: if reference.has_annotation("SENT_START"):
ref_sents = [sent.as_doc() for sent in reference.sents] ref_sents = [sent.as_doc() for sent in reference.sents]
else: else:
ref_sents = [reference] ref_sents = [reference]