mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Fix Doc.from_docs for all empty docs (#8009)
This commit is contained in:
parent
debaab7021
commit
a71194362f
|
@ -411,6 +411,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
|
|||
assert "group" in m_doc.spans
|
||||
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
|
||||
|
||||
# can merge empty docs
|
||||
doc = Doc.from_docs([en_tokenizer("")] * 10)
|
||||
|
||||
|
||||
def test_doc_api_from_docs_ents(en_tokenizer):
|
||||
texts = ["Merging the docs is fun.", "They don't think alike."]
|
||||
|
|
|
@ -1158,6 +1158,7 @@ cdef class Doc:
|
|||
for i, array in enumerate(arrays[:-1]):
|
||||
if len(array) > 0 and not docs[i][-1].is_space:
|
||||
array[-1][spacy_index] = 1
|
||||
if len(concat_spaces) > 0:
|
||||
token_offset = -1
|
||||
for doc in docs[:-1]:
|
||||
token_offset += len(doc)
|
||||
|
|
Loading…
Reference in New Issue
Block a user