Fix Docs.from_docs for all empty docs (#8009)

2025-12-23 10:03:15 +03:00 · 2021-05-05 18:44:14 +02:00 · 2021-05-05 18:44:14 +02:00 · a71194362f
commit a71194362f
parent debaab7021
2 changed files with 9 additions and 5 deletions
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -411,6 +411,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
    assert "group" in m_doc.spans
    assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
+    # can merge empty docs
+    doc = Doc.from_docs([en_tokenizer("")] * 10)
 def test_doc_api_from_docs_ents(en_tokenizer):
    texts = ["Merging the docs is fun.", "They don't think alike."]
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1158,11 +1158,12 @@ cdef class Doc:
            for i, array in enumerate(arrays[:-1]):
                if len(array) > 0 and not docs[i][-1].is_space:
                    array[-1][spacy_index] = 1
-            token_offset = -1
-            for doc in docs[:-1]:
-                token_offset += len(doc)
-                if not (len(doc) > 0 and doc[-1].is_space):
-                    concat_spaces[token_offset] = True
+            if len(concat_spaces) > 0:
+                token_offset = -1
+                for doc in docs[:-1]:
+                    token_offset += len(doc)
+                    if not (len(doc) > 0 and doc[-1].is_space):
+                        concat_spaces[token_offset] = True
        concat_array = numpy.concatenate(arrays)