mirror of
https://github.com/django-import-export/django-import-export.git
synced 2025-12-13 12:43:59 +03:00
Skip empty rows in XLSX (#2028)
* Prevent empty lines in XLSX * Test for create_dataset empty rows * Update AUTHORS * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix * Add IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES flag * Add docs * Update tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated docs * updated changelog * Update changelog.rst * performance improvement --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: matthewhegarty <mrhegarty@gmail.com> Co-authored-by: Matt Hegarty <matthewhegarty@users.noreply.github.com>
This commit is contained in:
parent
ffe94d16d4
commit
8a6586090b
1
AUTHORS
1
AUTHORS
|
|
@ -158,3 +158,4 @@ The following is a list of much appreciated contributors:
|
|||
* 19greg96 (Gergely Karz)
|
||||
* AyushDharDubey
|
||||
* dahvo (David Mark Awad)
|
||||
* jurrian
|
||||
|
|
|
|||
|
|
@ -5,6 +5,11 @@ Changelog
|
|||
|
||||
If upgrading from v3, v4 introduces breaking changes. Please refer to :doc:`release notes<release_notes>`.
|
||||
|
||||
4.3.6 (unreleased)
|
||||
------------------
|
||||
|
||||
- Add flag to ignore empty rows in XLSX import (`2028 <https://github.com/django-import-export/django-import-export/issues/2028>`_)
|
||||
|
||||
4.3.5 (2025-02-01)
|
||||
------------------
|
||||
|
||||
|
|
|
|||
13
docs/faq.rst
13
docs/faq.rst
|
|
@ -184,12 +184,21 @@ How to create relation during import if it does not exist
|
|||
|
||||
See :ref:`creating-non-existent-relations`.
|
||||
|
||||
How to handle large file uploads
|
||||
---------------------------------
|
||||
How to handle large file imports
|
||||
--------------------------------
|
||||
|
||||
If uploading large files, you may encounter time-outs.
|
||||
See :ref:`Using celery<celery>` and :ref:`bulk_import:Bulk imports`.
|
||||
|
||||
Performance issues or unexpected behavior during import
|
||||
-------------------------------------------------------
|
||||
|
||||
This could be due to hidden blank rows (rows without any content) in Excel files.
|
||||
Hidden blank rows can be excluded using :ref:`import_export_import_ignore_blank_lines`.
|
||||
|
||||
Refer to `this PR <https://github.com/django-import-export/django-import-export/pull/2028>`_ for more information.
|
||||
|
||||
|
||||
How to use field other than `id` in Foreign Key lookup
|
||||
------------------------------------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -254,6 +254,16 @@ The values must be those provided in ``import_export.formats.base_formats`` e.g
|
|||
|
||||
This can be set for a specific model admin by declaring the ``export_formats`` attribute.
|
||||
|
||||
.. _import_export_import_ignore_blank_lines:
|
||||
|
||||
``IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES``
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If set to ``True``, rows without content will be ignored in XLSX imports.
|
||||
This works around an old Excel 1.0 bug which causes openpyxl ``max_row`` to count all
|
||||
logical empty rows. Some editors (like LibreOffice) might add :math:`2^{20}` empty rows to the
|
||||
file, which causes a significant slowdown. By default this is ``False``.
|
||||
|
||||
.. _exampleapp:
|
||||
|
||||
Example app
|
||||
|
|
|
|||
|
|
@ -208,9 +208,18 @@ class XLSX(TablibFormat):
|
|||
rows = sheet.rows
|
||||
dataset.headers = [cell.value for cell in next(rows)]
|
||||
|
||||
ignore_blanks = getattr(
|
||||
settings, "IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES", False
|
||||
)
|
||||
for row in rows:
|
||||
row_values = [cell.value for cell in row]
|
||||
dataset.append(row_values)
|
||||
|
||||
if ignore_blanks:
|
||||
# do not add empty rows to dataset
|
||||
if not all(value is None for value in row_values):
|
||||
dataset.append(row_values)
|
||||
else:
|
||||
dataset.append(row_values)
|
||||
return dataset
|
||||
|
||||
def export_data(self, dataset, **kwargs):
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import os
|
||||
import unittest
|
||||
from io import BytesIO
|
||||
from unittest import mock
|
||||
|
||||
import openpyxl
|
||||
import tablib
|
||||
from core.tests.utils import ignore_utcnow_deprecation_warning
|
||||
from django.test import TestCase
|
||||
from django.test import TestCase, override_settings
|
||||
from django.utils.encoding import force_str
|
||||
from tablib.core import UnsupportedFormat
|
||||
|
||||
|
|
@ -115,6 +117,62 @@ class XLSXTest(TestCase):
|
|||
unittest.mock.ANY, read_only=True, data_only=True
|
||||
)
|
||||
|
||||
@override_settings(IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES=False)
def test_xlsx_create_dataset__empty_rows(self):
    """With the flag disabled, blank rows are still counted in the dataset.

    The sheet holds a header, a run of data rows, a run of all-``None``
    rows and a second run of data rows; the dataset length is expected to
    equal the number of non-header rows, blank ones included.
    """
    rows_before, empty_rows, rows_after = 3, 5, 2
    data_row = ["Data1", "Data2", "Data3"]
    blank_row = [None, None, None]

    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.append(["Header1", "Header2", "Header3"])

    # Same row order as three consecutive loops: data, blanks, data.
    body_rows = (
        [data_row] * rows_before
        + [blank_row] * empty_rows
        + [data_row] * rows_after
    )
    for values in body_rows:
        sheet.append(values)

    buffer = BytesIO()
    workbook.save(buffer)
    buffer.seek(0)

    dataset = self.format.create_dataset(buffer.getvalue())
    expected_length = rows_before + empty_rows + rows_after  # With empty rows
    assert len(dataset) == expected_length
|
||||
|
||||
@override_settings(IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES=True)
def test_xlsx_create_dataset__ignore_empty_rows(self):
    """With the flag enabled, all-``None`` rows are left out of the dataset.

    The sheet holds a header, a run of data rows, a run of all-``None``
    rows and a second run of data rows; only the data rows are expected
    to be counted.
    """
    rows_before, empty_rows, rows_after = 3, 5, 2

    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.append(["Header1", "Header2", "Header3"])

    # (row content, repeat count) in the order the rows are written.
    layout = (
        (["Data1", "Data2", "Data3"], rows_before),
        ([None, None, None], empty_rows),
        (["Data1", "Data2", "Data3"], rows_after),
    )
    for values, repeat in layout:
        for _ in range(repeat):
            sheet.append(values)

    buffer = BytesIO()
    workbook.save(buffer)
    buffer.seek(0)

    dataset = self.format.create_dataset(buffer.getvalue())
    expected_length = rows_before + rows_after  # Without empty rows
    assert len(dataset) == expected_length
|
||||
|
||||
|
||||
class CSVTest(TestCase):
|
||||
def setUp(self):
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user