mirror of
https://github.com/django-polymorphic/django-polymorphic.git
synced 2026-01-15 04:18:41 +03:00
- Update polymorphic queryset iterators to honor chunk_size.
- Add iteration tests. - Update documentation with iteration performance considerations.
This commit is contained in:
parent
9cb89d94b4
commit
9dda8d114f
|
|
@ -16,6 +16,7 @@ API Documentation
|
|||
polymorphic.formsets
|
||||
polymorphic.managers
|
||||
polymorphic.models
|
||||
polymorphic.query
|
||||
polymorphic.showfields
|
||||
polymorphic.templatetags
|
||||
polymorphic.utils
|
||||
|
|
|
|||
7
docs/api/polymorphic.query.rst
Normal file
7
docs/api/polymorphic.query.rst
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
polymorphic.query
|
||||
=================
|
||||
|
||||
.. automodule:: polymorphic.query
|
||||
:members:
|
||||
:show-inheritance:
|
||||
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
Changelog
|
||||
=========
|
||||
|
||||
v4.2.0 (2025-12-01)
|
||||
v4.2.0 (2025-12-04)
|
||||
-------------------
|
||||
|
||||
* Implemented `Defer to chunk_size parameter on .iterators for fetching get_real_instances() <https://github.com/jazzband/django-polymorphic/pull/672>`_
|
||||
* Fixed `Show full admin context (breadcrumb and logout nav) in model type selection admin form <https://github.com/jazzband/django-polymorphic/pull/580>`_
|
||||
* Fixed `Issue with Autocomplete Fields in StackedPolymorphicInline.Child Inline <https://github.com/jazzband/django-polymorphic/issues/546>`_
|
||||
* Support Python 3.14 and Django 6.0; drop support for EOL Python 3.9 and Django 3.2, 4.0, 4.1 and 5.0.
|
||||
|
|
|
|||
|
|
@ -27,6 +27,54 @@ if all are class ``ModelA``. If 50 objects are ``ModelA`` and 50 are ``ModelB``,
|
|||
are executed. The pathological worst case is 101 db queries if result_objects contains 100 different
|
||||
object types (with all of them subclasses of ``ModelA``).
|
||||
|
||||
Iteration: Memory vs DB Round Trips
|
||||
-----------------------------------
|
||||
|
||||
When iterating over large QuerySets, there is a trade-off between memory consumption and number
|
||||
of round trips to the database. One additional query is needed per model subclass present in the
|
||||
QuerySet and these queries take the form of ``SELECT ... WHERE pk IN (....)`` with a potentially
|
||||
large number of IDs in the IN clause. All models in the IN clause will be loaded into memory during
|
||||
iteration.
|
||||
|
||||
To balance this trade-off, by default a maximum of 2000 objects are requested at once. This means
|
||||
that if your QuerySet contains 10,000 objects of 3 different subclasses, then 16 queries will be
|
||||
executed: 1 to fetch the base objects, and 5 chunks (10,000 / 2,000 == 5) * 3 subclasses == 15 more to fetch the subclasses.
|
||||
|
||||
The ``chunk_size`` parameter on :meth:`~django.db.models.query.QuerySet.iterator` can be used to
|
||||
change the number of objects loaded into memory at once during iteration. For example, to load 5000 objects at once:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
for obj in ModelA.objects.all().iterator(chunk_size=5000):
|
||||
process(obj)
|
||||
|
||||
.. note::
|
||||
|
||||
``chunk_size`` on non-polymorphic QuerySets controls the number of rows fetched from the
|
||||
database at once, but for polymorphic QuerySets the behavior is more analogous to its behavior
|
||||
when :meth:`~django.db.models.query.QuerySet.prefetch_related` is used.
|
||||
|
||||
Some database backends limit the number of parameters in a query. For those backends the
|
||||
``chunk_size`` will be restricted to be no greater than that limit. This limit can be checked in:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from django.db import connection
|
||||
|
||||
print(connection.features.max_query_params)
|
||||
|
||||
|
||||
You may change the global default fallback ``chunk_size`` by modifying the
|
||||
:attr:`polymorphic.query.Polymorphic_QuerySet_objects_per_request` attribute. Place code like
|
||||
this somewhere that will be executed during startup:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from polymorphic import query
|
||||
|
||||
query.Polymorphic_QuerySet_objects_per_request = 5000
|
||||
|
||||
|
||||
:class:`~django.contrib.contenttypes.models.ContentType` retrieval
|
||||
------------------------------------------------------------------
|
||||
|
||||
|
|
|
|||
4
justfile
4
justfile
|
|
@ -182,11 +182,11 @@ test-lock +PACKAGES: _lock-python
|
|||
test *TESTS:
|
||||
@just run pytest --cov-append {{ TESTS }}
|
||||
|
||||
test-db DB_CLIENT="dev":
|
||||
test-db DB_CLIENT="dev" *TESTS:
|
||||
# No Optional Dependency Unit Tests
|
||||
# todo clean this up, rerunning a lot of tests
|
||||
uv sync --group {{ DB_CLIENT }}
|
||||
@just run pytest --cov-append
|
||||
@just run pytest --cov-append {{ TESTS }}
|
||||
|
||||
# run the pre-commit checks
|
||||
precommit:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from collections import defaultdict
|
|||
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.core.exceptions import FieldDoesNotExist
|
||||
from django.db import connections
|
||||
from django.db.models import FilteredRelation
|
||||
from django.db.models.query import ModelIterable, Q, QuerySet
|
||||
|
||||
|
|
@ -17,9 +18,11 @@ from .query_translate import (
|
|||
translate_polymorphic_Q_object,
|
||||
)
|
||||
|
||||
# chunk-size: maximum number of objects requested per db-request
|
||||
# by the polymorphic queryset.iterator() implementation
|
||||
Polymorphic_QuerySet_objects_per_request = 100
|
||||
Polymorphic_QuerySet_objects_per_request = 2000
|
||||
"""
|
||||
The maximum number of objects requested per db-request by the polymorphic
|
||||
queryset.iterator() implementation
|
||||
"""
|
||||
|
||||
|
||||
class PolymorphicModelIterable(ModelIterable):
|
||||
|
|
@ -44,15 +47,29 @@ class PolymorphicModelIterable(ModelIterable):
|
|||
for o in real_results: yield o
|
||||
|
||||
but it requests the objects in chunks from the database,
|
||||
with Polymorphic_QuerySet_objects_per_request per chunk
|
||||
with QuerySet.iterator(chunk_size) per chunk
|
||||
"""
|
||||
|
||||
# some databases have a limit on the number of query parameters, we must
|
||||
# respect this for generating get_real_instances queries because those
|
||||
# queries do a large WHERE IN clause with primary keys
|
||||
max_chunk = connections[self.queryset.db].features.max_query_params
|
||||
sql_chunk = self.chunk_size if self.chunked_fetch else None
|
||||
if max_chunk:
|
||||
sql_chunk = (
|
||||
max_chunk
|
||||
if not self.chunked_fetch # chunk_size was not provided
|
||||
else min(max_chunk, self.chunk_size or max_chunk)
|
||||
)
|
||||
|
||||
sql_chunk = sql_chunk or Polymorphic_QuerySet_objects_per_request
|
||||
|
||||
while True:
|
||||
base_result_objects = []
|
||||
reached_end = False
|
||||
|
||||
# Make sure the base iterator is read in chunks instead of
|
||||
# reading it completely, in case our caller read only a few objects.
|
||||
for i in range(Polymorphic_QuerySet_objects_per_request):
|
||||
# Fetch in chunks
|
||||
for _ in range(sql_chunk):
|
||||
try:
|
||||
o = next(base_iter)
|
||||
base_result_objects.append(o)
|
||||
|
|
@ -60,10 +77,7 @@ class PolymorphicModelIterable(ModelIterable):
|
|||
reached_end = True
|
||||
break
|
||||
|
||||
real_results = self.queryset._get_real_instances(base_result_objects)
|
||||
|
||||
for o in real_results:
|
||||
yield o
|
||||
yield from self.queryset._get_real_instances(base_result_objects)
|
||||
|
||||
if reached_end:
|
||||
return
|
||||
|
|
|
|||
|
|
@ -3,10 +3,11 @@ import re
|
|||
import uuid
|
||||
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models
|
||||
from django.db import models, connection
|
||||
from django.db.models import Case, Count, FilteredRelation, Q, Sum, When, F
|
||||
from django.db.utils import IntegrityError, NotSupportedError
|
||||
from django.test import TransactionTestCase
|
||||
from django.test.utils import CaptureQueriesContext
|
||||
|
||||
from polymorphic import query_translate
|
||||
from polymorphic.managers import PolymorphicManager
|
||||
|
|
@ -1234,3 +1235,179 @@ class PolymorphicTests(TransactionTestCase):
|
|||
def test_non_polymorphic_parent(self):
|
||||
obj = NonPolymorphicParent.objects.create()
|
||||
assert obj.delete()
|
||||
|
||||
def test_iteration(self):
|
||||
Model2A.objects.all().delete()
|
||||
|
||||
for i in range(250):
|
||||
Model2B.objects.create(field1=f"B1-{i}", field2=f"B2-{i}")
|
||||
for i in range(1000):
|
||||
Model2C.objects.create(
|
||||
field1=f"C1-{i + 250}", field2=f"C2-{i + 250}", field3=f"C3-{i + 250}"
|
||||
)
|
||||
for i in range(2000):
|
||||
Model2D.objects.create(
|
||||
field1=f"D1-{i + 1250}",
|
||||
field2=f"D2-{i + 1250}",
|
||||
field3=f"D3-{i + 1250}",
|
||||
field4=f"D4-{i + 1250}",
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as base_all:
|
||||
for _ in Model2A.objects.non_polymorphic().all():
|
||||
pass # Evaluating the queryset
|
||||
|
||||
len_base_all = len(base_all)
|
||||
assert len_base_all == 1, (
|
||||
f"Expected 1 queries for chunked iteration over 3250 base objects. {len_base_all}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as base_iterator:
|
||||
for _ in Model2A.objects.non_polymorphic().iterator():
|
||||
pass # Evaluating the queryset
|
||||
|
||||
len_base_iterator = len(base_iterator)
|
||||
assert len_base_iterator == 1, (
|
||||
f"Expected 1 queries for chunked iteration over 3250 base objects. {len_base_iterator}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as base_chunked:
|
||||
for _ in Model2A.objects.non_polymorphic().iterator(chunk_size=1000):
|
||||
pass # Evaluating the queryset
|
||||
|
||||
len_base_chunked = len(base_chunked)
|
||||
assert len_base_chunked == 1, (
|
||||
f"Expected 1 queries for chunked iteration over 3250 base objects. {len_base_chunked}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as poly_all:
|
||||
b, c, d = 0, 0, 0
|
||||
for idx, obj in enumerate(reversed(list(Model2A.objects.order_by("-pk").all()))):
|
||||
if isinstance(obj, Model2D):
|
||||
d += 1
|
||||
assert obj.field1 == f"D1-{idx}"
|
||||
assert obj.field2 == f"D2-{idx}"
|
||||
assert obj.field3 == f"D3-{idx}"
|
||||
assert obj.field4 == f"D4-{idx}"
|
||||
elif isinstance(obj, Model2C):
|
||||
c += 1
|
||||
assert obj.field1 == f"C1-{idx}"
|
||||
assert obj.field2 == f"C2-{idx}"
|
||||
assert obj.field3 == f"C3-{idx}"
|
||||
elif isinstance(obj, Model2B):
|
||||
b += 1
|
||||
assert obj.field1 == f"B1-{idx}"
|
||||
assert obj.field2 == f"B2-{idx}"
|
||||
else:
|
||||
assert False, "Unexpected model type"
|
||||
assert (b, c, d) == (250, 1000, 2000)
|
||||
|
||||
assert len(poly_all) <= 7, (
|
||||
f"Expected < 7 queries for chunked iteration over 3250 "
|
||||
f"objects with 3 child models and the default chunk size of 2000, encountered "
|
||||
f"{len(poly_all)}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as poly_all:
|
||||
b, c, d = 0, 0, 0
|
||||
for idx, obj in enumerate(Model2A.objects.order_by("pk").iterator(chunk_size=None)):
|
||||
if isinstance(obj, Model2D):
|
||||
d += 1
|
||||
assert obj.field1 == f"D1-{idx}"
|
||||
assert obj.field2 == f"D2-{idx}"
|
||||
assert obj.field3 == f"D3-{idx}"
|
||||
assert obj.field4 == f"D4-{idx}"
|
||||
elif isinstance(obj, Model2C):
|
||||
c += 1
|
||||
assert obj.field1 == f"C1-{idx}"
|
||||
assert obj.field2 == f"C2-{idx}"
|
||||
assert obj.field3 == f"C3-{idx}"
|
||||
elif isinstance(obj, Model2B):
|
||||
b += 1
|
||||
assert obj.field1 == f"B1-{idx}"
|
||||
assert obj.field2 == f"B2-{idx}"
|
||||
else:
|
||||
assert False, "Unexpected model type"
|
||||
assert (b, c, d) == (250, 1000, 2000)
|
||||
|
||||
assert len(poly_all) <= 7, (
|
||||
f"Expected < 7 queries for chunked iteration over 3250 "
|
||||
f"objects with 3 child models and a chunk size of 2000, encountered "
|
||||
f"{len(poly_all)}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as poly_iterator:
|
||||
b, c, d = 0, 0, 0
|
||||
for idx, obj in enumerate(Model2A.objects.order_by("pk").iterator()):
|
||||
if isinstance(obj, Model2D):
|
||||
d += 1
|
||||
assert obj.field1 == f"D1-{idx}"
|
||||
assert obj.field2 == f"D2-{idx}"
|
||||
assert obj.field3 == f"D3-{idx}"
|
||||
assert obj.field4 == f"D4-{idx}"
|
||||
elif isinstance(obj, Model2C):
|
||||
c += 1
|
||||
assert obj.field1 == f"C1-{idx}"
|
||||
assert obj.field2 == f"C2-{idx}"
|
||||
assert obj.field3 == f"C3-{idx}"
|
||||
elif isinstance(obj, Model2B):
|
||||
b += 1
|
||||
assert obj.field1 == f"B1-{idx}"
|
||||
assert obj.field2 == f"B2-{idx}"
|
||||
else:
|
||||
assert False, "Unexpected model type"
|
||||
assert (b, c, d) == (250, 1000, 2000)
|
||||
|
||||
assert len(poly_iterator) <= 7, (
|
||||
f"Expected <= 7 queries for chunked iteration over 3250 "
|
||||
f"objects with 3 child models and a default chunk size of 2000, encountered "
|
||||
f"{len(poly_iterator)}"
|
||||
)
|
||||
|
||||
with CaptureQueriesContext(connection) as poly_chunked:
|
||||
b, c, d = 0, 0, 0
|
||||
for idx, obj in enumerate(Model2A.objects.order_by("pk").iterator(chunk_size=4000)):
|
||||
if isinstance(obj, Model2D):
|
||||
d += 1
|
||||
assert obj.field1 == f"D1-{idx}"
|
||||
assert obj.field2 == f"D2-{idx}"
|
||||
assert obj.field3 == f"D3-{idx}"
|
||||
assert obj.field4 == f"D4-{idx}"
|
||||
elif isinstance(obj, Model2C):
|
||||
c += 1
|
||||
assert obj.field1 == f"C1-{idx}"
|
||||
assert obj.field2 == f"C2-{idx}"
|
||||
assert obj.field3 == f"C3-{idx}"
|
||||
elif isinstance(obj, Model2B):
|
||||
b += 1
|
||||
assert obj.field1 == f"B1-{idx}"
|
||||
assert obj.field2 == f"B2-{idx}"
|
||||
else:
|
||||
assert False, "Unexpected model type"
|
||||
assert (b, c, d) == (250, 1000, 2000)
|
||||
|
||||
assert len(poly_chunked) <= 7, (
|
||||
f"Expected <= 7 queries for chunked iteration over 3250 objects with 3 child "
|
||||
f"models and a chunk size of 4000, encountered {len(poly_chunked)}"
|
||||
)
|
||||
|
||||
if connection.vendor == "postgresql":
|
||||
assert len(poly_chunked) == 4, "On postgres with a 4000 chunk size, expected 4 queries"
|
||||
|
||||
try:
|
||||
result = Model2A.objects.all().delete()
|
||||
assert result == (
|
||||
11500,
|
||||
{
|
||||
"tests.Model2D": 2000,
|
||||
"tests.Model2C": 3000,
|
||||
"tests.Model2A": 3250,
|
||||
"tests.Model2B": 3250,
|
||||
},
|
||||
)
|
||||
except AttributeError:
|
||||
if connection.vendor == "oracle":
|
||||
# FIXME
|
||||
# known deletion issue with oracle
|
||||
# https://github.com/jazzband/django-polymorphic/issues/673
|
||||
pass
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user