From f67c5dbc8cfa6783247ed021b78dbda182614c00 Mon Sep 17 00:00:00 2001 From: Steven DeMartini Date: Sat, 29 Apr 2023 12:01:55 -0700 Subject: [PATCH 1/4] Revert field resolver logic to fix poor query performance This reverts the change to `convert_field_to_djangomodel` introduced in https://github.com/graphql-python/graphene-django/pull/1315 for the reasons discussed here https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857. As mentioned there, without reverting this code, "queries are forced every time an object is resolved, making an exponential number of queries when nesting without any possibility of optimizing". That regression prevented `graphene-django-optimizer` from working with `graphene-django` v3.0.0b9+ (where this change first was published), as discussed in https://github.com/graphql-python/graphene-django/issues/1356#issuecomment-1284718187, https://github.com/tfoxy/graphene-django-optimizer/issues/86, and https://github.com/tfoxy/graphene-django-optimizer/pull/83#issuecomment-1451987397. For now, this marks the two tests that depended on this problematic code as "expected to fail", and perhaps they can be reintroduced if there's a way to support this logic in a way that does not prevent `select_related` and `prefetch_related` query-optimization and introduce nested N+1s. As mentioned here https://github.com/graphql-python/graphene-django/pull/1315#issuecomment-1468594361, this is blocking upgrade to graphene-django v3 for many users, and fixing this would allow many to begin upgrading and contributing to keep graphene-django going. --- graphene_django/converter.py | 21 +-------------------- graphene_django/tests/test_get_queryset.py | 8 ++++++++ 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/graphene_django/converter.py b/graphene_django/converter.py index 386103a..9ad6c9d 100644 --- a/graphene_django/converter.py +++ b/graphene_django/converter.py @@ -315,26 +315,7 @@ def convert_field_to_djangomodel(field, registry=None): if not _type: return - class CustomField(Field): - def wrap_resolve(self, parent_resolver): - """ - Implements a custom resolver which go through the `get_node` method to ensure that - it goes through the `get_queryset` method of the DjangoObjectType. - """ - resolver = super().wrap_resolve(parent_resolver) - - def custom_resolver(root, info, **args): - fk_obj = resolver(root, info, **args) - if not isinstance(fk_obj, model): - # In case the resolver is a custom one that overwrites - # the default Django resolver - # This happens, for example, when using custom awaitable resolvers. - return fk_obj - return _type.get_node(info, fk_obj.pk) - - return custom_resolver - - return CustomField( + return Field( _type, description=get_django_field_description(field), required=not field.null, diff --git a/graphene_django/tests/test_get_queryset.py b/graphene_django/tests/test_get_queryset.py index 91bdc70..63027b9 100644 --- a/graphene_django/tests/test_get_queryset.py +++ b/graphene_django/tests/test_get_queryset.py @@ -121,6 +121,10 @@ class TestShouldCallGetQuerySetOnForeignKey: assert not result.errors assert result.data == {"reporter": {"firstName": "Jane"}} + # TODO: This test is currently expected to fail because the logic it depended on has been + # removed, due to poor SQL performance and preventing query-optimization (see + # https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857) + @pytest.mark.xfail def test_get_queryset_called_on_foreignkey(self): # If a user tries to access a reporter through an article they should get our authorization error query = """ @@ -291,6 +295,10 @@ class TestShouldCallGetQuerySetOnForeignKeyNode: assert not result.errors assert result.data == {"reporter": {"firstName": "Jane"}} + # TODO: This test is currently expected to fail because the logic it depended on has been + # removed, due to poor SQL performance and preventing query-optimization (see + # https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857) + @pytest.mark.xfail def test_get_queryset_called_on_foreignkey(self): # If a user tries to access a reporter through an article they should get our authorization error query = """ From 9796e93fc7d9cd1d440408f59857ebcca2f0f344 Mon Sep 17 00:00:00 2001 From: Steven DeMartini Date: Mon, 1 May 2023 09:00:30 -0700 Subject: [PATCH 2/4] Remove obsolete tests and add note about rationale --- graphene_django/tests/test_get_queryset.py | 146 +-------------------- 1 file changed, 6 insertions(+), 140 deletions(-) diff --git a/graphene_django/tests/test_get_queryset.py b/graphene_django/tests/test_get_queryset.py index 63027b9..7cbaa54 100644 --- a/graphene_django/tests/test_get_queryset.py +++ b/graphene_django/tests/test_get_queryset.py @@ -16,6 +16,12 @@ class TestShouldCallGetQuerySetOnForeignKey: Check that the get_queryset method is called in both forward and reversed direction of a foreignkey on types. (see issue #1111) + + NOTE: For now, we do not expect this get_queryset method to be called for nested + objects, as the original attempt to do so prevented SQL query-optimization with + `select_related`/`prefetch_related` and caused N+1 queries. See discussions here + https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857 + and here https://github.com/graphql-python/graphene-django/pull/1401. """ @pytest.fixture(autouse=True) @@ -121,73 +127,6 @@ class TestShouldCallGetQuerySetOnForeignKey: assert not result.errors assert result.data == {"reporter": {"firstName": "Jane"}} - # TODO: This test is currently expected to fail because the logic it depended on has been - # removed, due to poor SQL performance and preventing query-optimization (see - # https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857) - @pytest.mark.xfail - def test_get_queryset_called_on_foreignkey(self): - # If a user tries to access a reporter through an article they should get our authorization error - query = """ - query getArticle($id: ID!) { - article(id: $id) { - headline - reporter { - firstName - } - } - } - """ - - result = self.schema.execute(query, variables={"id": self.articles[0].id}) - assert len(result.errors) == 1 - assert result.errors[0].message == "Not authorized to access reporters." - - # An admin user should be able to get reporters through an article - query = """ - query getArticle($id: ID!) { - article(id: $id) { - headline - reporter { - firstName - } - } - } - """ - - result = self.schema.execute( - query, - variables={"id": self.articles[0].id}, - context_value={"admin": True}, - ) - assert not result.errors - assert result.data["article"] == { - "headline": "A fantastic article", - "reporter": {"firstName": "Jane"}, - } - - # An admin user should not be able to access draft article through a reporter - query = """ - query getReporter($id: ID!) { - reporter(id: $id) { - firstName - articles { - headline - } - } - } - """ - - result = self.schema.execute( - query, - variables={"id": self.reporter.id}, - context_value={"admin": True}, - ) - assert not result.errors - assert result.data["reporter"] == { - "firstName": "Jane", - "articles": [{"headline": "A fantastic article"}], - } - class TestShouldCallGetQuerySetOnForeignKeyNode: """ @@ -294,76 +233,3 @@ class TestShouldCallGetQuerySetOnForeignKeyNode: ) assert not result.errors assert result.data == {"reporter": {"firstName": "Jane"}} - - # TODO: This test is currently expected to fail because the logic it depended on has been - # removed, due to poor SQL performance and preventing query-optimization (see - # https://github.com/graphql-python/graphene-django/pull/1315/files#r1015659857) - @pytest.mark.xfail - def test_get_queryset_called_on_foreignkey(self): - # If a user tries to access a reporter through an article they should get our authorization error - query = """ - query getArticle($id: ID!) { - article(id: $id) { - headline - reporter { - firstName - } - } - } - """ - - result = self.schema.execute( - query, variables={"id": to_global_id("ArticleType", self.articles[0].id)} - ) - assert len(result.errors) == 1 - assert result.errors[0].message == "Not authorized to access reporters." - - # An admin user should be able to get reporters through an article - query = """ - query getArticle($id: ID!) { - article(id: $id) { - headline - reporter { - firstName - } - } - } - """ - - result = self.schema.execute( - query, - variables={"id": to_global_id("ArticleType", self.articles[0].id)}, - context_value={"admin": True}, - ) - assert not result.errors - assert result.data["article"] == { - "headline": "A fantastic article", - "reporter": {"firstName": "Jane"}, - } - - # An admin user should not be able to access draft article through a reporter - query = """ - query getReporter($id: ID!) { - reporter(id: $id) { - firstName - articles { - edges { - node { - headline - } - } - } - } - } - """ - - result = self.schema.execute( - query, - variables={"id": to_global_id("ReporterType", self.reporter.id)}, - context_value={"admin": True}, - ) - assert not result.errors - assert result.data["reporter"] == { - "firstName": "Jane", - "articles": {"edges": [{"node": {"headline": "A fantastic article"}}]}, - } From 20a6cecc4ca77b3866073ef66bb5ef63b4beab05 Mon Sep 17 00:00:00 2001 From: Steven DeMartini Date: Tue, 2 May 2023 09:00:22 -0700 Subject: [PATCH 3/4] Add test validating query performance with select_related + prefetch_related This test passes after reverting the `CustomField` resolver change introduced in https://github.com/graphql-python/graphene-django/pull/1315, but fails with that resolver code present. For instance, adding back the resolver code gives a test failure showing: ``` Failed: Expected to perform 2 queries but 11 were done ``` This should ensure there aren't regressions that prevent query-optimization in the future. --- graphene_django/tests/test_fields.py | 153 ++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 3 deletions(-) diff --git a/graphene_django/tests/test_fields.py b/graphene_django/tests/test_fields.py index 835de78..8c7b78d 100644 --- a/graphene_django/tests/test_fields.py +++ b/graphene_django/tests/test_fields.py @@ -1,5 +1,6 @@ import datetime -from django.db.models import Count +import re +from django.db.models import Count, Prefetch import pytest @@ -7,8 +8,12 @@ from graphene import List, NonNull, ObjectType, Schema, String from ..fields import DjangoListField from ..types import DjangoObjectType -from .models import Article as ArticleModel -from .models import Reporter as ReporterModel +from .models import ( + Article as ArticleModel, + Film as FilmModel, + FilmDetails as FilmDetailsModel, + Reporter as ReporterModel, +) class TestDjangoListField: @@ -500,3 +505,145 @@ class TestDjangoListField: assert not result.errors assert result.data == {"reporters": [{"firstName": "Tara"}]} + + def test_select_related_and_prefetch_related_are_respected( + self, django_assert_num_queries + ): + class Article(DjangoObjectType): + class Meta: + model = ArticleModel + fields = ("headline", "editor", "reporter") + + class Film(DjangoObjectType): + class Meta: + model = FilmModel + fields = ("genre", "details") + + class FilmDetail(DjangoObjectType): + class Meta: + model = FilmDetailsModel + fields = ("location",) + + class Reporter(DjangoObjectType): + class Meta: + model = ReporterModel + fields = ("first_name", "articles", "films") + + class Query(ObjectType): + articles = DjangoListField(Article) + + @staticmethod + def resolve_articles(root, info): + # Optimize for querying associated editors and reporters, and the films and film + # details of those reporters. This is similar to what would happen using a library + # like https://github.com/tfoxy/graphene-django-optimizer for a query like the one + # below (albeit simplified and hardcoded here). + return ArticleModel.objects.select_related( + "editor", "reporter" + ).prefetch_related( + Prefetch( + "reporter__films", + queryset=FilmModel.objects.select_related("details"), + ), + ) + + schema = Schema(query=Query) + + query = """ + query { + articles { + headline + + editor { + firstName + } + + reporter { + firstName + + films { + genre + + details { + location + } + } + } + } + } + """ + + r1 = ReporterModel.objects.create(first_name="Tara", last_name="West") + r2 = ReporterModel.objects.create(first_name="Debra", last_name="Payne") + + ArticleModel.objects.create( + headline="Amazing news", + reporter=r1, + pub_date=datetime.date.today(), + pub_date_time=datetime.datetime.now(), + editor=r2, + ) + ArticleModel.objects.create( + headline="Not so good news", + reporter=r2, + pub_date=datetime.date.today(), + pub_date_time=datetime.datetime.now(), + editor=r1, + ) + + film1 = FilmModel.objects.create(genre="ac") + film2 = FilmModel.objects.create(genre="ot") + film3 = FilmModel.objects.create(genre="do") + FilmDetailsModel.objects.create(location="Hollywood", film=film1) + FilmDetailsModel.objects.create(location="Antarctica", film=film3) + r1.films.add(film1, film2) + r2.films.add(film3) + + # We expect 2 queries to be performed based on the above resolver definition: one for all + # articles joined with the reporters model (for associated editors and reporters), and one + # for the films prefetch (which includes its `select_related` JOIN logic in its queryset) + with django_assert_num_queries(2) as captured: + result = schema.execute(query) + + assert not result.errors + assert result.data == { + "articles": [ + { + "headline": "Amazing news", + "editor": {"firstName": "Debra"}, + "reporter": { + "firstName": "Tara", + "films": [ + {"genre": "AC", "details": {"location": "Hollywood"}}, + {"genre": "OT", "details": None}, + ], + }, + }, + { + "headline": "Not so good news", + "editor": {"firstName": "Tara"}, + "reporter": { + "firstName": "Debra", + "films": [ + {"genre": "DO", "details": {"location": "Antarctica"}}, + ], + }, + }, + ] + } + + assert len(captured.captured_queries) == 2 # Sanity-check + + # First we should have queried for all articles in a single query, joining on the reporters + # model (for the editors and reporters ForeignKeys) + assert re.match( + r'SELECT .* "tests_article" INNER JOIN "tests_reporter"', + captured.captured_queries[0]["sql"], + ) + + # Then we should have queried for all of the films of all reporters, joined with the film + # details for each film, using a single query + assert re.match( + r'SELECT .* FROM "tests_film" INNER JOIN "tests_film_reporters" .* LEFT OUTER JOIN "tests_filmdetails"', + captured.captured_queries[1]["sql"], + ) From a8ceca77ed96c46414099c7595449975a19954c8 Mon Sep 17 00:00:00 2001 From: Firas K <3097061+firaskafri@users.noreply.github.com> Date: Wed, 3 May 2023 11:45:56 +0300 Subject: [PATCH 4/4] Bump version --- graphene_django/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphene_django/__init__.py b/graphene_django/__init__.py index 82c4fb3..12408a4 100644 --- a/graphene_django/__init__.py +++ b/graphene_django/__init__.py @@ -1,7 +1,7 @@ from .fields import DjangoConnectionField, DjangoListField from .types import DjangoObjectType -__version__ = "3.0.1" +__version__ = "3.0.2" __all__ = [ "__version__",