#20327: Device queries now faster when including ConfigContexts (#20346)

* Fixes #20327: Device queries are now faster when including ConfidContexts Move .distinct() from main queryset to tag subquery to eliminate performance bottleneck when querying devices with config contexts. The .distinct() call on the main device queryset was causing PostgreSQL to sort all devices before pagination, resulting in 15x slower API responses for large installations (10k+ devices, 100+ config contexts). Moving .distinct() to the tag subquery eliminates duplicates at their source (GenericForeignKey tag relationships) while preserving the fix for issues #5314 and #5387 without impacting overall query performance. * Add performance regression test for config context annotation The test verifies that: - Main device queries do not use expensive DISTINCT operations - Tag subqueries properly use DISTINCT to prevent duplicates from issue #5387 This ensures the optimization from issue #20327 (moving .distinct() from maintaining query to tag subquery) cannot be accidentally reverted while maintaining the correctness guarantees for issues #5314 and #5387. * Address PR feedback, clean up new regression test The new regression test now avoids casting the query to a string and inspecting the string, which was brittle at best. The new approach asserts directly against `queryset.distinct` for the main query and then finds the subquery that we expect to have distinct set and verifies that is in fact the case. I also realized that the use of `connection.query_log` was problematic, in that it didn't seem to return any queries as expected. This meant that the test was actually not making any assertions since none of the code inside of the for loop over `device_queries` ever ran.
2025-09-15 17:04:56 +00:00
parent 192440a4d3
commit fb004bb94e
2 changed files with 66 additions and 22 deletions
--- a/netbox/extras/querysets.py
+++ b/netbox/extras/querysets.py
@@ -93,7 +93,7 @@ class ConfigContextModelQuerySet(RestrictedQuerySet):
                    _data=EmptyGroupByJSONBAgg('data', ordering=['weight', 'name'])
                ).values("_data").order_by()
            )
-        ).distinct()
+        )

    def _get_config_context_filters(self):
        # Construct the set of Q objects for the specific object types
@@ -117,7 +117,7 @@ class ConfigContextModelQuerySet(RestrictedQuerySet):
                    ).values_list(
                        'tag_id',
                        flat=True
-                    )
+                    ).distinct()
                )
            ) | Q(tags=None),
            is_active=True,
--- a/netbox/extras/tests/test_models.py
+++ b/netbox/extras/tests/test_models.py
@@ -8,7 +8,7 @@ from django.test import tag, TestCase

 from core.models import DataSource, ObjectType
 from dcim.models import Device, DeviceRole, DeviceType, Location, Manufacturer, Platform, Region, Site, SiteGroup
-from extras.models import ConfigContext, ConfigContextProfile, ConfigTemplate, ImageAttachment, Tag
+from extras.models import ConfigContext, ConfigContextProfile, ConfigTemplate, ImageAttachment, Tag, TaggedItem
 from tenancy.models import Tenant, TenantGroup
 from utilities.exceptions import AbortRequest
 from virtualization.models import Cluster, ClusterGroup, ClusterType, VirtualMachine
@@ -536,6 +536,34 @@ class ConfigContextTest(TestCase):
            vms[1].get_config_context()
        )

+    def test_valid_local_context_data(self):
+        device = Device.objects.first()
+        device.local_context_data = None
+        device.clean()
+
+        device.local_context_data = {"foo": "bar"}
+        device.clean()
+
+    def test_invalid_local_context_data(self):
+        device = Device.objects.first()
+
+        device.local_context_data = ""
+        with self.assertRaises(ValidationError):
+            device.clean()
+
+        device.local_context_data = 0
+        with self.assertRaises(ValidationError):
+            device.clean()
+
+        device.local_context_data = False
+        with self.assertRaises(ValidationError):
+            device.clean()
+
+        device.local_context_data = 'foo'
+        with self.assertRaises(ValidationError):
+            device.clean()
+
+    @tag('regression')
    def test_multiple_tags_return_distinct_objects(self):
        """
        Tagged items use a generic relationship, which results in duplicate rows being returned when queried.
@@ -573,6 +601,7 @@ class ConfigContextTest(TestCase):
        self.assertEqual(ConfigContext.objects.get_for_object(device).count(), 1)
        self.assertEqual(device.get_config_context(), annotated_queryset[0].get_config_context())

+    @tag('regression')
    def test_multiple_tags_return_distinct_objects_with_separate_config_contexts(self):
        """
        Tagged items use a generic relationship, which results in duplicate rows being returned when queried.
@@ -621,32 +650,47 @@ class ConfigContextTest(TestCase):
        self.assertEqual(ConfigContext.objects.get_for_object(device).count(), 2)
        self.assertEqual(device.get_config_context(), annotated_queryset[0].get_config_context())

-    def test_valid_local_context_data(self):
+    @tag('performance', 'regression')
+    def test_config_context_annotation_query_optimization(self):
+        """
+        Regression test for issue #20327: Ensure config context annotation
+        doesn't use expensive DISTINCT on main query.
+
+        Verifies that DISTINCT is only used in tag subquery where needed,
+        not on the main device query which is expensive for large datasets.
+        """
        device = Device.objects.first()
-        device.local_context_data = None
-        device.clean()
+        queryset = Device.objects.filter(pk=device.pk).annotate_config_context_data()

-        device.local_context_data = {"foo": "bar"}
-        device.clean()
+        # Main device query should NOT use DISTINCT
+        self.assertFalse(queryset.query.distinct)

-    def test_invalid_local_context_data(self):
-        device = Device.objects.first()
+        # Check that tag subqueries DO use DISTINCT by inspecting the annotation
+        config_annotation = queryset.query.annotations.get('config_context_data')
+        self.assertIsNotNone(config_annotation)

-        device.local_context_data = ""
-        with self.assertRaises(ValidationError):
-            device.clean()
+        def find_tag_subqueries(where_node):
+            """Find subqueries in WHERE clause that relate to tag filtering"""
+            subqueries = []

-        device.local_context_data = 0
-        with self.assertRaises(ValidationError):
-            device.clean()
+            def traverse(node):
+                if hasattr(node, 'children'):
+                    for child in node.children:
+                        try:
+                            if child.rhs.query.model is TaggedItem:
+                                subqueries.append(child.rhs.query)
+                        except AttributeError:
+                            traverse(child)
+            traverse(where_node)
+            return subqueries

-        device.local_context_data = False
-        with self.assertRaises(ValidationError):
-            device.clean()
+        # Find subqueries in the WHERE clause that should have DISTINCT
+        tag_subqueries = find_tag_subqueries(config_annotation.query.where)
+        distinct_subqueries = [sq for sq in tag_subqueries if sq.distinct]

-        device.local_context_data = 'foo'
-        with self.assertRaises(ValidationError):
-            device.clean()
+        # Verify we found at least one DISTINCT subquery for tags
+        self.assertEqual(len(distinct_subqueries), 1)
+        self.assertTrue(distinct_subqueries[0].distinct)


 class ConfigTemplateTest(TestCase):