feat(api): remove unused database indexes and improve new failed findings index (#9904)

2026-07-24 13:01:56 +00:00 · 2026-01-28 12:35:36 +01:00
parent 5407f3c68e
commit 8783e963d3
7 changed files with 224 additions and 31 deletions
@@ -9,10 +9,12 @@ All notable changes to the **Prowler API** are documented in this file.
 - Attack Paths: Bedrock Code Interpreter and AttachRolePolicy privilege escalation queries [(#9885)](https://github.com/prowler-cloud/prowler/pull/9885)
 - Added memory optimizations for large compliance report generation [(#9444)](https://github.com/prowler-cloud/prowler/pull/9444)
 - `GET /api/v1/resources/{id}/events` endpoint to retrieve AWS resource modification history from CloudTrail [(#9101)](https://github.com/prowler-cloud/prowler/pull/9101)
+- Partial index on findings to speed up new failed findings queries [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)

 ### 🔄 Changed

 - Lazy-load providers and compliance data to reduce API/worker startup memory and time [(#9857)](https://github.com/prowler-cloud/prowler/pull/9857)
+- Remove unused indexes [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)

 ---

@@ -450,7 +450,7 @@ def create_index_on_partitions(
            all_partitions=True
        )
    """
-    with connection.cursor() as cursor:
+    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
@@ -0,0 +1,41 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """
+    Drop unused indexes on partitioned tables (findings, resource_finding_mappings).
+
+    NOTE: RemoveIndexConcurrently cannot be used on partitioned tables in PostgreSQL.
+    Standard RemoveIndex drops the parent index, which cascades to all partitions.
+    """
+
+    dependencies = [
+        ("api", "0070_attack_paths_scan"),
+    ]
+
+    operations = [
+        migrations.RemoveIndex(
+            model_name="finding",
+            name="gin_findings_search_idx",
+        ),
+        migrations.RemoveIndex(
+            model_name="finding",
+            name="gin_find_service_idx",
+        ),
+        migrations.RemoveIndex(
+            model_name="finding",
+            name="gin_find_region_idx",
+        ),
+        migrations.RemoveIndex(
+            model_name="finding",
+            name="gin_find_rtype_idx",
+        ),
+        migrations.RemoveIndex(
+            model_name="finding",
+            name="find_delta_new_idx",
+        ),
+        migrations.RemoveIndex(
+            model_name="resourcefindingmapping",
+            name="rfm_tenant_finding_idx",
+        ),
+    ]
@@ -0,0 +1,91 @@
+"""
+Drop unused indexes on non-partitioned tables.
+
+These tables are not partitioned, so RemoveIndexConcurrently can be used safely.
+"""
+
+from uuid import uuid4
+
+from django.contrib.postgres.operations import RemoveIndexConcurrently
+from django.db import migrations, models
+
+
+def drop_resource_scan_summary_resource_id_index(apps, schema_editor):
+    with schema_editor.connection.cursor() as cursor:
+        cursor.execute(
+            """
+            SELECT idx_ns.nspname, idx.relname
+            FROM pg_class tbl
+            JOIN pg_namespace tbl_ns ON tbl_ns.oid = tbl.relnamespace
+            JOIN pg_index i ON i.indrelid = tbl.oid
+            JOIN pg_class idx ON idx.oid = i.indexrelid
+            JOIN pg_namespace idx_ns ON idx_ns.oid = idx.relnamespace
+            JOIN pg_attribute a
+                ON a.attrelid = tbl.oid
+                AND a.attnum = (i.indkey::int[])[0]
+            WHERE tbl_ns.nspname = ANY (current_schemas(false))
+              AND tbl.relname = %s
+              AND i.indnatts = 1
+              AND a.attname = %s
+            """,
+            ["resource_scan_summaries", "resource_id"],
+        )
+        row = cursor.fetchone()
+
+    if not row:
+        return
+
+    schema_name, index_name = row
+    quote_name = schema_editor.connection.ops.quote_name
+    qualified_name = f"{quote_name(schema_name)}.{quote_name(index_name)}"
+    schema_editor.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {qualified_name};")
+
+
+class Migration(migrations.Migration):
+    atomic = False
+
+    dependencies = [
+        ("api", "0071_drop_partitioned_indexes"),
+    ]
+
+    operations = [
+        RemoveIndexConcurrently(
+            model_name="resource",
+            name="gin_resources_search_idx",
+        ),
+        RemoveIndexConcurrently(
+            model_name="resourcetag",
+            name="gin_resource_tags_search_idx",
+        ),
+        RemoveIndexConcurrently(
+            model_name="scansummary",
+            name="ss_tenant_scan_service_idx",
+        ),
+        RemoveIndexConcurrently(
+            model_name="complianceoverview",
+            name="comp_ov_cp_id_idx",
+        ),
+        RemoveIndexConcurrently(
+            model_name="complianceoverview",
+            name="comp_ov_req_fail_idx",
+        ),
+        RemoveIndexConcurrently(
+            model_name="complianceoverview",
+            name="comp_ov_cp_id_req_fail_idx",
+        ),
+        migrations.SeparateDatabaseAndState(
+            database_operations=[
+                migrations.RunPython(
+                    drop_resource_scan_summary_resource_id_index,
+                    reverse_code=migrations.RunPython.noop,
+                ),
+            ],
+            state_operations=[
+                migrations.AlterField(
+                    model_name="resourcescansummary",
+                    name="resource_id",
+                    field=models.UUIDField(default=uuid4),
+                ),
+            ],
+        ),
+    ]
@@ -0,0 +1,31 @@
+from functools import partial
+
+from django.db import migrations
+
+from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+
+
+class Migration(migrations.Migration):
+    atomic = False
+
+    dependencies = [
+        ("api", "0072_drop_unused_indexes"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            partial(
+                create_index_on_partitions,
+                parent_table="findings",
+                index_name="find_tenant_scan_fail_new_idx",
+                columns="tenant_id, scan_id",
+                where="status = 'FAIL' AND delta = 'new'",
+                all_partitions=True,
+            ),
+            reverse_code=partial(
+                drop_index_on_partitions,
+                parent_table="findings",
+                index_name="find_tenant_scan_fail_new_idx",
+            ),
+        )
+    ]
@@ -0,0 +1,54 @@
+from django.db import migrations, models
+
+INDEX_NAME = "find_tenant_scan_fail_new_idx"  # parent index; suffix of each child index name
+PARENT_TABLE = "findings"  # partitioned table whose partitions are read from pg_inherits
+
+
+def create_parent_and_attach(apps, schema_editor):
+    with schema_editor.connection.cursor() as cursor:
+        cursor.execute(
+            f"CREATE INDEX {INDEX_NAME} ON ONLY {PARENT_TABLE} "
+            f"USING btree (tenant_id, scan_id) "
+            f"WHERE status = 'FAIL' AND delta = 'new'"
+        )
+        cursor.execute(
+            "SELECT inhrelid::regclass::text "
+            "FROM pg_inherits "
+            "WHERE inhparent = %s::regclass",
+            [PARENT_TABLE],
+        )
+        for (partition,) in cursor.fetchall():
+            child_idx = f"{partition.replace('.', '_')}_{INDEX_NAME}"
+            cursor.execute(f"ALTER INDEX {INDEX_NAME} ATTACH PARTITION {child_idx}")
+
+
+def drop_parent_index(apps, schema_editor):
+    with schema_editor.connection.cursor() as cursor:
+        cursor.execute(f"DROP INDEX IF EXISTS {INDEX_NAME}")
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("api", "0073_findings_fail_new_index_partitions"),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(
+            state_operations=[
+                migrations.AddIndex(
+                    model_name="finding",
+                    index=models.Index(
+                        condition=models.Q(status="FAIL", delta="new"),
+                        fields=["tenant_id", "scan_id"],
+                        name=INDEX_NAME,
+                    ),
+                ),
+            ],
+            database_operations=[
+                migrations.RunPython(
+                    create_parent_and_attach,
+                    reverse_code=drop_parent_index,
+                ),
+            ],
+        ),
+    ]
@@ -12,7 +12,6 @@ from cryptography.fernet import Fernet, InvalidToken
 from django.conf import settings
 from django.contrib.auth.models import AbstractBaseUser
 from django.contrib.postgres.fields import ArrayField
-from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.search import SearchVector, SearchVectorField
 from django.contrib.sites.models import Site
 from django.core.exceptions import ValidationError
@@ -741,10 +740,6 @@ class ResourceTag(RowLevelSecurityProtectedModel):
    class Meta(RowLevelSecurityProtectedModel.Meta):
        db_table = "resource_tags"

-        indexes = [
-            GinIndex(fields=["text_search"], name="gin_resource_tags_search_idx"),
-        ]
-
        constraints = [
            models.UniqueConstraint(
                fields=("tenant_id", "key", "value"),
@@ -853,7 +848,6 @@ class Resource(RowLevelSecurityProtectedModel):
                fields=["tenant_id", "service", "region", "type"],
                name="resource_tenant_metadata_idx",
            ),
-            GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
            models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"),
            models.Index(
                fields=["tenant_id", "provider_id"],
@@ -1038,23 +1032,19 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):

        indexes = [
            models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
-            GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
            models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
            models.Index(
                fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
            ),
            models.Index(
-                fields=["tenant_id", "id"],
-                condition=Q(delta="new"),
-                name="find_delta_new_idx",
+                condition=models.Q(status=StatusChoices.FAIL, delta="new"),
+                fields=["tenant_id", "scan_id"],
+                name="find_tenant_scan_fail_new_idx",
            ),
            models.Index(
                fields=["tenant_id", "uid", "-inserted_at"],
                name="find_tenant_uid_inserted_idx",
            ),
-            GinIndex(fields=["resource_services"], name="gin_find_service_idx"),
-            GinIndex(fields=["resource_regions"], name="gin_find_region_idx"),
-            GinIndex(fields=["resource_types"], name="gin_find_rtype_idx"),
            models.Index(
                fields=["tenant_id", "scan_id", "check_id"],
                name="find_tenant_scan_check_idx",
@@ -1122,10 +1112,6 @@ class ResourceFindingMapping(PostgresPartitionedModel, RowLevelSecurityProtected
        #   - id

        indexes = [
-            models.Index(
-                fields=["tenant_id", "finding_id"],
-                name="rfm_tenant_finding_idx",
-            ),
            models.Index(
                fields=["tenant_id", "resource_id"],
                name="rfm_tenant_resource_idx",
@@ -1442,14 +1428,6 @@ class ComplianceOverview(RowLevelSecurityProtectedModel):
                statements=["SELECT", "INSERT", "DELETE"],
            ),
        ]
-        indexes = [
-            models.Index(fields=["compliance_id"], name="comp_ov_cp_id_idx"),
-            models.Index(fields=["requirements_failed"], name="comp_ov_req_fail_idx"),
-            models.Index(
-                fields=["compliance_id", "requirements_failed"],
-                name="comp_ov_cp_id_req_fail_idx",
-            ),
-        ]

    class JSONAPIMeta:
        resource_name = "compliance-overviews"
@@ -1615,10 +1593,6 @@ class ScanSummary(RowLevelSecurityProtectedModel):
                fields=["tenant_id", "scan_id"],
                name="scan_summaries_tenant_scan_idx",
            ),
-            models.Index(
-                fields=["tenant_id", "scan_id", "service"],
-                name="ss_tenant_scan_service_idx",
-            ),
            models.Index(
                fields=["tenant_id", "scan_id", "severity"],
                name="ss_tenant_scan_severity_idx",
@@ -2033,7 +2007,7 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):

 class ResourceScanSummary(RowLevelSecurityProtectedModel):
    scan_id = models.UUIDField(default=uuid7, db_index=True)
-    resource_id = models.UUIDField(default=uuid4, db_index=True)
+    resource_id = models.UUIDField(default=uuid4)
    service = models.CharField(max_length=100)
    region = models.CharField(max_length=100)
    resource_type = models.CharField(max_length=100)