From 8783e963d35337f32a3c9babd4d563dc1cd338e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Fern=C3=A1ndez=20Poyatos?= Date: Wed, 28 Jan 2026 12:35:36 +0100 Subject: [PATCH] feat(api): remove unused database indexes and improve new failed findings index (#9904) --- api/CHANGELOG.md | 2 + api/src/backend/api/db_utils.py | 2 +- .../0071_drop_partitioned_indexes.py | 41 +++++++++ .../migrations/0072_drop_unused_indexes.py | 91 +++++++++++++++++++ ...0073_findings_fail_new_index_partitions.py | 31 +++++++ .../0074_findings_fail_new_index_parent.py | 54 +++++++++++ api/src/backend/api/models.py | 34 +------ 7 files changed, 224 insertions(+), 31 deletions(-) create mode 100644 api/src/backend/api/migrations/0071_drop_partitioned_indexes.py create mode 100644 api/src/backend/api/migrations/0072_drop_unused_indexes.py create mode 100644 api/src/backend/api/migrations/0073_findings_fail_new_index_partitions.py create mode 100644 api/src/backend/api/migrations/0074_findings_fail_new_index_parent.py diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 8da7925731..c89e0b2cb7 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -9,10 +9,12 @@ All notable changes to the **Prowler API** are documented in this file. 
class Migration(migrations.Migration):
    """
    Remove indexes that are no longer used from the partitioned tables
    (findings, resource_finding_mappings).

    NOTE: PostgreSQL does not allow RemoveIndexConcurrently on partitioned
    tables, so the standard RemoveIndex is used instead; dropping the parent
    index cascades to every partition.
    """

    dependencies = [("api", "0070_attack_paths_scan")]

    # One RemoveIndex per (model, index) pair, in the same order the indexes
    # were originally listed.
    operations = [
        migrations.RemoveIndex(model_name=model_name, name=index_name)
        for model_name, index_name in (
            ("finding", "gin_findings_search_idx"),
            ("finding", "gin_find_service_idx"),
            ("finding", "gin_find_region_idx"),
            ("finding", "gin_find_rtype_idx"),
            ("finding", "find_delta_new_idx"),
            ("resourcefindingmapping", "rfm_tenant_finding_idx"),
        )
    ]
def drop_resource_scan_summary_resource_id_index(apps, schema_editor):
    """Drop the plain single-column index on ``resource_scan_summaries.resource_id``.

    The index is located through the PostgreSQL catalogs by table and column
    rather than by a hard-coded name, then dropped with
    ``DROP INDEX CONCURRENTLY`` (the enclosing migration must therefore be
    non-atomic).

    Only ordinary indexes are considered: unique / primary-key indexes (which
    may back a constraint and cannot simply be dropped) and partial indexes
    are excluded so an unrelated index on the same column is never removed.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose connection runs the lookup and
            whose ``execute`` issues the ``DROP INDEX`` statement.

    Returns:
        None. Does nothing when no matching index exists.
    """
    lookup_sql = """
        SELECT idx_ns.nspname, idx.relname
        FROM pg_class tbl
        JOIN pg_namespace tbl_ns ON tbl_ns.oid = tbl.relnamespace
        JOIN pg_index i ON i.indrelid = tbl.oid
        JOIN pg_class idx ON idx.oid = i.indexrelid
        JOIN pg_namespace idx_ns ON idx_ns.oid = idx.relnamespace
        JOIN pg_attribute a
          ON a.attrelid = tbl.oid
         AND a.attnum = (i.indkey::int[])[0]
        WHERE tbl_ns.nspname = ANY (current_schemas(false))
          AND tbl.relname = %s
          AND i.indnatts = 1
          AND a.attname = %s
          AND NOT i.indisunique
          AND NOT i.indisprimary
          AND i.indpred IS NULL
        ORDER BY idx.relname
    """

    with schema_editor.connection.cursor() as cursor:
        cursor.execute(lookup_sql, ["resource_scan_summaries", "resource_id"])
        row = cursor.fetchone()

    if not row:
        # Index already gone (or never created): nothing to drop.
        return

    schema_name, index_name = row
    quote_name = schema_editor.connection.ops.quote_name
    qualified_name = f"{quote_name(schema_name)}.{quote_name(index_name)}"
    schema_editor.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {qualified_name};")
def create_parent_and_attach(apps, schema_editor):
    """Create the parent partial index and attach each partition's index to it.

    The parent index is created with ``ON ONLY`` so that no per-partition
    index is built here; the partitions of ``PARENT_TABLE`` are then listed
    from ``pg_inherits`` and one ``ALTER INDEX ... ATTACH PARTITION`` is run
    per partition.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose connection executes the SQL.
    """
    create_parent_sql = (
        f"CREATE INDEX {INDEX_NAME} ON ONLY {PARENT_TABLE} "
        "USING btree (tenant_id, scan_id) "
        "WHERE status = 'FAIL' AND delta = 'new'"
    )
    list_partitions_sql = (
        "SELECT inhrelid::regclass::text FROM pg_inherits "
        "WHERE inhparent = %s::regclass"
    )

    with schema_editor.connection.cursor() as cur:
        cur.execute(create_parent_sql)
        cur.execute(list_partitions_sql, [PARENT_TABLE])
        partition_names = [row[0] for row in cur.fetchall()]

        for partition_name in partition_names:
            # NOTE(review): the child index name is derived here as
            # "<partition with dots replaced by underscores>_<INDEX_NAME>";
            # presumably this mirrors the naming used when the per-partition
            # indexes were created in the previous migration - confirm
            # against create_index_on_partitions.
            partition_index = partition_name.replace(".", "_") + "_" + INDEX_NAME
            cur.execute(
                f"ALTER INDEX {INDEX_NAME} ATTACH PARTITION {partition_index}"
            )
- GinIndex(fields=["text_search"], name="gin_resources_search_idx"), models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"), models.Index( fields=["tenant_id", "provider_id"], @@ -1038,23 +1032,19 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel): indexes = [ models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"), - GinIndex(fields=["text_search"], name="gin_findings_search_idx"), models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"), models.Index( fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx" ), models.Index( - fields=["tenant_id", "id"], - condition=Q(delta="new"), - name="find_delta_new_idx", + condition=models.Q(status=StatusChoices.FAIL, delta="new"), + fields=["tenant_id", "scan_id"], + name="find_tenant_scan_fail_new_idx", ), models.Index( fields=["tenant_id", "uid", "-inserted_at"], name="find_tenant_uid_inserted_idx", ), - GinIndex(fields=["resource_services"], name="gin_find_service_idx"), - GinIndex(fields=["resource_regions"], name="gin_find_region_idx"), - GinIndex(fields=["resource_types"], name="gin_find_rtype_idx"), models.Index( fields=["tenant_id", "scan_id", "check_id"], name="find_tenant_scan_check_idx", @@ -1122,10 +1112,6 @@ class ResourceFindingMapping(PostgresPartitionedModel, RowLevelSecurityProtected # - id indexes = [ - models.Index( - fields=["tenant_id", "finding_id"], - name="rfm_tenant_finding_idx", - ), models.Index( fields=["tenant_id", "resource_id"], name="rfm_tenant_resource_idx", @@ -1442,14 +1428,6 @@ class ComplianceOverview(RowLevelSecurityProtectedModel): statements=["SELECT", "INSERT", "DELETE"], ), ] - indexes = [ - models.Index(fields=["compliance_id"], name="comp_ov_cp_id_idx"), - models.Index(fields=["requirements_failed"], name="comp_ov_req_fail_idx"), - models.Index( - fields=["compliance_id", "requirements_failed"], - name="comp_ov_cp_id_req_fail_idx", - ), - ] class JSONAPIMeta: resource_name = 
"compliance-overviews" @@ -1615,10 +1593,6 @@ class ScanSummary(RowLevelSecurityProtectedModel): fields=["tenant_id", "scan_id"], name="scan_summaries_tenant_scan_idx", ), - models.Index( - fields=["tenant_id", "scan_id", "service"], - name="ss_tenant_scan_service_idx", - ), models.Index( fields=["tenant_id", "scan_id", "severity"], name="ss_tenant_scan_severity_idx", @@ -2033,7 +2007,7 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel): class ResourceScanSummary(RowLevelSecurityProtectedModel): scan_id = models.UUIDField(default=uuid7, db_index=True) - resource_id = models.UUIDField(default=uuid4, db_index=True) + resource_id = models.UUIDField(default=uuid4) service = models.CharField(max_length=100) region = models.CharField(max_length=100) resource_type = models.CharField(max_length=100)