feat(api): remove unused database indexes and improve new failed findings index (#9904)

This commit is contained in:
Víctor Fernández Poyatos
2026-01-28 12:35:36 +01:00
committed by GitHub
parent 5407f3c68e
commit 8783e963d3
7 changed files with 224 additions and 31 deletions
+2
View File
@@ -9,10 +9,12 @@ All notable changes to the **Prowler API** are documented in this file.
- Attack Paths: Bedrock Code Interpreter and AttachRolePolicy privilege escalation queries [(#9885)](https://github.com/prowler-cloud/prowler/pull/9885)
- Added memory optimizations for large compliance report generation [(#9444)](https://github.com/prowler-cloud/prowler/pull/9444)
- `GET /api/v1/resources/{id}/events` endpoint to retrieve AWS resource modification history from CloudTrail [(#9101)](https://github.com/prowler-cloud/prowler/pull/9101)
- Partial index on findings to speed up new failed findings queries [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)
### 🔄 Changed
- Lazy-load providers and compliance data to reduce API/worker startup memory and time [(#9857)](https://github.com/prowler-cloud/prowler/pull/9857)
- Remove unused indexes [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)
---
+1 -1
View File
@@ -450,7 +450,7 @@ def create_index_on_partitions(
all_partitions=True
)
"""
with connection.cursor() as cursor:
with schema_editor.connection.cursor() as cursor:
cursor.execute(
"""
SELECT inhrelid::regclass::text
@@ -0,0 +1,41 @@
from django.db import migrations
class Migration(migrations.Migration):
"""
Drop unused indexes on partitioned tables (findings, resource_finding_mappings).
NOTE: RemoveIndexConcurrently cannot be used on partitioned tables in PostgreSQL.
Standard RemoveIndex drops the parent index, which cascades to all partitions.
"""
dependencies = [
("api", "0070_attack_paths_scan"),
]
operations = [
migrations.RemoveIndex(
model_name="finding",
name="gin_findings_search_idx",
),
migrations.RemoveIndex(
model_name="finding",
name="gin_find_service_idx",
),
migrations.RemoveIndex(
model_name="finding",
name="gin_find_region_idx",
),
migrations.RemoveIndex(
model_name="finding",
name="gin_find_rtype_idx",
),
migrations.RemoveIndex(
model_name="finding",
name="find_delta_new_idx",
),
migrations.RemoveIndex(
model_name="resourcefindingmapping",
name="rfm_tenant_finding_idx",
),
]
@@ -0,0 +1,91 @@
"""
Drop unused indexes on non-partitioned tables.
These tables are not partitioned, so RemoveIndexConcurrently can be used safely.
"""
from uuid import uuid4
from django.contrib.postgres.operations import RemoveIndexConcurrently
from django.db import migrations, models
def drop_resource_scan_summary_resource_id_index(apps, schema_editor):
with schema_editor.connection.cursor() as cursor:
cursor.execute(
"""
SELECT idx_ns.nspname, idx.relname
FROM pg_class tbl
JOIN pg_namespace tbl_ns ON tbl_ns.oid = tbl.relnamespace
JOIN pg_index i ON i.indrelid = tbl.oid
JOIN pg_class idx ON idx.oid = i.indexrelid
JOIN pg_namespace idx_ns ON idx_ns.oid = idx.relnamespace
JOIN pg_attribute a
ON a.attrelid = tbl.oid
AND a.attnum = (i.indkey::int[])[0]
WHERE tbl_ns.nspname = ANY (current_schemas(false))
AND tbl.relname = %s
AND i.indnatts = 1
AND a.attname = %s
""",
["resource_scan_summaries", "resource_id"],
)
row = cursor.fetchone()
if not row:
return
schema_name, index_name = row
quote_name = schema_editor.connection.ops.quote_name
qualified_name = f"{quote_name(schema_name)}.{quote_name(index_name)}"
schema_editor.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {qualified_name};")
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0071_drop_partitioned_indexes"),
]
operations = [
RemoveIndexConcurrently(
model_name="resource",
name="gin_resources_search_idx",
),
RemoveIndexConcurrently(
model_name="resourcetag",
name="gin_resource_tags_search_idx",
),
RemoveIndexConcurrently(
model_name="scansummary",
name="ss_tenant_scan_service_idx",
),
RemoveIndexConcurrently(
model_name="complianceoverview",
name="comp_ov_cp_id_idx",
),
RemoveIndexConcurrently(
model_name="complianceoverview",
name="comp_ov_req_fail_idx",
),
RemoveIndexConcurrently(
model_name="complianceoverview",
name="comp_ov_cp_id_req_fail_idx",
),
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunPython(
drop_resource_scan_summary_resource_id_index,
reverse_code=migrations.RunPython.noop,
),
],
state_operations=[
migrations.AlterField(
model_name="resourcescansummary",
name="resource_id",
field=models.UUIDField(default=uuid4),
),
],
),
]
@@ -0,0 +1,31 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0072_drop_unused_indexes"),
]
operations = [
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_fail_new_idx",
columns="tenant_id, scan_id",
where="status = 'FAIL' AND delta = 'new'",
all_partitions=True,
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_fail_new_idx",
),
)
]
@@ -0,0 +1,54 @@
from django.db import migrations, models
INDEX_NAME = "find_tenant_scan_fail_new_idx"
PARENT_TABLE = "findings"
def create_parent_and_attach(apps, schema_editor):
with schema_editor.connection.cursor() as cursor:
cursor.execute(
f"CREATE INDEX {INDEX_NAME} ON ONLY {PARENT_TABLE} "
f"USING btree (tenant_id, scan_id) "
f"WHERE status = 'FAIL' AND delta = 'new'"
)
cursor.execute(
"SELECT inhrelid::regclass::text "
"FROM pg_inherits "
"WHERE inhparent = %s::regclass",
[PARENT_TABLE],
)
for (partition,) in cursor.fetchall():
child_idx = f"{partition.replace('.', '_')}_{INDEX_NAME}"
cursor.execute(f"ALTER INDEX {INDEX_NAME} ATTACH PARTITION {child_idx}")
def drop_parent_index(apps, schema_editor):
with schema_editor.connection.cursor() as cursor:
cursor.execute(f"DROP INDEX IF EXISTS {INDEX_NAME}")
class Migration(migrations.Migration):
dependencies = [
("api", "0073_findings_fail_new_index_partitions"),
]
operations = [
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddIndex(
model_name="finding",
index=models.Index(
condition=models.Q(status="FAIL", delta="new"),
fields=["tenant_id", "scan_id"],
name=INDEX_NAME,
),
),
],
database_operations=[
migrations.RunPython(
create_parent_and_attach,
reverse_code=drop_parent_index,
),
],
),
]
+4 -30
View File
@@ -12,7 +12,6 @@ from cryptography.fernet import Fernet, InvalidToken
from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVector, SearchVectorField
from django.contrib.sites.models import Site
from django.core.exceptions import ValidationError
@@ -741,10 +740,6 @@ class ResourceTag(RowLevelSecurityProtectedModel):
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "resource_tags"
indexes = [
GinIndex(fields=["text_search"], name="gin_resource_tags_search_idx"),
]
constraints = [
models.UniqueConstraint(
fields=("tenant_id", "key", "value"),
@@ -853,7 +848,6 @@ class Resource(RowLevelSecurityProtectedModel):
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"),
models.Index(
fields=["tenant_id", "provider_id"],
@@ -1038,23 +1032,19 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
indexes = [
models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
models.Index(
fields=["tenant_id", "id"],
condition=Q(delta="new"),
name="find_delta_new_idx",
condition=models.Q(status=StatusChoices.FAIL, delta="new"),
fields=["tenant_id", "scan_id"],
name="find_tenant_scan_fail_new_idx",
),
models.Index(
fields=["tenant_id", "uid", "-inserted_at"],
name="find_tenant_uid_inserted_idx",
),
GinIndex(fields=["resource_services"], name="gin_find_service_idx"),
GinIndex(fields=["resource_regions"], name="gin_find_region_idx"),
GinIndex(fields=["resource_types"], name="gin_find_rtype_idx"),
models.Index(
fields=["tenant_id", "scan_id", "check_id"],
name="find_tenant_scan_check_idx",
@@ -1122,10 +1112,6 @@ class ResourceFindingMapping(PostgresPartitionedModel, RowLevelSecurityProtected
# - id
indexes = [
models.Index(
fields=["tenant_id", "finding_id"],
name="rfm_tenant_finding_idx",
),
models.Index(
fields=["tenant_id", "resource_id"],
name="rfm_tenant_resource_idx",
@@ -1442,14 +1428,6 @@ class ComplianceOverview(RowLevelSecurityProtectedModel):
statements=["SELECT", "INSERT", "DELETE"],
),
]
indexes = [
models.Index(fields=["compliance_id"], name="comp_ov_cp_id_idx"),
models.Index(fields=["requirements_failed"], name="comp_ov_req_fail_idx"),
models.Index(
fields=["compliance_id", "requirements_failed"],
name="comp_ov_cp_id_req_fail_idx",
),
]
class JSONAPIMeta:
resource_name = "compliance-overviews"
@@ -1615,10 +1593,6 @@ class ScanSummary(RowLevelSecurityProtectedModel):
fields=["tenant_id", "scan_id"],
name="scan_summaries_tenant_scan_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "service"],
name="ss_tenant_scan_service_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "severity"],
name="ss_tenant_scan_severity_idx",
@@ -2033,7 +2007,7 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
class ResourceScanSummary(RowLevelSecurityProtectedModel):
scan_id = models.UUIDField(default=uuid7, db_index=True)
resource_id = models.UUIDField(default=uuid4, db_index=True)
resource_id = models.UUIDField(default=uuid4)
service = models.CharField(max_length=100)
region = models.CharField(max_length=100)
resource_type = models.CharField(max_length=100)