perf(api): replace JOINs with pre-check in threat score aggregation query (#10399)

Co-authored-by: Josema Camacho <josema@prowler.com>
This commit is contained in:
Prowler Bot
2026-03-19 17:43:47 +01:00
committed by GitHub
parent cd9d7a2e95
commit 9ae35029dc
3 changed files with 23 additions and 22 deletions

View File

@@ -2,6 +2,14 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.22.1] (Prowler v5.21.1)
### 🐞 Fixed
- Threat score aggregation query no longer performs unnecessary JOINs or incurs `COUNT(DISTINCT)` overhead [(#10394)](https://github.com/prowler-cloud/prowler/pull/10394)
---
## [1.22.0] (Prowler v5.21.0)
### 🚀 Added

View File

@@ -4,7 +4,7 @@ from django.db.models import Count, Q
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding, StatusChoices
from api.models import Finding, Scan, StatusChoices
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
@@ -35,25 +35,26 @@ def _aggregate_requirement_statistics_from_database(
}
"""
requirement_statistics_by_check_id = {}
# TODO: take into account that now the relation is 1 finding == 1 resource, review this when the logic changes
# TODO: review when finding-resource relation changes from 1:1
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
# Pre-check: skip if the scan's provider is deleted (avoids JOINs in the main query)
if Scan.all_objects.filter(id=scan_id, provider__is_deleted=True).exists():
return requirement_statistics_by_check_id
aggregated_statistics_queryset = (
Finding.all_objects.filter(
tenant_id=tenant_id,
scan_id=scan_id,
muted=False,
resources__provider__is_deleted=False,
)
.values("check_id")
.annotate(
total_findings=Count(
"id",
distinct=True,
filter=Q(status__in=[StatusChoices.PASS, StatusChoices.FAIL]),
),
passed_findings=Count(
"id",
distinct=True,
filter=Q(status=StatusChoices.PASS),
),
)

View File

@@ -169,35 +169,27 @@ class TestAggregateRequirementStatistics:
assert result["check_1"]["passed"] == 1
assert result["check_1"]["total"] == 1
def test_excludes_findings_without_resources(self, tenants_fixture, scans_fixture):
"""Verify findings without resources are excluded from aggregation."""
def test_skips_aggregation_for_deleted_provider(
self, tenants_fixture, scans_fixture
):
"""Verify aggregation returns empty when the scan's provider is soft-deleted."""
tenant = tenants_fixture[0]
scan = scans_fixture[0]
# Finding WITH resource → should be counted
self._create_finding_with_resource(
tenant, scan, "finding-1", "check_1", StatusChoices.PASS
)
# Finding WITHOUT resource → should be EXCLUDED
Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid="finding-2",
check_id="check_1",
status=StatusChoices.FAIL,
severity=Severity.high,
impact=Severity.high,
check_metadata={},
raw_result={},
)
# Soft-delete the provider
provider = scan.provider
provider.is_deleted = True
provider.save(update_fields=["is_deleted"])
result = _aggregate_requirement_statistics_from_database(
str(tenant.id), str(scan.id)
)
assert result["check_1"]["passed"] == 1
assert result["check_1"]["total"] == 1
assert result == {}
def test_multiple_resources_no_double_count(self, tenants_fixture, scans_fixture):
"""Verify a finding with multiple resources is only counted once."""