diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 7cd4fce52f..68e497386d 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to the **Prowler API** are documented in this file. +## [1.22.1] (Prowler v5.21.1) + +### 🐞 Fixed + +- Threat score aggregation query to eliminate unnecessary JOINs and `COUNT(DISTINCT)` overhead [(#10394)](https://github.com/prowler-cloud/prowler/pull/10394) + +--- + ## [1.22.0] (Prowler v5.21.0) ### 🚀 Added diff --git a/api/src/backend/tasks/jobs/threatscore_utils.py b/api/src/backend/tasks/jobs/threatscore_utils.py index a17125af46..2ef29484ee 100644 --- a/api/src/backend/tasks/jobs/threatscore_utils.py +++ b/api/src/backend/tasks/jobs/threatscore_utils.py @@ -4,7 +4,7 @@ from django.db.models import Count, Q from api.db_router import READ_REPLICA_ALIAS from api.db_utils import rls_transaction -from api.models import Finding, StatusChoices +from api.models import Finding, Scan, StatusChoices from prowler.lib.outputs.finding import Finding as FindingOutput logger = get_task_logger(__name__) @@ -35,25 +35,26 @@ def _aggregate_requirement_statistics_from_database( } """ requirement_statistics_by_check_id = {} - # TODO: take into account that now the relation is 1 finding == 1 resource, review this when the logic changes + # TODO: review when finding-resource relation changes from 1:1 with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS): + # Pre-check: skip if the scan's provider is deleted (avoids JOINs in the main query) + if Scan.all_objects.filter(id=scan_id, provider__is_deleted=True).exists(): + return requirement_statistics_by_check_id + aggregated_statistics_queryset = ( Finding.all_objects.filter( tenant_id=tenant_id, scan_id=scan_id, muted=False, - resources__provider__is_deleted=False, ) .values("check_id") .annotate( total_findings=Count( "id", - distinct=True, filter=Q(status__in=[StatusChoices.PASS, StatusChoices.FAIL]), ), passed_findings=Count( "id", - distinct=True, filter=Q(status=StatusChoices.PASS), ), ) diff --git a/api/src/backend/tasks/tests/test_reports.py b/api/src/backend/tasks/tests/test_reports.py index a6a80b0891..858f4c06ca 100644 --- a/api/src/backend/tasks/tests/test_reports.py +++ b/api/src/backend/tasks/tests/test_reports.py @@ -169,35 +169,27 @@ class TestAggregateRequirementStatistics: assert result["check_1"]["passed"] == 1 assert result["check_1"]["total"] == 1 - def test_excludes_findings_without_resources(self, tenants_fixture, scans_fixture): - """Verify findings without resources are excluded from aggregation.""" + def test_skips_aggregation_for_deleted_provider( + self, tenants_fixture, scans_fixture + ): + """Verify aggregation returns empty when the scan's provider is soft-deleted.""" tenant = tenants_fixture[0] scan = scans_fixture[0] - # Finding WITH resource → should be counted self._create_finding_with_resource( tenant, scan, "finding-1", "check_1", StatusChoices.PASS ) - # Finding WITHOUT resource → should be EXCLUDED - Finding.objects.create( - tenant_id=tenant.id, - scan=scan, - uid="finding-2", - check_id="check_1", - status=StatusChoices.FAIL, - severity=Severity.high, - impact=Severity.high, - check_metadata={}, - raw_result={}, - ) + # Soft-delete the provider + provider = scan.provider + provider.is_deleted = True + provider.save(update_fields=["is_deleted"]) result = _aggregate_requirement_statistics_from_database( str(tenant.id), str(scan.id) ) - assert result["check_1"]["passed"] == 1 - assert result["check_1"]["total"] == 1 + assert result == {} def test_multiple_resources_no_double_count(self, tenants_fixture, scans_fixture): """Verify a finding with multiple resources is only counted once."""