Compare commits

...

6 Commits

Author SHA1 Message Date
Adrián Jesús Peña Rodríguez ffc16e1021 Merge branch 'master' into fix/finding-groups-aggregated-filters 2026-03-24 12:38:25 +01:00
Adrián Jesús Peña Rodríguez c99a203517 fix(api): use hybrid aggregation path for finding-group filters
- Aggregate from findings when finding-level filters are present (region, delta, etc.)
- Keep summary path for compatible filters to preserve performance
- Apply status/severity filters post-aggregation in both paths
- Fix sort by time fields mapping to aggregated annotation names
- Use DenseRank instead of RowNumber to keep all findings per latest scan
- Expose muted_reason in finding-group resource endpoints
- Add tests for combined filters, validation errors, and time-field sorting
- Deduplicate list/latest actions via shared helpers
- Consolidate check_title icontains filter in base class
2026-03-24 12:34:49 +01:00
Adrián Jesús Peña Rodríguez ce9e1af176 Merge branch 'master' into fix/finding-groups-aggregated-filters 2026-03-23 17:03:08 +01:00
Adrián Jesús Peña Rodríguez cdfc52732c chore: ruff format 2026-03-23 16:27:54 +01:00
Adrián Jesús Peña Rodríguez 373f6e8ca0 chore(api): update changelog for finding-group filters 2026-03-23 16:23:23 +01:00
Adrián Jesús Peña Rodríguez efe14dfa7d fix(api): support finding-group aggregated filters
- Apply status and severity filters on aggregated finding-group results

- Prefilter summary groups by finding-level filter matches for advanced dimensions

- Add coverage for latest/list filters and keep check_title sorting support
2026-03-23 16:20:51 +01:00
5 changed files with 799 additions and 129 deletions
+6
View File
@@ -2,6 +2,12 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.24.0] (Prowler UNRELEASED)
### 🐞 Fixed
- Finding groups list/latest now apply computed status/severity filters and finding-level prefilters (delta, region, service, category, resource group, scan, resource type), plus `check_title` support for sort/filter consistency [(#10428)](https://github.com/prowler-cloud/prowler/pull/10428)
## [1.23.0] (Prowler v5.22.0)
### 🚀 Added
+101
View File
@@ -15,6 +15,7 @@ from django_filters.rest_framework import (
from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
from rest_framework_json_api.serializers import ValidationError
from api.constants import SEVERITY_ORDER
from api.db_utils import (
FindingDeltaEnumField,
InvitationStateEnumField,
@@ -264,6 +265,13 @@ class CommonFindingFilters(FilterSet):
)
return queryset.filter(overall_query).distinct()
def filter_check_title_icontains(self, queryset, name, value):
    """Keep rows whose check title contains ``value`` (case-insensitive).

    The title is looked up in ``check_metadata`` under three key spellings
    (``CheckTitle``, ``checktitle`` and ``Checktitle``); a row matches when
    any of them contains the value.
    """
    pascal_case_match = Q(check_metadata__CheckTitle__icontains=value)
    lower_case_match = Q(check_metadata__checktitle__icontains=value)
    capitalized_match = Q(check_metadata__Checktitle__icontains=value)
    return queryset.filter(pascal_case_match | lower_case_match | capitalized_match)
class TenantFilter(FilterSet):
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
@@ -803,11 +811,15 @@ class FindingGroupFilter(CommonFindingFilters):
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
check_title__icontains = CharFilter(method="filter_check_title_icontains")
scan = UUIDFilter(field_name="scan_id", lookup_expr="exact")
scan__in = UUIDInFilter(field_name="scan_id", lookup_expr="in")
class Meta:
model = Finding
fields = {
"check_id": ["exact", "in", "icontains"],
"scan": ["exact", "in"],
}
def filter_queryset(self, queryset):
@@ -895,11 +907,15 @@ class LatestFindingGroupFilter(CommonFindingFilters):
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
check_title__icontains = CharFilter(method="filter_check_title_icontains")
scan = UUIDFilter(field_name="scan_id", lookup_expr="exact")
scan__in = UUIDInFilter(field_name="scan_id", lookup_expr="in")
class Meta:
model = Finding
fields = {
"check_id": ["exact", "in", "icontains"],
"scan": ["exact", "in"],
}
@@ -1048,6 +1064,91 @@ class LatestFindingGroupSummaryFilter(FilterSet):
}
class FindingGroupAggregatedComputedFilter(FilterSet):
    """Filter aggregated finding-group rows by computed status/severity.

    All filters are method-based and operate on fields of the aggregated
    queryset: status filters compare against ``aggregated_status`` and
    severity filters compare against ``severity_order`` (resolved through
    ``SEVERITY_ORDER``). Unknown values raise a ``ValidationError`` carrying
    a single error object with ``status`` "400" and ``code`` "invalid".
    """

    STATUS_CHOICES = (
        ("FAIL", "Fail"),
        ("PASS", "Pass"),
        ("MUTED", "Muted"),
    )

    status = ChoiceFilter(method="filter_status", choices=STATUS_CHOICES)
    status__in = CharInFilter(method="filter_status_in", lookup_expr="in")
    severity = ChoiceFilter(method="filter_severity", choices=SeverityChoices)
    severity__in = CharInFilter(method="filter_severity_in", lookup_expr="in")

    @staticmethod
    def _normalize_values(value):
        """Split a comma-separated string into stripped, non-empty parts.

        Non-string values are returned unchanged.
        """
        if isinstance(value, str):
            return [part.strip() for part in value.split(",") if part.strip()]
        return value

    @staticmethod
    def _invalid_value_error(filter_label, invalid_value):
        """Build the ``ValidationError`` raised for an unsupported filter value."""
        return ValidationError(
            [
                {
                    "detail": f"invalid {filter_label} filter: {invalid_value}",
                    "status": "400",
                    "source": {"pointer": "/data"},
                    "code": "invalid",
                }
            ]
        )

    def _resolve_severity_order(self, severity_value):
        """Return the ``SEVERITY_ORDER`` entry for a severity, or raise if unknown."""
        severity_order = SEVERITY_ORDER.get(severity_value)
        if severity_order is None:
            raise self._invalid_value_error("severity", severity_value)
        return severity_order

    def filter_status(self, queryset, name, value):
        """Keep rows whose ``aggregated_status`` equals ``value``."""
        return queryset.filter(aggregated_status=value)

    def filter_status_in(self, queryset, name, value):
        """Keep rows whose ``aggregated_status`` is one of the given values.

        Raises:
            ValidationError: if any value is not listed in ``STATUS_CHOICES``;
                the first offending value is reported.
        """
        values = self._normalize_values(value)
        allowed = {choice[0] for choice in self.STATUS_CHOICES}
        invalid = [
            status_value for status_value in values if status_value not in allowed
        ]
        if invalid:
            raise self._invalid_value_error("status", invalid[0])
        if not values:
            # Nothing usable was supplied: leave the queryset untouched.
            return queryset
        return queryset.filter(aggregated_status__in=values)

    def filter_severity(self, queryset, name, value):
        """Keep rows whose ``severity_order`` matches the given severity.

        Raises:
            ValidationError: if ``value`` has no entry in ``SEVERITY_ORDER``.
        """
        return queryset.filter(severity_order=self._resolve_severity_order(value))

    def filter_severity_in(self, queryset, name, value):
        """Keep rows whose ``severity_order`` matches one of the given severities.

        Raises:
            ValidationError: on the first value with no ``SEVERITY_ORDER`` entry.
        """
        values = self._normalize_values(value)
        orders = [
            self._resolve_severity_order(severity_value) for severity_value in values
        ]
        if not orders:
            # Nothing usable was supplied: leave the queryset untouched.
            return queryset
        return queryset.filter(severity_order__in=orders)
class ProviderSecretFilter(FilterSet):
inserted_at = DateFilter(
field_name="inserted_at",
+468 -22
View File
@@ -45,7 +45,6 @@ from api.models import (
ComplianceRequirementOverview,
DailySeveritySummary,
Finding,
FindingGroupDailySummary,
Integration,
Invitation,
LighthouseProviderConfiguration,
@@ -15217,6 +15216,29 @@ class TestFindingGroupViewSet:
# ec2_instance_public_ip has 1 PASS and 1 FAIL, should aggregate to FAIL
assert data[0]["attributes"]["status"] == "FAIL"
def test_finding_groups_region_filter_reaggregates_metrics(
    self, authenticated_client, finding_groups_fixture
):
    """A region filter must yield group metrics computed from matching findings."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "ec2_instance_public_ip",
        "filter[region]": "us-east-1",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1

    metrics = groups[0]["attributes"]
    expected_metrics = {
        "status": "PASS",
        "pass_count": 1,
        "fail_count": 0,
        "resources_total": 1,
        "resources_fail": 0,
    }
    for field, expected_value in expected_metrics.items():
        assert metrics[field] == expected_value
def test_finding_groups_status_pass_when_no_fail(
self, authenticated_client, finding_groups_fixture
):
@@ -15245,6 +15267,182 @@ class TestFindingGroupViewSet:
# rds_encryption has all muted findings
assert data[0]["attributes"]["status"] == "MUTED"
def test_finding_groups_status_filter(
    self, authenticated_client, finding_groups_fixture
):
    """The list endpoint filtered by status=FAIL returns only FAIL groups."""
    query = {"filter[inserted_at]": TODAY, "filter[status]": "FAIL"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    for group in groups:
        assert group["attributes"]["status"] == "FAIL"
def test_finding_groups_status_in_filter(
    self, authenticated_client, finding_groups_fixture
):
    """The list endpoint accepts status__in and returns only the listed statuses."""
    query = {"filter[inserted_at]": TODAY, "filter[status__in]": "FAIL,PASS"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    accepted = {"FAIL", "PASS"}
    for group in groups:
        assert group["attributes"]["status"] in accepted
def test_finding_groups_severity_filter(
    self, authenticated_client, finding_groups_fixture
):
    """The list endpoint filtered by severity=critical returns only critical groups."""
    query = {"filter[inserted_at]": TODAY, "filter[severity]": "critical"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    for group in groups:
        assert group["attributes"]["severity"] == "critical"
@pytest.mark.parametrize(
    "endpoint_name", ["finding-group-list", "finding-group-latest"]
)
def test_finding_groups_combined_region_and_status_filters(
    self, authenticated_client, finding_groups_fixture, endpoint_name
):
    """A region filter combined with an aggregated status filter works on both endpoints."""
    query = {"filter[region]": "us-east-1", "filter[status]": "FAIL"}
    if endpoint_name == "finding-group-list":
        # Only the list endpoint is given a date filter in this test.
        query["filter[inserted_at]"] = TODAY

    resp = authenticated_client.get(reverse(endpoint_name), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    returned_ids = {group["id"] for group in groups}
    assert returned_ids == {"s3_bucket_public_access", "cloudtrail_enabled"}
    assert all(group["attributes"]["status"] == "FAIL" for group in groups)
@pytest.mark.parametrize(
    "endpoint_name", ["finding-group-list", "finding-group-latest"]
)
def test_finding_groups_combined_delta_and_severity_filters(
    self, authenticated_client, finding_groups_fixture, endpoint_name
):
    """A delta filter combined with an aggregated severity filter works on both endpoints."""
    query = {"filter[delta]": "new", "filter[severity]": "critical"}
    if endpoint_name == "finding-group-list":
        # Only the list endpoint is given a date filter in this test.
        query["filter[inserted_at]"] = TODAY

    resp = authenticated_client.get(reverse(endpoint_name), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    returned_ids = {group["id"] for group in groups}
    assert returned_ids == {"s3_bucket_public_access", "cloudtrail_enabled"}
    assert all(group["attributes"]["severity"] == "critical" for group in groups)
@pytest.mark.parametrize(
    "endpoint_name", ["finding-group-list", "finding-group-latest"]
)
@pytest.mark.parametrize(
    "filter_key,filter_value",
    [
        ("status", "INVALID_STATUS"),
        ("severity", "INVALID_SEVERITY"),
    ],
)
def test_finding_groups_invalid_status_or_severity_returns_400(
    self,
    authenticated_client,
    finding_groups_fixture,
    endpoint_name,
    filter_key,
    filter_value,
):
    """An unknown status or severity value is answered with a 400 'invalid' error."""
    query = {f"filter[{filter_key}]": filter_value}
    if endpoint_name == "finding-group-list":
        # Only the list endpoint is given a date filter in this test.
        query["filter[inserted_at]"] = TODAY

    resp = authenticated_client.get(reverse(endpoint_name), query)

    assert resp.status_code == status.HTTP_400_BAD_REQUEST
    first_error = resp.json()["errors"][0]
    assert first_error["code"] == "invalid"
@pytest.mark.parametrize(
    "endpoint_name", ["finding-group-list", "finding-group-latest"]
)
@pytest.mark.parametrize(
    "filter_key,filter_value,expected_detail",
    [
        ("status__in", "FAIL,INVALID_STATUS", "invalid status filter"),
        ("severity__in", "critical,INVALID_SEVERITY", "invalid severity filter"),
    ],
)
def test_finding_groups_invalid_in_filters_return_400(
    self,
    authenticated_client,
    finding_groups_fixture,
    endpoint_name,
    filter_key,
    filter_value,
    expected_detail,
):
    """A status__in/severity__in list containing an unknown value yields a 400."""
    query = {f"filter[{filter_key}]": filter_value}
    if endpoint_name == "finding-group-list":
        # Only the list endpoint is given a date filter in this test.
        query["filter[inserted_at]"] = TODAY

    resp = authenticated_client.get(reverse(endpoint_name), query)

    assert resp.status_code == status.HTTP_400_BAD_REQUEST
    first_error = resp.json()["errors"][0]
    assert first_error["code"] == "invalid"
    assert expected_detail in first_error["detail"]
@pytest.mark.parametrize(
    "filter_name,filter_value",
    [
        ("region", "__region_does_not_exist__"),
        ("service", "__service_does_not_exist__"),
        ("category", "__category_does_not_exist__"),
        ("resource_groups", "__group_does_not_exist__"),
        ("resource_type", "__type_does_not_exist__"),
        ("scan", "00000000-0000-7000-8000-000000000001"),
    ],
)
def test_finding_groups_finding_level_filters_are_applied(
    self,
    authenticated_client,
    finding_groups_fixture,
    filter_name,
    filter_value,
):
    """Each finding-level filter given a non-matching value yields no groups."""
    query = {"filter[inserted_at]": TODAY, f"filter[{filter_name}]": filter_value}
    resp = authenticated_client.get(reverse("finding-group-list"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_delta_filter_is_applied(
    self, authenticated_client, finding_groups_fixture
):
    """With delta=new on the list endpoint, every returned group has new findings."""
    query = {"filter[inserted_at]": TODAY, "filter[delta]": "new"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    for group in groups:
        assert group["attributes"]["new_count"] > 0
def test_finding_groups_provider_aggregation(
self, authenticated_client, finding_groups_fixture
):
@@ -15853,47 +16051,257 @@ class TestFindingGroupViewSet:
assert len(data) == 1
assert data[0]["id"] == "cloudtrail_enabled"
def test_finding_groups_latest_aggregates_latest_per_provider(
self, authenticated_client, providers_fixture
def test_finding_groups_latest_status_filter(
self, authenticated_client, finding_groups_fixture
):
"""Test /latest aggregates latest summary from each provider for the same check."""
"""Test /latest supports status filter on aggregated status."""
response = authenticated_client.get(
reverse("finding-group-latest"),
{"filter[status]": "FAIL"},
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert len(data) > 0
assert all(item["attributes"]["status"] == "FAIL" for item in data)
def test_finding_groups_latest_region_filter_reaggregates_metrics(
    self, authenticated_client, finding_groups_fixture
):
    """On /latest, a region filter must yield metrics computed from matching findings."""
    query = {
        "filter[check_id]": "ec2_instance_public_ip",
        "filter[region]": "us-east-1",
    }
    resp = authenticated_client.get(reverse("finding-group-latest"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1

    metrics = groups[0]["attributes"]
    expected_metrics = {
        "status": "PASS",
        "pass_count": 1,
        "fail_count": 0,
        "resources_total": 1,
        "resources_fail": 0,
    }
    for field, expected_value in expected_metrics.items():
        assert metrics[field] == expected_value
def test_finding_groups_latest_status_in_filter(
    self, authenticated_client, finding_groups_fixture
):
    """The /latest endpoint accepts status__in and returns only the listed statuses."""
    resp = authenticated_client.get(
        reverse("finding-group-latest"), {"filter[status__in]": "FAIL,PASS"}
    )
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    accepted = {"FAIL", "PASS"}
    for group in groups:
        assert group["attributes"]["status"] in accepted
def test_finding_groups_latest_severity_filter(
    self, authenticated_client, finding_groups_fixture
):
    """The /latest endpoint filtered by severity=critical returns only critical groups."""
    resp = authenticated_client.get(
        reverse("finding-group-latest"), {"filter[severity]": "critical"}
    )
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    for group in groups:
        assert group["attributes"]["severity"] == "critical"
@pytest.mark.parametrize(
    "filter_name,filter_value",
    [
        ("region", "__region_does_not_exist__"),
        ("service", "__service_does_not_exist__"),
        ("category", "__category_does_not_exist__"),
        ("resource_groups", "__group_does_not_exist__"),
        ("resource_type", "__type_does_not_exist__"),
        ("scan", "00000000-0000-7000-8000-000000000001"),
    ],
)
def test_finding_groups_latest_finding_level_filters_are_applied(
    self,
    authenticated_client,
    finding_groups_fixture,
    filter_name,
    filter_value,
):
    """On /latest, each finding-level filter given a non-matching value yields no groups."""
    query = {f"filter[{filter_name}]": filter_value}
    resp = authenticated_client.get(reverse("finding-group-latest"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_check_title_filter_applies_with_delta(
    self, authenticated_client, finding_groups_fixture
):
    """A non-matching check_title filter combined with delta yields no groups."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[delta]": "new",
        "filter[check_title.icontains]": "__missing_check_title__",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_latest_check_title_filter_applies_with_delta(
    self, authenticated_client, finding_groups_fixture
):
    """On /latest, a non-matching check_title filter combined with delta yields no groups."""
    query = {
        "filter[delta]": "new",
        "filter[check_title.icontains]": "__missing_check_title__",
    }
    resp = authenticated_client.get(reverse("finding-group-latest"), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_latest_delta_filter_is_applied(
    self, authenticated_client, finding_groups_fixture
):
    """With delta=new on /latest, every returned group has new findings."""
    resp = authenticated_client.get(
        reverse("finding-group-latest"), {"filter[delta]": "new"}
    )
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) > 0
    for group in groups:
        assert group["attributes"]["new_count"] > 0
def test_finding_groups_latest_aggregates_latest_per_provider(
self,
authenticated_client,
providers_fixture,
resources_fixture,
):
"""Test /latest keeps all findings from the latest scan per provider.
Verifies that when the latest scan produces multiple findings for the
same check_id (e.g. one per resource), all of them are included in the
aggregation not just one.
"""
provider1 = providers_fixture[0]
provider2 = providers_fixture[1]
resource1 = resources_fixture[0]
resource2 = resources_fixture[1]
resource3 = resources_fixture[2]
check_id = "cross_provider_latest_resources_total"
now = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)
FindingGroupDailySummary.objects.create(
latest_scan_provider1 = Scan.objects.create(
tenant_id=provider1.tenant_id,
provider=provider1,
check_id=check_id,
inserted_at=now - timedelta(days=1),
resources_total=20,
resources_fail=20,
fail_count=20,
state=StateChoices.COMPLETED,
trigger=Scan.TriggerChoices.MANUAL,
completed_at=datetime.now(timezone.utc),
)
FindingGroupDailySummary.objects.create(
latest_scan_provider2 = Scan.objects.create(
tenant_id=provider2.tenant_id,
provider=provider2,
check_id=check_id,
inserted_at=now,
resources_total=7,
resources_fail=7,
fail_count=7,
state=StateChoices.COMPLETED,
trigger=Scan.TriggerChoices.MANUAL,
completed_at=datetime.now(timezone.utc),
)
older_scan_provider1 = Scan.objects.create(
tenant_id=provider1.tenant_id,
provider=provider1,
state=StateChoices.COMPLETED,
trigger=Scan.TriggerChoices.MANUAL,
completed_at=datetime.now(timezone.utc) - timedelta(days=1),
)
# Older scan — these should be excluded from /latest
Finding.objects.create(
tenant_id=provider1.tenant_id,
uid="old_cross_provider_1",
scan=older_scan_provider1,
delta="new",
status="FAIL",
severity="high",
impact="high",
check_id=check_id,
check_metadata={"CheckId": check_id, "checktitle": "Cross provider check"},
first_seen_at=datetime.now(timezone.utc) - timedelta(days=2),
muted=False,
)
# Latest scan provider1: TWO findings (PASS + FAIL) for the same check
latest_p1_pass = Finding.objects.create(
tenant_id=provider1.tenant_id,
uid="latest_cross_provider_1_pass",
scan=latest_scan_provider1,
delta="new",
status="PASS",
severity="high",
impact="high",
check_id=check_id,
check_metadata={"CheckId": check_id, "checktitle": "Cross provider check"},
first_seen_at=datetime.now(timezone.utc) - timedelta(hours=1),
muted=False,
)
latest_p1_pass.add_resources([resource1])
latest_p1_fail = Finding.objects.create(
tenant_id=provider1.tenant_id,
uid="latest_cross_provider_1_fail",
scan=latest_scan_provider1,
delta="new",
status="FAIL",
severity="high",
impact="high",
check_id=check_id,
check_metadata={"CheckId": check_id, "checktitle": "Cross provider check"},
first_seen_at=datetime.now(timezone.utc) - timedelta(hours=1),
muted=False,
)
latest_p1_fail.add_resources([resource2])
# Latest scan provider2: one finding
latest_p2 = Finding.objects.create(
tenant_id=provider2.tenant_id,
uid="latest_cross_provider_2",
scan=latest_scan_provider2,
delta="new",
status="FAIL",
severity="high",
impact="high",
check_id=check_id,
check_metadata={"CheckId": check_id, "checktitle": "Cross provider check"},
first_seen_at=datetime.now(timezone.utc) - timedelta(hours=1),
muted=False,
)
latest_p2.add_resources([resource3])
response = authenticated_client.get(
reverse("finding-group-latest"),
{"filter[check_id]": check_id},
{"filter[check_id]": check_id, "filter[delta]": "new"},
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert len(data) == 1
attrs = data[0]["attributes"]
assert attrs["resources_total"] == 27
assert attrs["resources_fail"] == 27
assert attrs["fail_count"] == 27
# 3 findings total: 2 from provider1 latest + 1 from provider2 latest
assert attrs["pass_count"] == 1
assert attrs["fail_count"] == 2
assert attrs["resources_total"] == 3
assert attrs["resources_fail"] == 2
def test_finding_groups_latest_provider_type_filter(
self, authenticated_client, finding_groups_fixture
@@ -15934,6 +16342,44 @@ class TestFindingGroupViewSet:
check_ids = [item["id"] for item in data]
assert check_ids == sorted(check_ids)
def test_finding_groups_latest_sort_by_check_title(
    self, authenticated_client, finding_groups_fixture
):
    """Sorting /latest by check_title returns titles in ascending order."""
    resp = authenticated_client.get(
        reverse("finding-group-latest"), {"sort": "check_title"}
    )
    assert resp.status_code == status.HTTP_200_OK

    returned_titles = [
        group["attributes"]["check_title"] for group in resp.json()["data"]
    ]
    assert returned_titles == sorted(returned_titles)
@pytest.mark.parametrize(
    "endpoint_name", ["finding-group-list", "finding-group-latest"]
)
@pytest.mark.parametrize(
    "sort_field",
    ["first_seen_at", "-first_seen_at", "last_seen_at", "failing_since"],
)
def test_finding_groups_sort_by_time_fields(
    self,
    authenticated_client,
    finding_groups_fixture,
    endpoint_name,
    sort_field,
):
    """Sorting by a time field is accepted on both endpoints and returns rows.

    Only the status code and a non-empty result are checked; the resulting
    order is not asserted.
    """
    query = {"sort": sort_field}
    if endpoint_name == "finding-group-list":
        # Only the list endpoint is given a date filter in this test.
        query["filter[inserted_at]"] = TODAY

    resp = authenticated_client.get(reverse(endpoint_name), query)

    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) > 0
def test_finding_groups_latest_ignores_date_filters(
self, authenticated_client, finding_groups_fixture
):
+1
View File
@@ -4180,6 +4180,7 @@ class FindingGroupResourceSerializer(BaseSerializerV1):
severity = serializers.CharField()
first_seen_at = serializers.DateTimeField(required=False, allow_null=True)
last_seen_at = serializers.DateTimeField(required=False, allow_null=True)
muted_reason = serializers.CharField(required=False, allow_null=True)
class JSONAPIMeta:
resource_name = "finding-group-resources"
+223 -107
View File
@@ -31,6 +31,7 @@ from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import (
Case,
CharField,
Count,
DecimalField,
ExpressionWrapper,
@@ -47,7 +48,7 @@ from django.db.models import (
When,
Window,
)
from django.db.models.functions import Coalesce, RowNumber
from django.db.models.functions import Cast, Coalesce, DenseRank, RowNumber
from django.http import HttpResponse, QueryDict
from django.shortcuts import redirect
from django.urls import reverse
@@ -124,6 +125,7 @@ from api.filters import (
CustomDjangoFilterBackend,
DailySeveritySummaryFilter,
FindingFilter,
FindingGroupAggregatedComputedFilter,
FindingGroupFilter,
FindingGroupSummaryFilter,
IntegrationFilter,
@@ -6786,13 +6788,13 @@ class FindingGroupViewSet(BaseRLSViewSet):
security analysts to see which checks are failing across their
infrastructure without scrolling through thousands of individual findings.
Uses pre-aggregated FindingGroupDailySummary table for efficient queries.
Daily summaries are re-aggregated across the requested date range.
Uses a hybrid strategy: pre-aggregated daily summaries when possible,
and raw findings when finding-level filters require precise subset metrics.
"""
queryset = FindingGroupDailySummary.objects.all()
serializer_class = FindingGroupSerializer
filterset_class = FindingGroupSummaryFilter
filterset_class = FindingGroupFilter
filter_backends = [
jsonapi_filters.QueryParameterValidationFilter,
jsonapi_filters.OrderingFilter,
@@ -6811,12 +6813,12 @@ class FindingGroupViewSet(BaseRLSViewSet):
affects the OpenAPI schema generated by drf-spectacular.
"""
if self.action == "latest":
return LatestFindingGroupSummaryFilter
return LatestFindingGroupFilter
if self.action == "resources":
return FindingGroupFilter
if self.action == "latest_resources":
return LatestFindingGroupFilter
return FindingGroupSummaryFilter
return FindingGroupFilter
def get_queryset(self):
"""Get the base FindingGroupDailySummary queryset with RLS filtering."""
@@ -6923,20 +6925,27 @@ class FindingGroupViewSet(BaseRLSViewSet):
return filterset.qs.values("id")
def _get_finding_level_filter_keys(self, latest: bool = False) -> set[str]:
    """Return the filter names that only the finding-level filterset declares.

    The result is the set difference between the ``base_filters`` keys of the
    finding filterset and those of the summary filterset; the "latest"
    variants of both classes are compared when ``latest`` is true.
    """
    if latest:
        finding_cls = LatestFindingGroupFilter
        summary_cls = LatestFindingGroupSummaryFilter
    else:
        finding_cls = FindingGroupFilter
        summary_cls = FindingGroupSummaryFilter

    return set(finding_cls.base_filters) - set(summary_cls.base_filters)
def _requires_finding_level_aggregation(
    self, params: QueryDict, latest: bool = False
) -> bool:
    """Tell whether any key in ``params`` is a finding-level-only filter."""
    finding_only_keys = self._get_finding_level_filter_keys(latest=latest)
    for param_name in params.keys():
        if param_name in finding_only_keys:
            return True
    return False
def _aggregate_daily_summaries(self, queryset):
"""
Re-aggregate daily summaries across the date range.
Takes pre-computed daily summaries and aggregates them by check_id
to produce totals across the selected date range.
"""
from django.db.models import CharField
from django.db.models.functions import Cast
"""Re-aggregate summary rows by check_id."""
return queryset.values("check_id").annotate(
# Max severity across days
severity_order=Max("severity_order"),
# Sum counts across days
pass_count=Sum("pass_count"),
fail_count=Sum("fail_count"),
muted_count=Sum("muted_count"),
@@ -6944,22 +6953,99 @@ class FindingGroupViewSet(BaseRLSViewSet):
changed_count=Sum("changed_count"),
resources_total=Sum("resources_total"),
resources_fail=Sum("resources_fail"),
# Collect provider types using StringAgg (cast enum to text first)
impacted_providers_str=StringAgg(
Cast("provider__provider", CharField()),
delimiter=",",
distinct=True,
default="",
),
# Min/Max timing across days
first_seen_at=Min("first_seen_at"),
last_seen_at=Max("last_seen_at"),
failing_since=Min("failing_since"),
# Get check metadata from first row (same for all days)
agg_first_seen_at=Min("first_seen_at"),
agg_last_seen_at=Max("last_seen_at"),
agg_failing_since=Min("failing_since"),
check_title=Max("check_title"),
check_description=Max("check_description"),
)
def _aggregate_findings(self, queryset):
    """Aggregate findings by check_id for finding-group endpoints.

    Groups ``queryset`` by ``check_id`` and annotates each group with the
    highest severity order, per-status/delta finding counts, distinct
    resource counts, the distinct provider types as a comma-separated
    string, first/last/failing timestamps and the check title/description
    read from ``check_metadata``.

    Returns:
        A values queryset with one row per ``check_id``.
    """
    severity_case = Case(
        *[
            When(severity=severity, then=Value(order))
            for severity, order in SEVERITY_ORDER.items()
        ],
        output_field=IntegerField(),
    )

    # The resource aggregates below join the ``resources`` relation, which
    # yields one row per (finding, resource) pair. Finding counts therefore
    # use ``distinct=True`` so a finding linked to several resources is
    # counted once instead of once per resource.
    return queryset.values("check_id").annotate(
        severity_order=Max(severity_case),
        pass_count=Count(
            "id", distinct=True, filter=Q(status="PASS", muted=False)
        ),
        fail_count=Count(
            "id", distinct=True, filter=Q(status="FAIL", muted=False)
        ),
        muted_count=Count("id", distinct=True, filter=Q(muted=True)),
        new_count=Count("id", distinct=True, filter=Q(delta="new", muted=False)),
        changed_count=Count(
            "id", distinct=True, filter=Q(delta="changed", muted=False)
        ),
        resources_total=Count("resources__id", distinct=True),
        resources_fail=Count(
            "resources__id",
            distinct=True,
            filter=Q(status="FAIL", muted=False),
        ),
        impacted_providers_str=StringAgg(
            Cast("scan__provider__provider", CharField()),
            delimiter=",",
            distinct=True,
            default="",
        ),
        agg_first_seen_at=Min("first_seen_at"),
        agg_last_seen_at=Max("inserted_at"),
        agg_failing_since=Min(
            "first_seen_at", filter=Q(status="FAIL", muted=False)
        ),
        # The title/description keys appear under several spellings in
        # ``check_metadata``; take the first spelling that has a value.
        check_title=Coalesce(
            Max(Cast("check_metadata__CheckTitle", CharField())),
            Max(Cast("check_metadata__checktitle", CharField())),
            Max(Cast("check_metadata__Checktitle", CharField())),
        ),
        check_description=Coalesce(
            Max(Cast("check_metadata__Description", CharField())),
            Max(Cast("check_metadata__description", CharField())),
        ),
    )
def _split_computed_aggregate_filters(
    self, params: QueryDict
) -> tuple[QueryDict, QueryDict]:
    """Partition ``params`` into finding filters and computed aggregate filters.

    Keys ``status``, ``status__in``, ``severity`` and ``severity__in`` go to the
    second QueryDict; every other key goes to the first. Value lists are
    copied as-is.
    """
    aggregate_only = {"status", "status__in", "severity", "severity__in"}
    finding_params = QueryDict(mutable=True)
    computed_params = QueryDict(mutable=True)

    for param_name, param_values in params.lists():
        destination = (
            computed_params if param_name in aggregate_only else finding_params
        )
        destination.setlist(param_name, param_values)

    return finding_params, computed_params
def _get_latest_findings_per_check_provider(self, filtered_queryset):
    """Restrict findings to the top-ranked scan of each (check_id, provider).

    Scans are ranked with ``DenseRank`` inside each (check_id, provider)
    partition, newest ``completed_at`` first (nulls last) and ``scan_id``
    descending as tie-breaker. Every finding sharing rank 1 is kept, so all
    findings of that scan survive rather than a single row.
    """
    newest_scan_first = [
        F("scan__completed_at").desc(nulls_last=True),
        F("scan_id").desc(),
    ]
    ranked_findings = filtered_queryset.annotate(
        scan_rank=Window(
            expression=DenseRank(),
            partition_by=[F("check_id"), F("scan__provider_id")],
            order_by=newest_scan_first,
        )
    )
    top_ranked_ids = ranked_findings.filter(scan_rank=1).values("id")
    return filtered_queryset.filter(id__in=Subquery(top_ranked_ids))
def _post_process_aggregation(self, aggregated_data):
"""
Post-process aggregation results to add computed fields.
@@ -6976,6 +7062,13 @@ class FindingGroupViewSet(BaseRLSViewSet):
severity_order, "informational"
)
if "agg_first_seen_at" in row:
row["first_seen_at"] = row.pop("agg_first_seen_at")
if "agg_last_seen_at" in row:
row["last_seen_at"] = row.pop("agg_last_seen_at")
if "agg_failing_since" in row:
row["failing_since"] = row.pop("agg_failing_since")
# Compute aggregated status
if row.get("fail_count", 0) > 0:
row["status"] = "FAIL"
@@ -6998,6 +7091,7 @@ class FindingGroupViewSet(BaseRLSViewSet):
"""Validate and map JSON:API sort fields for aggregated finding groups."""
sort_field_map = {
"check_id": "check_id",
"check_title": "check_title",
"severity": "severity_order",
"fail_count": "fail_count",
"pass_count": "pass_count",
@@ -7006,9 +7100,9 @@ class FindingGroupViewSet(BaseRLSViewSet):
"changed_count": "changed_count",
"resources_total": "resources_total",
"resources_fail": "resources_fail",
"first_seen_at": "first_seen_at",
"last_seen_at": "last_seen_at",
"failing_since": "failing_since",
"first_seen_at": "agg_first_seen_at",
"last_seen_at": "agg_last_seen_at",
"failing_since": "agg_failing_since",
}
ordering = []
@@ -7035,6 +7129,29 @@ class FindingGroupViewSet(BaseRLSViewSet):
return ordering
def _apply_aggregated_computed_filters(self, queryset, computed_params: QueryDict):
    """Filter aggregated finding-group rows by computed status/severity.

    Args:
        queryset: Aggregated finding-group queryset.
        computed_params: Query parameters holding only the computed
            filters; when empty the queryset is returned untouched.

    Returns:
        The queryset produced by ``FindingGroupAggregatedComputedFilter``.

    Raises:
        ValidationError: If the computed filterset rejects the parameters.
    """
    if not computed_params:
        return queryset

    status_requested = computed_params.get("status") or computed_params.getlist(
        "status__in"
    )
    if status_requested:
        # Derive the group status in SQL so it can be filtered on:
        # any failure wins, then any pass, otherwise the group is muted.
        status_expression = Case(
            When(fail_count__gt=0, then=Value("FAIL")),
            When(pass_count__gt=0, then=Value("PASS")),
            default=Value("MUTED"),
            output_field=CharField(),
        )
        queryset = queryset.annotate(aggregated_status=status_expression)

    computed_filterset = FindingGroupAggregatedComputedFilter(
        computed_params, queryset=queryset
    )
    if computed_filterset.is_valid():
        return computed_filterset.qs
    raise ValidationError(computed_filterset.errors)
def _build_resource_mapping_queryset(
self, filtered_queryset, resource_ids=None, tenant_id: str | None = None
):
@@ -7107,6 +7224,7 @@ class FindingGroupViewSet(BaseRLSViewSet):
),
first_seen_at=Min("finding__first_seen_at"),
last_seen_at=Max("finding__inserted_at"),
muted_reason=Max("finding__muted_reason"),
)
.filter(resource_id__isnull=False)
.order_by("resource_id")
@@ -7142,56 +7260,90 @@ class FindingGroupViewSet(BaseRLSViewSet):
),
"first_seen_at": row["first_seen_at"],
"last_seen_at": row["last_seen_at"],
"muted_reason": row.get("muted_reason"),
}
)
return results
def _build_aggregated_queryset(self, finding_params, latest=False):
    """Build the aggregated queryset from findings or from daily summaries.

    The findings path is taken when
    ``_requires_finding_level_aggregation`` says the parameters need it;
    otherwise the summary queryset from ``get_queryset`` is used.

    Args:
        finding_params: Filter parameters without the computed filters.
        latest: When truthy, use the ``Latest*`` filtersets and keep only
            the newest data per (check_id, provider) before aggregating.

    Returns:
        The result of ``_aggregate_findings`` or
        ``_aggregate_daily_summaries`` for the filtered rows.

    Raises:
        ValidationError: If the selected filterset is not valid.
    """
    if latest:
        finding_filter_cls = LatestFindingGroupFilter
        summary_filter_cls = LatestFindingGroupSummaryFilter
    else:
        finding_filter_cls = FindingGroupFilter
        summary_filter_cls = FindingGroupSummaryFilter

    if self._requires_finding_level_aggregation(finding_params, latest=latest):
        # Findings path: filter raw findings, then aggregate them.
        base_findings = self._get_finding_queryset()
        finding_filterset = finding_filter_cls(
            finding_params, queryset=base_findings
        )
        if not finding_filterset.is_valid():
            raise ValidationError(finding_filterset.errors)

        matched_findings = finding_filterset.qs
        if latest:
            matched_findings = self._get_latest_findings_per_check_provider(
                matched_findings
            )
        return self._aggregate_findings(matched_findings)

    # Summary path: filter the pre-aggregated rows, then re-aggregate.
    base_summaries = self.get_queryset()
    summary_filterset = summary_filter_cls(finding_params, queryset=base_summaries)
    if not summary_filterset.is_valid():
        raise ValidationError(summary_filterset.errors)

    matched_summaries = summary_filterset.qs
    if latest:
        # With this ordering, distinct on (check_id, provider_id) keeps the
        # row with the greatest inserted_at of every pair.
        newest_row_ids = (
            matched_summaries.order_by("check_id", "provider_id", "-inserted_at")
            .distinct("check_id", "provider_id")
            .values("id")
        )
        matched_summaries = matched_summaries.filter(
            id__in=Subquery(newest_row_ids)
        )
    return self._aggregate_daily_summaries(matched_summaries)
def _sorted_paginated_response(self, request, aggregated_queryset):
    """Order, paginate, post-process and serialize an aggregated queryset.

    Args:
        request: Incoming request; its ``sort`` query parameter, when
            present, is validated through ``_validate_sort_fields``.
        aggregated_queryset: Aggregated finding-group queryset.

    Returns:
        The paginated response when ``paginate_queryset`` yields a page,
        otherwise a plain ``Response`` with every serialized row.
    """
    requested_sort = request.query_params.get("sort")
    if requested_sort:
        order_fields = self._validate_sort_fields(requested_sort)
    else:
        # Default: most failures first, then severity, then check_id.
        order_fields = ["-fail_count", "-severity_order", "check_id"]

    # A requested sort that maps to no fields leaves the ordering untouched.
    if order_fields:
        aggregated_queryset = aggregated_queryset.order_by(*order_fields)

    page = self.paginate_queryset(aggregated_queryset)
    if page is None:
        rows = self._post_process_aggregation(aggregated_queryset)
        return Response(self.get_serializer(rows, many=True).data)

    rows = self._post_process_aggregation(page)
    page_serializer = self.get_serializer(rows, many=True)
    return self.get_paginated_response(page_serializer.data)
def list(self, request, *args, **kwargs):
    """
    List finding groups with aggregation and filtering.

    Returns findings grouped by check_id with aggregated metrics.
    Requires at least one date filter for performance.
    Uses summaries when possible and raw findings for finding-level filters.

    The work is delegated to the shared helpers so this action stays in
    step with the other finding-group listings:

    1. normalize the JSON:API query parameters,
    2. split computed (status/severity) filters from the remaining ones,
    3. build the aggregated queryset (``latest=False``),
    4. apply the computed filters on the aggregated rows,
    5. sort, paginate and serialize.

    Raises:
        ValidationError: Propagated from the helpers when a filterset
            rejects the supplied parameters.
    """
    normalized_params = self._normalize_jsonapi_params(request.query_params)
    finding_params, computed_params = self._split_computed_aggregate_filters(
        normalized_params
    )

    aggregated_qs = self._build_aggregated_queryset(finding_params, latest=False)
    # Computed filters work on aggregated columns, so they can only be
    # applied once the aggregation has been built.
    aggregated_qs = self._apply_aggregated_computed_filters(
        aggregated_qs, computed_params
    )
    return self._sorted_paginated_response(request, aggregated_qs)
@extend_schema(
summary="List latest finding groups",
@@ -7209,58 +7361,22 @@ class FindingGroupViewSet(BaseRLSViewSet):
"""
List the latest finding group state per check_id.
Returns findings grouped by check_id using the latest available
inserted_at date per check_id, without requiring date filters.
Returns findings grouped by check_id using latest data per
(check_id, provider), without requiring date filters.
"""
queryset = self.get_queryset()
# Apply other filters (provider_id, provider_type, check_id, etc.)
normalized_params = self._normalize_jsonapi_params(request.query_params)
# Remove date filters since we're using latest
for key in list(normalized_params.keys()):
if key.startswith("inserted_at"):
del normalized_params[key]
filterset_class = self.get_filterset_class()
filterset = filterset_class(normalized_params, queryset=queryset)
if not filterset.is_valid():
raise ValidationError(filterset.errors)
filtered_queryset = filterset.qs
# Keep only the latest row per (check_id, provider), then aggregate by check_id.
latest_per_check_ids = (
filtered_queryset.order_by("check_id", "provider_id", "-inserted_at")
.distinct("check_id", "provider_id")
.values("id")
finding_params, computed_params = self._split_computed_aggregate_filters(
normalized_params
)
latest_per_check = filtered_queryset.filter(
id__in=Subquery(latest_per_check_ids)
aggregated_qs = self._build_aggregated_queryset(finding_params, latest=True)
aggregated_qs = self._apply_aggregated_computed_filters(
aggregated_qs, computed_params
)
# Re-aggregate daily summaries
aggregated_queryset = self._aggregate_daily_summaries(latest_per_check)
# Apply ordering
sort_param = request.query_params.get("sort")
if sort_param:
ordering = self._validate_sort_fields(sort_param)
if ordering:
aggregated_queryset = aggregated_queryset.order_by(*ordering)
else:
aggregated_queryset = aggregated_queryset.order_by(
"-fail_count", "-severity_order", "check_id"
)
# Paginate
page = self.paginate_queryset(aggregated_queryset)
if page is not None:
processed_data = self._post_process_aggregation(page)
serializer = self.get_serializer(processed_data, many=True)
return self.get_paginated_response(serializer.data)
processed_data = self._post_process_aggregation(aggregated_queryset)
serializer = self.get_serializer(processed_data, many=True)
return Response(serializer.data)
return self._sorted_paginated_response(request, aggregated_qs)
@extend_schema(
summary="List resources for a finding group",