diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index f74662509c..cb01a36de2 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to the **Prowler API** are documented in this file. ### 🚀 Added +- Finding group summaries and resources endpoints for hierarchical findings views [(#9961)](https://github.com/prowler-cloud/prowler/pull/9961) - OpenStack provider support [(#10003)](https://github.com/prowler-cloud/prowler/pull/10003) - PDF report for the CSA CCM compliance framework [(#10088)](https://github.com/prowler-cloud/prowler/pull/10088) diff --git a/api/src/backend/api/constants.py b/api/src/backend/api/constants.py new file mode 100644 index 0000000000..c209de9de6 --- /dev/null +++ b/api/src/backend/api/constants.py @@ -0,0 +1,7 @@ +SEVERITY_ORDER = { + "critical": 5, + "high": 4, + "medium": 3, + "low": 2, + "informational": 1, +} diff --git a/api/src/backend/api/filters.py b/api/src/backend/api/filters.py index bf34950156..a64cc0ea13 100644 --- a/api/src/backend/api/filters.py +++ b/api/src/backend/api/filters.py @@ -23,13 +23,14 @@ from api.db_utils import ( StatusEnumField, ) from api.models import ( + AttackPathsScan, AttackSurfaceOverview, ComplianceRequirementOverview, DailySeveritySummary, Finding, + FindingGroupDailySummary, Integration, Invitation, - AttackPathsScan, LighthouseProviderConfiguration, LighthouseProviderModels, Membership, @@ -181,7 +182,7 @@ class CommonFindingFilters(FilterSet): help_text="If this filter is not provided, muted and non-muted findings will be returned." 
) - resources = UUIDInFilter(field_name="resource__id", lookup_expr="in") + resources = UUIDInFilter(field_name="resources__id", lookup_expr="in") region = CharFilter(method="filter_resource_region") region__in = CharInFilter(field_name="resource_regions", lookup_expr="overlap") @@ -469,9 +470,10 @@ class ResourceFilter(ProviderRelationshipFilterSet): class Meta: model = Resource fields = { + "id": ["exact", "in"], "provider": ["exact", "in"], - "uid": ["exact", "icontains"], - "name": ["exact", "icontains"], + "uid": ["exact", "icontains", "in"], + "name": ["exact", "icontains", "in"], "region": ["exact", "icontains", "in"], "service": ["exact", "icontains", "in"], "type": ["exact", "icontains", "in"], @@ -554,9 +556,10 @@ class LatestResourceFilter(ProviderRelationshipFilterSet): class Meta: model = Resource fields = { + "id": ["exact", "in"], "provider": ["exact", "in"], - "uid": ["exact", "icontains"], - "name": ["exact", "icontains"], + "uid": ["exact", "icontains", "in"], + "name": ["exact", "icontains", "in"], "region": ["exact", "icontains", "in"], "service": ["exact", "icontains", "in"], "type": ["exact", "icontains", "in"], @@ -647,16 +650,15 @@ class FindingFilter(CommonFindingFilters): ] ) - gte_date = ( - datetime.strptime(self.data.get("inserted_at__gte"), "%Y-%m-%d").date() - if self.data.get("inserted_at__gte") - else datetime.now(timezone.utc).date() - ) - lte_date = ( - datetime.strptime(self.data.get("inserted_at__lte"), "%Y-%m-%d").date() - if self.data.get("inserted_at__lte") - else datetime.now(timezone.utc).date() - ) + cleaned = self.form.cleaned_data + exact_date = cleaned.get("inserted_at") or cleaned.get("inserted_at__date") + gte_date = cleaned.get("inserted_at__gte") or exact_date + lte_date = cleaned.get("inserted_at__lte") or exact_date + + if gte_date is None: + gte_date = datetime.now(timezone.utc).date() + if lte_date is None: + lte_date = datetime.now(timezone.utc).date() if abs(lte_date - gte_date) > timedelta( 
days=settings.FINDINGS_MAX_DAYS_IN_RANGE @@ -779,6 +781,267 @@ class LatestFindingFilter(CommonFindingFilters): } +class FindingGroupFilter(CommonFindingFilters): + """ + Filter for FindingGroup aggregations. + + Requires at least one date filter for performance (partition pruning). + Inherits all provider, status, severity, region, service filters from CommonFindingFilters. + """ + + inserted_at = DateFilter(method="filter_inserted_at", lookup_expr="date") + inserted_at__date = DateFilter(method="filter_inserted_at", lookup_expr="date") + inserted_at__gte = DateFilter( + method="filter_inserted_at_gte", + help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + ) + inserted_at__lte = DateFilter( + method="filter_inserted_at_lte", + help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + ) + + check_id = CharFilter(field_name="check_id", lookup_expr="exact") + check_id__in = CharInFilter(field_name="check_id", lookup_expr="in") + check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains") + + class Meta: + model = Finding + fields = { + "check_id": ["exact", "in", "icontains"], + } + + def filter_queryset(self, queryset): + """Validate that at least one date filter is provided.""" + if not ( + self.data.get("inserted_at") + or self.data.get("inserted_at__date") + or self.data.get("inserted_at__gte") + or self.data.get("inserted_at__lte") + ): + raise ValidationError( + [ + { + "detail": "At least one date filter is required: filter[inserted_at], filter[inserted_at.gte], " + "or filter[inserted_at.lte].", + "status": 400, + "source": {"pointer": "/data/attributes/inserted_at"}, + "code": "required", + } + ] + ) + + # Validate date range doesn't exceed maximum + cleaned = self.form.cleaned_data + exact_date = cleaned.get("inserted_at") or cleaned.get("inserted_at__date") + gte_date = cleaned.get("inserted_at__gte") or exact_date + lte_date = cleaned.get("inserted_at__lte") or exact_date + + if 
gte_date is None: + gte_date = datetime.now(timezone.utc).date() + if lte_date is None: + lte_date = datetime.now(timezone.utc).date() + + if abs(lte_date - gte_date) > timedelta( + days=settings.FINDINGS_MAX_DAYS_IN_RANGE + ): + raise ValidationError( + [ + { + "detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + "status": 400, + "source": {"pointer": "/data/attributes/inserted_at"}, + "code": "invalid", + } + ] + ) + + return super().filter_queryset(queryset) + + def filter_inserted_at(self, queryset, name, value): + """Filter by exact date using UUIDv7 partition-aware filtering.""" + datetime_value = self._maybe_date_to_datetime(value) + start = uuid7_start(datetime_to_uuid7(datetime_value)) + end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1))) + return queryset.filter(id__gte=start, id__lt=end) + + def filter_inserted_at_gte(self, queryset, name, value): + """Filter by start date using UUIDv7 partition-aware filtering.""" + datetime_value = self._maybe_date_to_datetime(value) + start = uuid7_start(datetime_to_uuid7(datetime_value)) + return queryset.filter(id__gte=start) + + def filter_inserted_at_lte(self, queryset, name, value): + """Filter by end date using UUIDv7 partition-aware filtering.""" + datetime_value = self._maybe_date_to_datetime(value) + end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1))) + return queryset.filter(id__lt=end) + + @staticmethod + def _maybe_date_to_datetime(value): + """Convert date to datetime if needed.""" + dt = value + if isinstance(value, date): + dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc) + return dt + + +class LatestFindingGroupFilter(CommonFindingFilters): + """ + Filter for FindingGroup resources in /latest endpoint. + + Same as FindingGroupFilter but without date validation. 
+ """ + + check_id = CharFilter(field_name="check_id", lookup_expr="exact") + check_id__in = CharInFilter(field_name="check_id", lookup_expr="in") + check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains") + + class Meta: + model = Finding + fields = { + "check_id": ["exact", "in", "icontains"], + } + + +class FindingGroupSummaryFilter(FilterSet): + """ + Filter for FindingGroupDailySummary queries. + + Filters the pre-aggregated summary table by date range, check_id, and provider. + Requires at least one date filter for performance. + """ + + inserted_at = DateFilter(method="filter_inserted_at", lookup_expr="date") + inserted_at__date = DateFilter(method="filter_inserted_at", lookup_expr="date") + inserted_at__gte = DateFilter( + method="filter_inserted_at_gte", + help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + ) + inserted_at__lte = DateFilter( + method="filter_inserted_at_lte", + help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + ) + + # Check ID filters + check_id = CharFilter(field_name="check_id", lookup_expr="exact") + check_id__in = CharInFilter(field_name="check_id", lookup_expr="in") + check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains") + + # Provider filters + provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact") + provider_id__in = UUIDInFilter(field_name="provider_id", lookup_expr="in") + provider_type = ChoiceFilter( + field_name="provider__provider", choices=Provider.ProviderChoices.choices + ) + provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in") + + class Meta: + model = FindingGroupDailySummary + fields = { + "check_id": ["exact", "in", "icontains"], + "inserted_at": ["date", "gte", "lte"], + "provider_id": ["exact", "in"], + } + + def filter_queryset(self, queryset): + if not ( + self.data.get("inserted_at") + or self.data.get("inserted_at__date") + or 
self.data.get("inserted_at__gte") + or self.data.get("inserted_at__lte") + ): + raise ValidationError( + [ + { + "detail": "At least one date filter is required: filter[inserted_at], filter[inserted_at.gte], " + "or filter[inserted_at.lte].", + "status": 400, + "source": {"pointer": "/data/attributes/inserted_at"}, + "code": "required", + } + ] + ) + + cleaned = self.form.cleaned_data + exact_date = cleaned.get("inserted_at") or cleaned.get("inserted_at__date") + gte_date = cleaned.get("inserted_at__gte") or exact_date + lte_date = cleaned.get("inserted_at__lte") or exact_date + + if gte_date is None: + gte_date = datetime.now(timezone.utc).date() + if lte_date is None: + lte_date = datetime.now(timezone.utc).date() + + if abs(lte_date - gte_date) > timedelta( + days=settings.FINDINGS_MAX_DAYS_IN_RANGE + ): + raise ValidationError( + [ + { + "detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.", + "status": 400, + "source": {"pointer": "/data/attributes/inserted_at"}, + "code": "invalid", + } + ] + ) + + return super().filter_queryset(queryset) + + def filter_inserted_at(self, queryset, name, value): + """Filter by exact inserted_at date.""" + datetime_value = self._maybe_date_to_datetime(value) + start = datetime_value + end = datetime_value + timedelta(days=1) + return queryset.filter(inserted_at__gte=start, inserted_at__lt=end) + + def filter_inserted_at_gte(self, queryset, name, value): + """Filter by inserted_at >= value (date boundary).""" + datetime_value = self._maybe_date_to_datetime(value) + return queryset.filter(inserted_at__gte=datetime_value) + + def filter_inserted_at_lte(self, queryset, name, value): + """Filter by inserted_at <= value (inclusive date boundary).""" + datetime_value = self._maybe_date_to_datetime(value) + return queryset.filter(inserted_at__lt=datetime_value + timedelta(days=1)) + + @staticmethod + def _maybe_date_to_datetime(value): + dt = value + if isinstance(value, date): + dt = 
datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc) + return dt + + +class LatestFindingGroupSummaryFilter(FilterSet): + """ + Filter for FindingGroupDailySummary /latest endpoint. + + Same as FindingGroupSummaryFilter but without date validation. + Used when the endpoint automatically determines the date. + """ + + # Check ID filters + check_id = CharFilter(field_name="check_id", lookup_expr="exact") + check_id__in = CharInFilter(field_name="check_id", lookup_expr="in") + check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains") + + # Provider filters + provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact") + provider_id__in = UUIDInFilter(field_name="provider_id", lookup_expr="in") + provider_type = ChoiceFilter( + field_name="provider__provider", choices=Provider.ProviderChoices.choices + ) + provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in") + + class Meta: + model = FindingGroupDailySummary + fields = { + "check_id": ["exact", "in", "icontains"], + "provider_id": ["exact", "in"], + } + + class ProviderSecretFilter(FilterSet): inserted_at = DateFilter( field_name="inserted_at", diff --git a/api/src/backend/api/migrations/0081_finding_group_daily_summary.py b/api/src/backend/api/migrations/0081_finding_group_daily_summary.py new file mode 100644 index 0000000000..31c09c464f --- /dev/null +++ b/api/src/backend/api/migrations/0081_finding_group_daily_summary.py @@ -0,0 +1,132 @@ +# Generated by Django 5.1.15 on 2026-01-26 + +import uuid + +import django.db.models.deletion +from django.contrib.postgres.indexes import GinIndex, OpClass +from django.db import migrations, models +from django.db.models.functions import Upper +from django.utils import timezone + +import api.rls + + +class Migration(migrations.Migration): + dependencies = [ + ("api", "0080_backfill_attack_paths_graph_data_ready"), + ] + + operations = [ + migrations.CreateModel( + name="FindingGroupDailySummary", + 
fields=[ + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "inserted_at", + models.DateTimeField(default=timezone.now, editable=False), + ), + ("updated_at", models.DateTimeField(auto_now=True, editable=False)), + ("check_id", models.CharField(db_index=True, max_length=255)), + ( + "check_title", + models.CharField(blank=True, max_length=500, null=True), + ), + ("check_description", models.TextField(blank=True, null=True)), + ("severity_order", models.SmallIntegerField(default=1)), + ("pass_count", models.IntegerField(default=0)), + ("fail_count", models.IntegerField(default=0)), + ("muted_count", models.IntegerField(default=0)), + ("new_count", models.IntegerField(default=0)), + ("changed_count", models.IntegerField(default=0)), + ("resources_fail", models.IntegerField(default=0)), + ("resources_total", models.IntegerField(default=0)), + ("first_seen_at", models.DateTimeField(blank=True, null=True)), + ("last_seen_at", models.DateTimeField(blank=True, null=True)), + ("failing_since", models.DateTimeField(blank=True, null=True)), + ( + "tenant", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="api.tenant", + ), + ), + ( + "provider", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="finding_group_summaries", + to="api.provider", + ), + ), + ], + options={ + "db_table": "finding_group_daily_summaries", + "abstract": False, + }, + ), + migrations.AddIndex( + model_name="findinggroupdailysummary", + index=models.Index( + fields=["tenant_id", "inserted_at"], + name="fgds_tenant_inserted_at_idx", + ), + ), + migrations.AddIndex( + model_name="findinggroupdailysummary", + index=models.Index( + fields=["tenant_id", "provider", "inserted_at"], + name="fgds_tenant_prov_ins_idx", + ), + ), + migrations.AddIndex( + model_name="findinggroupdailysummary", + index=models.Index( + fields=["tenant_id", "check_id", "inserted_at"], + 
name="fgds_tenant_chk_ins_idx", + ), + ), + migrations.AddIndex( + model_name="resource", + index=GinIndex( + OpClass(Upper("uid"), name="gin_trgm_ops"), + name="res_uid_trgm_idx", + ), + ), + migrations.AddIndex( + model_name="resource", + index=GinIndex( + OpClass(Upper("name"), name="gin_trgm_ops"), + name="res_name_trgm_idx", + ), + ), + migrations.AddConstraint( + model_name="findinggroupdailysummary", + constraint=models.UniqueConstraint( + fields=("tenant_id", "provider", "check_id", "inserted_at"), + name="unique_finding_group_daily_summary", + ), + ), + migrations.AddConstraint( + model_name="findinggroupdailysummary", + constraint=api.rls.RowLevelSecurityConstraint( + "tenant_id", + name="rls_on_findinggroupdailysummary", + statements=["SELECT", "INSERT", "UPDATE", "DELETE"], + ), + ), + migrations.AddIndex( + model_name="finding", + index=models.Index( + fields=["tenant_id", "check_id", "inserted_at"], + name="find_tenant_check_ins_idx", + ), + ), + ] diff --git a/api/src/backend/api/migrations/0082_backfill_finding_group_summaries.py b/api/src/backend/api/migrations/0082_backfill_finding_group_summaries.py new file mode 100644 index 0000000000..38cc07f43d --- /dev/null +++ b/api/src/backend/api/migrations/0082_backfill_finding_group_summaries.py @@ -0,0 +1,30 @@ +# Generated by Django 5.1.14 on 2026-02-02 + +from django.db import migrations +from tasks.tasks import backfill_finding_group_summaries_task + +from api.db_router import MainRouter +from api.rls import Tenant + + +def trigger_backfill_task(apps, schema_editor): + """ + Trigger the backfill task for all tenants. + + This dispatches backfill_finding_group_summaries_task for each tenant + in the system to populate FindingGroupDailySummary records from historical scans. 
+ """ + tenant_ids = Tenant.objects.using(MainRouter.admin_db).values_list("id", flat=True) + + for tenant_id in tenant_ids: + backfill_finding_group_summaries_task.delay(tenant_id=str(tenant_id), days=30) + + +class Migration(migrations.Migration): + dependencies = [ + ("api", "0081_finding_group_daily_summary"), + ] + + operations = [ + migrations.RunPython(trigger_backfill_task, migrations.RunPython.noop), + ] diff --git a/api/src/backend/api/models.py b/api/src/backend/api/models.py index 5597963216..882abeb7b2 100644 --- a/api/src/backend/api/models.py +++ b/api/src/backend/api/models.py @@ -12,12 +12,15 @@ from cryptography.fernet import Fernet, InvalidToken from django.conf import settings from django.contrib.auth.models import AbstractBaseUser from django.contrib.postgres.fields import ArrayField +from django.contrib.postgres.indexes import GinIndex, OpClass from django.contrib.postgres.search import SearchVector, SearchVectorField from django.contrib.sites.models import Site from django.core.exceptions import ValidationError from django.core.validators import MinLengthValidator from django.db import models from django.db.models import Q +from django.db.models.functions import Upper +from django.utils import timezone as django_timezone from django.utils.translation import gettext_lazy as _ from django_celery_beat.models import PeriodicTask from django_celery_results.models import TaskResult @@ -855,6 +858,16 @@ class Resource(RowLevelSecurityProtectedModel): fields=["tenant_id", "service", "region", "type"], name="resource_tenant_metadata_idx", ), + # icontains compiles to UPPER(field) LIKE, so index the same expression + GinIndex( + OpClass(Upper("uid"), name="gin_trgm_ops"), + name="res_uid_trgm_idx", + ), + GinIndex( + OpClass(Upper("name"), name="gin_trgm_ops"), + name="res_name_trgm_idx", + ), + GinIndex(fields=["text_search"], name="gin_resources_search_idx"), models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"), models.Index( 
fields=["tenant_id", "provider_id"], @@ -1052,6 +1065,10 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel): fields=["tenant_id", "uid", "-inserted_at"], name="find_tenant_uid_inserted_idx", ), + models.Index( + fields=["tenant_id", "check_id", "inserted_at"], + name="find_tenant_check_ins_idx", + ), models.Index( fields=["tenant_id", "scan_id", "check_id"], name="find_tenant_scan_check_idx", @@ -1669,6 +1686,89 @@ class DailySeveritySummary(RowLevelSecurityProtectedModel): ] +class FindingGroupDailySummary(RowLevelSecurityProtectedModel): + """ + Pre-aggregated daily finding counts per check_id per provider. + Used by finding-groups endpoint for efficient queries over date ranges. + + Instead of aggregating millions of findings on-the-fly, we pre-compute + daily summaries and re-aggregate them when querying date ranges. + This reduces query complexity from O(findings) to O(days × checks × providers). + """ + + objects = ActiveProviderManager() + + id = models.UUIDField(primary_key=True, default=uuid4, editable=False) + inserted_at = models.DateTimeField(default=django_timezone.now, editable=False) + updated_at = models.DateTimeField(auto_now=True, editable=False) + check_id = models.CharField(max_length=255, db_index=True) + + # Provider FK for filtering by specific provider + provider = models.ForeignKey( + "Provider", + on_delete=models.CASCADE, + related_name="finding_group_summaries", + ) + + # Check metadata (denormalized for performance) + check_title = models.CharField(max_length=500, blank=True, null=True) + check_description = models.TextField(blank=True, null=True) + + # Severity stored as integer for MAX aggregation (5=critical, 4=high, etc.) 
+ severity_order = models.SmallIntegerField(default=1) + + # Finding counts + pass_count = models.IntegerField(default=0) + fail_count = models.IntegerField(default=0) + muted_count = models.IntegerField(default=0) + + # Delta counts + new_count = models.IntegerField(default=0) + changed_count = models.IntegerField(default=0) + + # Resource counts + resources_fail = models.IntegerField(default=0) + resources_total = models.IntegerField(default=0) + + # Timing + first_seen_at = models.DateTimeField(null=True, blank=True) + last_seen_at = models.DateTimeField(null=True, blank=True) + failing_since = models.DateTimeField(null=True, blank=True) + + class Meta(RowLevelSecurityProtectedModel.Meta): + db_table = "finding_group_daily_summaries" + + constraints = [ + models.UniqueConstraint( + fields=("tenant_id", "provider", "check_id", "inserted_at"), + name="unique_finding_group_daily_summary", + ), + RowLevelSecurityConstraint( + field="tenant_id", + name="rls_on_%(class)s", + statements=["SELECT", "INSERT", "UPDATE", "DELETE"], + ), + ] + + indexes = [ + models.Index( + fields=["tenant_id", "inserted_at"], + name="fgds_tenant_inserted_at_idx", + ), + models.Index( + fields=["tenant_id", "check_id", "inserted_at"], + name="fgds_tenant_chk_ins_idx", + ), + models.Index( + fields=["tenant_id", "provider", "inserted_at"], + name="fgds_tenant_prov_ins_idx", + ), + ] + + class JSONAPIMeta: + resource_name = "finding-group-daily-summaries" + + class Integration(RowLevelSecurityProtectedModel): class IntegrationChoices(models.TextChoices): AMAZON_S3 = "amazon_s3", _("Amazon S3") diff --git a/api/src/backend/api/specs/v1.yaml b/api/src/backend/api/specs/v1.yaml index 0a2b4327ca..659962badd 100644 --- a/api/src/backend/api/specs/v1.yaml +++ b/api/src/backend/api/specs/v1.yaml @@ -1134,6 +1134,365 @@ paths: description: The task is in progress '500': description: Compliance overviews generation task failed + /api/v1/finding-groups: + get: + operationId: finding_groups_list + 
description: "\n Retrieve aggregated findings grouped by check_id.\n\n\ + \ Each group shows:\n - Aggregated status (FAIL if any non-muted\ + \ failure)\n - Maximum severity across all findings\n - Resource\ + \ counts (failing vs total)\n - Finding counts by status and delta\n\ + \ - Affected provider types\n\n At least one date filter is\ + \ required for performance reasons.\n " + summary: List finding groups + parameters: + - in: query + name: fields[finding-groups] + schema: + type: array + items: + type: string + enum: + - id + - check_id + - check_title + - check_description + - severity + - status + - impacted_providers + - resources_fail + - resources_total + - pass_count + - fail_count + - muted_count + - new_count + - changed_count + - first_seen_at + - last_seen_at + - failing_since + description: endpoint return only specific fields in the response on a per-type + basis by including a fields[TYPE] query parameter. + explode: false + - in: query + name: filter[check_id] + schema: + type: string + - in: query + name: filter[check_id__icontains] + schema: + type: string + - in: query + name: filter[check_id__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form + - in: query + name: filter[inserted_at] + schema: + type: string + format: date + - in: query + name: filter[inserted_at__date] + schema: + type: string + format: date + - in: query + name: filter[inserted_at__gte] + schema: + type: string + format: date + description: Maximum date range is 7 days. + - in: query + name: filter[inserted_at__lte] + schema: + type: string + format: date + description: Maximum date range is 7 days. + - in: query + name: filter[provider_id] + schema: + type: string + format: uuid + - in: query + name: filter[provider_id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. 
+ explode: false + style: form + - in: query + name: filter[provider_type] + schema: + type: string + x-spec-enum-id: 4b8815b179aa7216 + enum: + - alibabacloud + - aws + - azure + - cloudflare + - gcp + - github + - iac + - kubernetes + - m365 + - mongodbatlas + - openstack + - oraclecloud + description: |- + * `aws` - AWS + * `azure` - Azure + * `gcp` - GCP + * `kubernetes` - Kubernetes + * `m365` - M365 + * `github` - GitHub + * `mongodbatlas` - MongoDB Atlas + * `iac` - IaC + * `oraclecloud` - Oracle Cloud Infrastructure + * `alibabacloud` - Alibaba Cloud + * `cloudflare` - Cloudflare + * `openstack` - OpenStack + - in: query + name: filter[provider_type__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form + - name: filter[search] + required: false + in: query + description: A search term. + schema: + type: string + - name: page[number] + required: false + in: query + description: A page number within the paginated result set. + schema: + type: integer + - name: page[size] + required: false + in: query + description: Number of results to return per page. 
+ schema: + type: integer + - name: sort + required: false + in: query + description: '[list of fields to sort by](https://jsonapi.org/format/#fetching-sorting)' + schema: + type: array + items: + type: string + enum: + - id + - -id + - check_id + - -check_id + - check_title + - -check_title + - check_description + - -check_description + - severity + - -severity + - status + - -status + - impacted_providers + - -impacted_providers + - resources_fail + - -resources_fail + - resources_total + - -resources_total + - pass_count + - -pass_count + - fail_count + - -fail_count + - muted_count + - -muted_count + - new_count + - -new_count + - changed_count + - -changed_count + - first_seen_at + - -first_seen_at + - last_seen_at + - -last_seen_at + - failing_since + - -failing_since + explode: false + tags: + - Finding Groups + security: + - JWT or API Key: [] + responses: + '200': + content: + application/vnd.api+json: + schema: + $ref: '#/components/schemas/PaginatedFindingGroupList' + description: '' + /api/v1/finding-groups/{id}/resources: + get: + operationId: finding_groups_resources_retrieve + description: "\n Retrieve resources affected by a specific check (finding\ + \ group).\n\n Returns individual resources with their current status,\ + \ severity,\n and timing information including how long they have been\ + \ failing.\n " + summary: List resources for a finding group + parameters: + - in: query + name: fields[finding-groups] + schema: + type: array + items: + type: string + enum: + - id + - check_id + - check_title + - check_description + - severity + - status + - impacted_providers + - resources_fail + - resources_total + - pass_count + - fail_count + - muted_count + - new_count + - changed_count + - first_seen_at + - last_seen_at + - failing_since + description: endpoint return only specific fields in the response on a per-type + basis by including a fields[TYPE] query parameter. 
+ explode: false + - in: path + name: id + schema: + type: string + format: uuid + description: A UUID string identifying this finding group daily summary. + required: true + tags: + - Finding Groups + security: + - JWT or API Key: [] + responses: + '200': + content: + application/vnd.api+json: + schema: + $ref: '#/components/schemas/FindingGroupResponse' + description: '' + /api/v1/finding-groups/latest: + get: + operationId: finding_groups_latest_retrieve + description: "\n Retrieve the latest available state for each finding\ + \ group (check_id).\n\n This endpoint returns finding groups without\ + \ requiring date filters,\n automatically using the latest available\ + \ data per check_id.\n All other filters (provider_id, provider_type,\ + \ check_id) are still supported.\n " + summary: List latest finding groups + parameters: + - in: query + name: fields[finding-groups] + schema: + type: array + items: + type: string + enum: + - id + - check_id + - check_title + - check_description + - severity + - status + - impacted_providers + - resources_fail + - resources_total + - pass_count + - fail_count + - muted_count + - new_count + - changed_count + - first_seen_at + - last_seen_at + - failing_since + description: endpoint return only specific fields in the response on a per-type + basis by including a fields[TYPE] query parameter. + explode: false + tags: + - Finding Groups + security: + - JWT or API Key: [] + responses: + '200': + content: + application/vnd.api+json: + schema: + $ref: '#/components/schemas/FindingGroupResponse' + description: '' + /api/v1/finding-groups/latest/{check_id}/resources: + get: + operationId: finding_groups_latest_resources_retrieve + description: "\n Retrieve resources affected by a specific check (finding\ + \ group) from the\n latest completed scan for each provider.\n\n \ + \ Returns individual resources with their current status, severity,\n\ + \ and timing information. 
No date filters required.\n " + summary: List resources for a finding group from latest scans + parameters: + - in: path + name: check_id + schema: + type: string + required: true + - in: query + name: fields[finding-groups] + schema: + type: array + items: + type: string + enum: + - id + - check_id + - check_title + - check_description + - severity + - status + - impacted_providers + - resources_fail + - resources_total + - pass_count + - fail_count + - muted_count + - new_count + - changed_count + - first_seen_at + - last_seen_at + - failing_since + description: endpoint return only specific fields in the response on a per-type + basis by including a fields[TYPE] query parameter. + explode: false + tags: + - Finding Groups + security: + - JWT or API Key: [] + responses: + '200': + content: + application/vnd.api+json: + schema: + $ref: '#/components/schemas/FindingGroupResponse' + description: '' /api/v1/findings: get: operationId: findings_list @@ -8270,6 +8629,21 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[id] + schema: + type: string + format: uuid + - in: query + name: filter[id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[inserted_at] schema: @@ -8293,6 +8667,15 @@ paths: name: filter[name__icontains] schema: type: string + - in: query + name: filter[name__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[provider] schema: @@ -8521,6 +8904,15 @@ paths: name: filter[uid__icontains] schema: type: string + - in: query + name: filter[uid__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. 
+ explode: false + style: form - in: query name: filter[updated_at] schema: @@ -8791,6 +9183,21 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[id] + schema: + type: string + format: uuid + - in: query + name: filter[id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[name] schema: @@ -8799,6 +9206,15 @@ paths: name: filter[name__icontains] schema: type: string + - in: query + name: filter[name__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[provider] schema: @@ -9012,6 +9428,15 @@ paths: name: filter[uid__icontains] schema: type: string + - in: query + name: filter[uid__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: include schema: @@ -9095,6 +9520,21 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[id] + schema: + type: string + format: uuid + - in: query + name: filter[id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[inserted_at] schema: @@ -9118,6 +9558,15 @@ paths: name: filter[name__icontains] schema: type: string + - in: query + name: filter[name__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. 
+ explode: false + style: form - in: query name: filter[provider] schema: @@ -9346,6 +9795,15 @@ paths: name: filter[uid__icontains] schema: type: string + - in: query + name: filter[uid__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[updated_at] schema: @@ -9435,6 +9893,21 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: filter[id] + schema: + type: string + format: uuid + - in: query + name: filter[id__in] + schema: + type: array + items: + type: string + format: uuid + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[name] schema: @@ -9443,6 +9916,15 @@ paths: name: filter[name__icontains] schema: type: string + - in: query + name: filter[name__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: query name: filter[provider] schema: @@ -9656,6 +10138,15 @@ paths: name: filter[uid__icontains] schema: type: string + - in: query + name: filter[uid__in] + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - name: sort required: false in: query @@ -13371,6 +13862,87 @@ components: $ref: '#/components/schemas/FindingDynamicFilter' required: - data + FindingGroup: + type: object + required: + - type + - id + additionalProperties: false + properties: + type: + type: string + description: The [type](https://jsonapi.org/format/#document-resource-object-identification) + member is used to describe resource objects that share common attributes + and relationships. 
+ enum: + - finding-groups + id: {} + attributes: + type: object + properties: + id: + type: string + check_id: + type: string + check_title: + type: string + nullable: true + check_description: + type: string + nullable: true + severity: + type: string + status: + type: string + impacted_providers: + type: array + items: + type: string + resources_fail: + type: integer + resources_total: + type: integer + pass_count: + type: integer + fail_count: + type: integer + muted_count: + type: integer + new_count: + type: integer + changed_count: + type: integer + first_seen_at: + type: string + format: date-time + nullable: true + last_seen_at: + type: string + format: date-time + nullable: true + failing_since: + type: string + format: date-time + nullable: true + required: + - id + - check_id + - severity + - status + - resources_fail + - resources_total + - pass_count + - fail_count + - muted_count + - new_count + - changed_count + FindingGroupResponse: + type: object + properties: + data: + $ref: '#/components/schemas/FindingGroup' + required: + - data FindingMetadata: type: object required: @@ -16188,6 +16760,15 @@ components: $ref: '#/components/schemas/ComplianceWatchlistOverview' required: - data + PaginatedFindingGroupList: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FindingGroup' + required: + - data PaginatedFindingList: type: object properties: diff --git a/api/src/backend/api/tests/test_views.py b/api/src/backend/api/tests/test_views.py index cc7d367ffd..4ddc33d681 100644 --- a/api/src/backend/api/tests/test_views.py +++ b/api/src/backend/api/tests/test_views.py @@ -3045,21 +3045,21 @@ class TestScanViewSet: [ ("provider_type", "aws", 3), ("provider_type.in", "gcp,azure", 0), - ("provider_uid", "123456789012", 2), + ("provider_uid", "123456789012", 1), ("provider_uid.icontains", "1", 3), ("provider_uid.in", "123456789012,123456789013", 3), - ("provider_alias", "aws_testing_1", 2), + ("provider_alias", 
"aws_testing_1", 1), ("provider_alias.icontains", "aws", 3), ("provider_alias.in", "aws_testing_1,aws_testing_2", 3), ("name", "Scan 1", 1), ("name.icontains", "Scan", 3), - ("started_at", "2024-01-02", 3), + ("started_at", "2024-01-02", 1), ("started_at.gte", "2024-01-01", 3), ("started_at.lte", "2024-01-01", 0), ("trigger", Scan.TriggerChoices.MANUAL, 1), ("state", StateChoices.AVAILABLE, 1), - ("state", StateChoices.FAILED, 1), - ("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 2), + ("state", StateChoices.FAILED, 0), + ("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 1), ("trigger", Scan.TriggerChoices.MANUAL, 1), ] ), @@ -3102,20 +3102,52 @@ class TestScanViewSet: {"filter[provider]": scans_fixture[0].provider.id}, ) assert response.status_code == status.HTTP_200_OK - assert len(response.json()["data"]) == 2 + assert len(response.json()["data"]) == 1 def test_scan_filter_by_provider_id_in(self, authenticated_client, scans_fixture): response = authenticated_client.get( reverse("scan-list"), { - "filter[provider.in]": [ - scans_fixture[0].provider.id, - scans_fixture[1].provider.id, - ] + "filter[provider.in]": f"{scans_fixture[0].provider.id},{scans_fixture[1].provider.id}", }, ) assert response.status_code == status.HTTP_200_OK - assert len(response.json()["data"]) == 2 + assert len(response.json()["data"]) == 3 + + def test_scans_filter_state_failed(self, authenticated_client, scans_fixture): + """Ensure state filter matches only FAILED scans.""" + scan1, *_ = scans_fixture + failed_scan = Scan.objects.create( + name="Scan Failed", + provider=scan1.provider, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.FAILED, + tenant_id=scan1.tenant_id, + ) + response = authenticated_client.get( + reverse("scan-list"), + {"filter[state]": StateChoices.FAILED}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["id"] == str(failed_scan.id) + + def 
test_scans_filter_provider_alias_exact( + self, authenticated_client, scans_fixture + ): + """Ensure provider_alias filter returns all scans for that provider.""" + scan1, *_ = scans_fixture + response = authenticated_client.get( + reverse("scan-list"), + {"filter[provider_alias]": scan1.provider.alias}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["relationships"]["provider"]["data"]["id"] == str( + scan1.provider.id + ) @pytest.mark.parametrize( "sort_field", @@ -4365,15 +4397,10 @@ class TestResourceViewSet: ): response = authenticated_client.get( reverse("resource-list"), - { - "filter[scan.in]": [ - scans_fixture[0].id, - scans_fixture[1].id, - ] - }, + {"filter[scan.in]": f"{scans_fixture[0].id},{scans_fixture[1].id}"}, ) assert response.status_code == status.HTTP_200_OK - assert len(response.json()["data"]) == 2 + assert len(response.json()["data"]) == 3 def test_resource_filter_by_provider_id_in( self, authenticated_client, resources_fixture @@ -14265,3 +14292,765 @@ class TestMuteRuleViewSet: assert len(data) == len(mute_rules_fixture) for rule_data in data: assert rule_data["id"] != str(other_rule.id) + + +@pytest.mark.django_db +class TestFindingGroupViewSet: + """Tests for Finding Groups API - aggregates findings by check_id.""" + + def test_finding_groups_requires_date_filter(self, authenticated_client): + """Test that at least one date filter is required.""" + response = authenticated_client.get(reverse("finding-group-list")) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json()["errors"][0]["code"] == "required" + + def test_finding_groups_empty(self, authenticated_client): + """Test empty list returned when no findings exist.""" + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 0 + + def 
test_finding_groups_single_check( + self, authenticated_client, finding_groups_fixture + ): + """Test that findings with same check_id are grouped correctly.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["id"] == "s3_bucket_public_access" + assert data[0]["attributes"]["check_id"] == "s3_bucket_public_access" + + def test_finding_groups_multiple_checks( + self, authenticated_client, finding_groups_fixture + ): + """Test that different check_ids produce separate finding groups.""" + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Should have 5 distinct check_ids from fixture + assert len(data) == 5 + check_ids = {item["id"] for item in data} + assert "s3_bucket_public_access" in check_ids + assert "ec2_instance_public_ip" in check_ids + assert "iam_password_policy" in check_ids + assert "rds_encryption" in check_ids + assert "cloudtrail_enabled" in check_ids + + def test_finding_groups_severity_max( + self, authenticated_client, finding_groups_fixture + ): + """Test that max severity is returned across all findings in group.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + # s3_bucket_public_access has critical and high severity findings + # Max should be critical + assert data[0]["attributes"]["severity"] == "critical" + + def test_finding_groups_status_fail_priority( + self, authenticated_client, finding_groups_fixture + ): + """Test that FAIL status 
takes priority over PASS when any non-muted FAIL exists.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "ec2_instance_public_ip", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + # ec2_instance_public_ip has 1 PASS and 1 FAIL, should aggregate to FAIL + assert data[0]["attributes"]["status"] == "FAIL" + + def test_finding_groups_status_pass_when_no_fail( + self, authenticated_client, finding_groups_fixture + ): + """Test that PASS status returned when no non-muted FAIL exists.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[check_id]": "iam_password_policy"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + # iam_password_policy has only PASS findings + assert data[0]["attributes"]["status"] == "PASS" + + def test_finding_groups_status_muted_all( + self, authenticated_client, finding_groups_fixture + ): + """Test that MUTED status returned when all findings are muted.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[check_id]": "rds_encryption"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + # rds_encryption has all muted findings + assert data[0]["attributes"]["status"] == "MUTED" + + def test_finding_groups_provider_aggregation( + self, authenticated_client, finding_groups_fixture + ): + """Test that impacted_providers contains distinct provider types.""" + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Find the s3_bucket_public_access group + s3_group = next( + (item for item in data if item["id"] 
== "s3_bucket_public_access"), None + ) + assert s3_group is not None + # Should have aws provider + assert "aws" in s3_group["attributes"]["impacted_providers"] + + def test_finding_groups_resource_counts( + self, authenticated_client, finding_groups_fixture + ): + """Test resources_fail and resources_total counts are correct.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + attrs = data[0]["attributes"] + # s3_bucket_public_access has 2 FAIL findings on 2 different resources + assert attrs["resources_fail"] == 2 + assert attrs["resources_total"] == 2 + + def test_finding_groups_finding_counts( + self, authenticated_client, finding_groups_fixture + ): + """Test pass_count, fail_count, muted_count are correct.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "ec2_instance_public_ip", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + attrs = data[0]["attributes"] + # ec2_instance_public_ip has 1 PASS and 1 FAIL (non-muted) + assert attrs["pass_count"] == 1 + assert attrs["fail_count"] == 1 + assert attrs["muted_count"] == 0 + + def test_finding_groups_delta_counts( + self, authenticated_client, finding_groups_fixture + ): + """Test new_count and changed_count are correct.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + attrs = data[0]["attributes"] + # s3_bucket_public_access has 1 new and 1 changed finding + assert attrs["new_count"] == 1 + assert 
attrs["changed_count"] == 1 + + def test_finding_groups_timing(self, authenticated_client, finding_groups_fixture): + """Test first_seen_at, last_seen_at, and failing_since are returned.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + attrs = data[0]["attributes"] + assert "first_seen_at" in attrs + assert "last_seen_at" in attrs + assert "failing_since" in attrs + assert attrs["first_seen_at"] is not None + assert attrs["last_seen_at"] is not None + # s3_bucket_public_access has FAIL findings, so failing_since should be set + assert attrs["failing_since"] is not None + + # Test failing_since for checks without failures + def test_finding_groups_failing_since_null_when_passing( + self, authenticated_client, finding_groups_fixture + ): + """Test failing_since is null for checks that only have PASS findings.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[check_id]": "iam_password_policy"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + attrs = data[0]["attributes"] + # iam_password_policy has only PASS findings, so failing_since should be null + assert attrs["failing_since"] is None + + def test_finding_groups_rls_isolation( + self, authenticated_client, finding_groups_fixture, tenants_fixture + ): + """Test that users only see finding groups from their tenant.""" + # Create finding in another tenant + from api.models import Finding, Provider, Resource, Scan + from api.rls import Tenant + + other_tenant = Tenant.objects.create(name="Other Tenant") + other_provider = Provider.objects.create( + tenant_id=other_tenant.id, + provider="aws", + uid="999999999999", # Valid 12-digit AWS account ID + alias="Other 
Account", + ) + other_scan = Scan.objects.create( + tenant_id=other_tenant.id, + name="Other scan", + provider=other_provider, + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.COMPLETED, + ) + other_resource = Resource.objects.create( + tenant_id=other_tenant.id, + provider=other_provider, + uid="other-resource-uid", + name="Other Resource", + region="us-west-2", + service="s3", + type="bucket", + ) + other_finding = Finding.objects.create( + tenant_id=other_tenant.id, + uid="other_tenant_finding", + scan=other_scan, + delta=None, + status="FAIL", + severity="critical", + impact="critical", + check_id="other_tenant_check", + check_metadata={"CheckId": "other_tenant_check"}, + first_seen_at="2024-01-02T00:00:00Z", + ) + other_finding.add_resources([other_resource]) + + # Request should not include other tenant's finding groups + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + check_ids = {item["id"] for item in data} + assert "other_tenant_check" not in check_ids + + def test_finding_groups_rbac_unlimited( + self, authenticated_client, finding_groups_fixture + ): + """Test that users with unlimited visibility see all finding groups.""" + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Should see all 5 check_ids from the fixture + assert len(data) == 5 + + def test_finding_groups_date_filter_gte( + self, authenticated_client, finding_groups_fixture + ): + """Test filtering by start date.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at.gte]": today_after_n_days(-1)}, + ) + assert response.status_code == status.HTTP_200_OK + # All fixture findings were created today + assert len(response.json()["data"]) == 5 + + def 
test_finding_groups_date_filter_lte( + self, authenticated_client, finding_groups_fixture + ): + """Test filtering by end date.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at.lte]": today_after_n_days(1)}, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 5 + + def test_finding_groups_date_filter_range( + self, authenticated_client, finding_groups_fixture + ): + """Test filtering by date range (max 7 days).""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + # Use 6-day range to stay within 7-day max limit + "filter[inserted_at.gte]": today_after_n_days(-6), + "filter[inserted_at.lte]": today_after_n_days(0), + }, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 5 + + def test_finding_groups_date_filter_outside_backfill_range_returns_empty( + self, authenticated_client, finding_groups_fixture + ): + """Test that older dates return empty results without error.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": today_after_n_days(-60)}, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 0 + + def test_finding_groups_date_filter_max_range(self, authenticated_client): + """Test that exceeding max date range returns 400.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at.lte]": today_after_n_days( + -(settings.FINDINGS_MAX_DAYS_IN_RANGE + 1) + ), + }, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json()["errors"][0]["code"] == "invalid" + + def test_finding_groups_provider_filter( + self, authenticated_client, finding_groups_fixture, providers_fixture + ): + """Test filtering by provider UUID.""" + provider = providers_fixture[0] + response = authenticated_client.get( + reverse("finding-group-list"), + 
{"filter[inserted_at]": TODAY, "filter[provider_id]": str(provider.id)}, + ) + assert response.status_code == status.HTTP_200_OK + # Should return finding groups associated with this provider + # Provider 1 has scan1 with checks: s3_bucket_public_access, ec2_instance_public_ip, + # iam_password_policy, rds_encryption (4 checks) + assert len(response.json()["data"]) == 4 + + def test_finding_groups_provider_type_filter( + self, authenticated_client, finding_groups_fixture + ): + """Test filtering by provider type.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[provider_type]": "aws"}, + ) + assert response.status_code == status.HTTP_200_OK + # All fixture findings are from AWS provider + assert len(response.json()["data"]) == 5 + + def test_finding_groups_check_id_filter( + self, authenticated_client, finding_groups_fixture + ): + """Test filtering by exact check_id.""" + response = authenticated_client.get( + reverse("finding-group-list"), + { + "filter[inserted_at]": TODAY, + "filter[check_id]": "s3_bucket_public_access", + }, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 1 + assert response.json()["data"][0]["id"] == "s3_bucket_public_access" + + def test_finding_groups_check_id_icontains( + self, authenticated_client, finding_groups_fixture + ): + """Test searching check_ids with icontains.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[check_id.icontains]": "bucket"}, + ) + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["data"]) == 1 + assert "bucket" in response.json()["data"][0]["id"].lower() + + def test_resources_not_found(self, authenticated_client): + """Test 404 returned for nonexistent check_id.""" + response = authenticated_client.get( + reverse("finding-group-resources", kwargs={"pk": "nonexistent_check"}), + {"filter[inserted_at]": 
TODAY}, + ) + assert response.status_code == status.HTTP_404_NOT_FOUND + + def test_resources_list(self, authenticated_client, finding_groups_fixture): + """Test resources are returned correctly for a finding group.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # s3_bucket_public_access has 2 findings with 2 different resources + assert len(data) == 2 + + def test_resources_fields(self, authenticated_client, finding_groups_fixture): + """Test resource fields (uid, name, service, region, type) have valid values.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 2 + for item in data: + resource = item["attributes"]["resource"] + # All fields must be present and non-empty + assert resource.get("uid"), "resource.uid must not be empty" + assert resource.get("name"), "resource.name must not be empty" + assert resource.get("service"), "resource.service must not be empty" + assert resource.get("region"), "resource.region must not be empty" + assert resource.get("type"), "resource.type must not be empty" + + def test_resources_provider_info( + self, authenticated_client, finding_groups_fixture + ): + """Test provider info (type, uid, alias) has valid values.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 2 + for item in data: + provider = item["attributes"]["provider"] + assert provider.get("type") == "aws", "provider.type must be 
'aws'" + assert provider.get("uid"), "provider.uid must not be empty" + assert provider.get("alias"), "provider.alias must not be empty" + + def test_resources_status_severity( + self, authenticated_client, finding_groups_fixture + ): + """Test status and severity from latest finding have valid values.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 2 + for item in data: + attrs = item["attributes"] + # s3_bucket_public_access has FAIL findings + assert attrs["status"] == "FAIL", "status must be 'FAIL'" + # severity must be one of the valid values + assert attrs["severity"] in [ + "critical", + "high", + "medium", + "low", + "informational", + ] + + def test_resources_timing(self, authenticated_client, finding_groups_fixture): + """Test first_seen_at and last_seen_at are not null.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 2 + for item in data: + attrs = item["attributes"] + assert attrs["first_seen_at"] is not None, "first_seen_at must not be null" + assert attrs["last_seen_at"] is not None, "last_seen_at must not be null" + + def test_resources_filters_applied( + self, authenticated_client, finding_groups_fixture + ): + """Test that date filters work on resources endpoint.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + { + "filter[inserted_at.gte]": today_after_n_days(-6), + "filter[inserted_at.lte]": today_after_n_days(0), + }, + ) + assert response.status_code == status.HTTP_200_OK + # Should still return the 2 resources within the 
date range + assert len(response.json()["data"]) == 2 + + # Test provider_id filter actually filters data + def test_finding_groups_provider_id_filter_actually_filters( + self, authenticated_client, finding_groups_fixture, providers_fixture + ): + """ + Test that provider_id filter returns ONLY data from that provider. + + This is a critical test - it verifies the filter doesn't just return 200 OK, + but actually restricts the data to the specified provider. + """ + provider1 = providers_fixture[0] # Has scan1 with 4 checks + provider2 = providers_fixture[1] # Has scan2 with 1 check (cloudtrail_enabled) + + # Get ALL finding groups (without provider filter) + response_all = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY}, + ) + assert response_all.status_code == status.HTTP_200_OK + all_check_ids = {item["id"] for item in response_all.json()["data"]} + assert len(all_check_ids) == 5, "Should have 5 total check_ids" + + # Get finding groups for provider1 only + response_p1 = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[provider_id]": str(provider1.id)}, + ) + assert response_p1.status_code == status.HTTP_200_OK + p1_check_ids = {item["id"] for item in response_p1.json()["data"]} + # Provider1 has scan1 with 4 checks + assert ( + len(p1_check_ids) == 4 + ), f"Provider1 should have 4 checks, got {len(p1_check_ids)}" + assert ( + "cloudtrail_enabled" not in p1_check_ids + ), "cloudtrail_enabled should NOT be in provider1" + + # Get finding groups for provider2 only + response_p2 = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[provider_id]": str(provider2.id)}, + ) + assert response_p2.status_code == status.HTTP_200_OK + p2_check_ids = {item["id"] for item in response_p2.json()["data"]} + # Provider2 has scan2 with 1 check + assert ( + len(p2_check_ids) == 1 + ), f"Provider2 should have 1 check, got {len(p2_check_ids)}" 
+ assert ( + "cloudtrail_enabled" in p2_check_ids + ), "cloudtrail_enabled should be in provider2" + + # Test provider_type filter actually filters data + def test_finding_groups_provider_type_filter_actually_filters( + self, authenticated_client, finding_groups_fixture + ): + """ + Test that provider_type filter returns ONLY data from that provider type. + """ + # All fixtures use AWS providers, so filtering by AWS should return all 5 + response_aws = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[provider_type]": "aws"}, + ) + assert response_aws.status_code == status.HTTP_200_OK + assert len(response_aws.json()["data"]) == 5 + + # Filtering by GCP should return 0 (no GCP findings in fixture) + response_gcp = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "filter[provider_type]": "gcp"}, + ) + assert response_gcp.status_code == status.HTTP_200_OK + assert ( + len(response_gcp.json()["data"]) == 0 + ), "GCP filter should return 0 results" + + def test_finding_groups_pagination( + self, authenticated_client, finding_groups_fixture + ): + """Test pagination metadata and links.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "page[size]": 2}, + ) + assert response.status_code == status.HTTP_200_OK + # Should have pagination metadata + assert "meta" in response.json() + meta = response.json()["meta"] + assert "pagination" in meta + assert "count" in meta["pagination"] + + def test_resources_pagination(self, authenticated_client, finding_groups_fixture): + """Test pagination on resources endpoint.""" + response = authenticated_client.get( + reverse( + "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"} + ), + {"filter[inserted_at]": TODAY, "page[size]": 1}, + ) + assert response.status_code == status.HTTP_200_OK + assert "meta" in response.json() + + def test_finding_groups_ordering_default( + 
self, authenticated_client, finding_groups_fixture + ): + """Test default ordering (-fail_count, -severity, check_id).""" + response = authenticated_client.get( + reverse("finding-group-list"), {"filter[inserted_at]": TODAY} + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # First results should have highest fail_count or critical severity + # s3_bucket_public_access has 2 fails with critical severity + assert data[0]["id"] in ["s3_bucket_public_access", "cloudtrail_enabled"] + + def test_finding_groups_ordering_custom( + self, authenticated_client, finding_groups_fixture + ): + """Test custom sort parameter.""" + response = authenticated_client.get( + reverse("finding-group-list"), + {"filter[inserted_at]": TODAY, "sort": "check_id"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Results should be in alphabetical order by check_id + check_ids = [item["id"] for item in data] + assert check_ids == sorted(check_ids) + + def test_finding_groups_latest_no_date_filter_required( + self, authenticated_client, finding_groups_fixture + ): + """Test that /latest endpoint works without date filters.""" + response = authenticated_client.get( + reverse("finding-group-latest"), + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Should return all 5 checks from the fixture + assert len(data) == 5 + + def test_finding_groups_latest_empty(self, authenticated_client): + """Test /latest returns empty list when no summaries exist.""" + response = authenticated_client.get( + reverse("finding-group-latest"), + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 0 + + def test_finding_groups_latest_provider_id_filter( + self, authenticated_client, finding_groups_fixture, providers_fixture + ): + """Test /latest with provider_id filter returns only that provider's data.""" + provider1 = 
providers_fixture[0] # Has 4 checks + provider2 = providers_fixture[1] # Has 1 check + + # Filter by provider1 + response = authenticated_client.get( + reverse("finding-group-latest"), + {"filter[provider_id]": str(provider1.id)}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 4 + check_ids = {item["id"] for item in data} + assert "cloudtrail_enabled" not in check_ids + + # Filter by provider2 + response = authenticated_client.get( + reverse("finding-group-latest"), + {"filter[provider_id]": str(provider2.id)}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["id"] == "cloudtrail_enabled" + + def test_finding_groups_latest_provider_type_filter( + self, authenticated_client, finding_groups_fixture + ): + """Test /latest with provider_type filter.""" + response = authenticated_client.get( + reverse("finding-group-latest"), + {"filter[provider_type]": "aws"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # All providers in fixture are AWS + assert len(data) == 5 + + def test_finding_groups_latest_check_id_filter( + self, authenticated_client, finding_groups_fixture + ): + """Test /latest with check_id filter.""" + response = authenticated_client.get( + reverse("finding-group-latest"), + {"filter[check_id]": "s3_bucket_public_access"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + assert len(data) == 1 + assert data[0]["id"] == "s3_bucket_public_access" + + def test_finding_groups_latest_custom_sort( + self, authenticated_client, finding_groups_fixture + ): + """Test /latest with custom sort parameter.""" + response = authenticated_client.get( + reverse("finding-group-latest"), + {"sort": "check_id"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + check_ids = [item["id"] for item in data] + 
assert check_ids == sorted(check_ids) + + def test_finding_groups_latest_ignores_date_filters( + self, authenticated_client, finding_groups_fixture + ): + """Test that /latest ignores any date filters passed in params.""" + # Even with an old date filter, /latest should return current data + response = authenticated_client.get( + reverse("finding-group-latest"), + {"filter[inserted_at]": "2020-01-01"}, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json()["data"] + # Should still return data, not filtered by the old date + assert len(data) == 5 diff --git a/api/src/backend/api/v1/serializers.py b/api/src/backend/api/v1/serializers.py index 8cb7ef49cb..18619a2384 100644 --- a/api/src/backend/api/v1/serializers.py +++ b/api/src/backend/api/v1/serializers.py @@ -4051,3 +4051,98 @@ class ResourceEventSerializer(BaseSerializerV1): class Meta: resource_name = "resource-events" + + +# Finding Groups - Virtual aggregation entities + + +class FindingGroupSerializer(BaseSerializerV1): + """ + Serializer for Finding Groups - aggregated findings by check_id. + + This is a non-model serializer since FindingGroup is a virtual entity + created by aggregating the Finding model. 
+ """ + + id = serializers.CharField(source="check_id") + check_id = serializers.CharField() + check_title = serializers.CharField(required=False, allow_null=True) + check_description = serializers.CharField(required=False, allow_null=True) + severity = serializers.CharField() + status = serializers.CharField() + impacted_providers = serializers.ListField( + child=serializers.CharField(), required=False + ) + resources_fail = serializers.IntegerField() + resources_total = serializers.IntegerField() + pass_count = serializers.IntegerField() + fail_count = serializers.IntegerField() + muted_count = serializers.IntegerField() + new_count = serializers.IntegerField() + changed_count = serializers.IntegerField() + first_seen_at = serializers.DateTimeField(required=False, allow_null=True) + last_seen_at = serializers.DateTimeField(required=False, allow_null=True) + failing_since = serializers.DateTimeField(required=False, allow_null=True) + + class JSONAPIMeta: + resource_name = "finding-groups" + + +class FindingGroupResourceSerializer(BaseSerializerV1): + """ + Serializer for Finding Group Resources - resources within a finding group. + + Returns individual resources with their current status, severity, + and timing information. 
+ """ + + id = serializers.UUIDField(source="resource_id") + resource = serializers.SerializerMethodField() + provider = serializers.SerializerMethodField() + status = serializers.CharField() + severity = serializers.CharField() + first_seen_at = serializers.DateTimeField(required=False, allow_null=True) + last_seen_at = serializers.DateTimeField(required=False, allow_null=True) + + class JSONAPIMeta: + resource_name = "finding-group-resources" + + @extend_schema_field( + { + "type": "object", + "properties": { + "uid": {"type": "string"}, + "name": {"type": "string"}, + "service": {"type": "string"}, + "region": {"type": "string"}, + "type": {"type": "string"}, + }, + } + ) + def get_resource(self, obj): + """Return nested resource object.""" + return { + "uid": obj.get("resource_uid", ""), + "name": obj.get("resource_name", ""), + "service": obj.get("resource_service", ""), + "region": obj.get("resource_region", ""), + "type": obj.get("resource_type", ""), + } + + @extend_schema_field( + { + "type": "object", + "properties": { + "type": {"type": "string"}, + "uid": {"type": "string"}, + "alias": {"type": "string"}, + }, + } + ) + def get_provider(self, obj): + """Return nested provider object.""" + return { + "type": obj.get("provider_type", ""), + "uid": obj.get("provider_uid", ""), + "alias": obj.get("provider_alias", ""), + } diff --git a/api/src/backend/api/v1/urls.py b/api/src/backend/api/v1/urls.py index 840f027b42..f2578c9d95 100644 --- a/api/src/backend/api/v1/urls.py +++ b/api/src/backend/api/v1/urls.py @@ -10,6 +10,7 @@ from api.v1.views import ( CustomTokenObtainView, CustomTokenRefreshView, CustomTokenSwitchTenantView, + FindingGroupViewSet, FindingViewSet, GithubSocialLoginView, GoogleSocialLoginView, @@ -60,6 +61,7 @@ router.register( router.register(r"tasks", TaskViewSet, basename="task") router.register(r"resources", ResourceViewSet, basename="resource") router.register(r"findings", FindingViewSet, basename="finding") 
+router.register(r"finding-groups", FindingGroupViewSet, basename="finding-group") router.register(r"roles", RoleViewSet, basename="role") router.register( r"compliance-overviews", ComplianceOverviewViewSet, basename="complianceoverview" diff --git a/api/src/backend/api/v1/views.py b/api/src/backend/api/v1/views.py index 3f0f8c7a8f..e844de92ac 100644 --- a/api/src/backend/api/v1/views.py +++ b/api/src/backend/api/v1/views.py @@ -24,7 +24,7 @@ from config.settings.social_login import ( ) from dj_rest_auth.registration.views import SocialLoginView from django.conf import settings as django_settings -from django.contrib.postgres.aggregates import ArrayAgg +from django.contrib.postgres.aggregates import ArrayAgg, StringAgg from django.contrib.postgres.search import SearchQuery from django.db import transaction from django.db.models import ( @@ -35,8 +35,10 @@ from django.db.models import ( F, IntegerField, Max, + Min, Prefetch, Q, + QuerySet, Subquery, Sum, Value, @@ -99,6 +101,7 @@ from api.compliance import ( PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE, get_compliance_frameworks, ) +from api.constants import SEVERITY_ORDER from api.db_router import MainRouter from api.db_utils import rls_transaction from api.exceptions import ( @@ -117,10 +120,14 @@ from api.filters import ( CustomDjangoFilterBackend, DailySeveritySummaryFilter, FindingFilter, + FindingGroupFilter, + FindingGroupSummaryFilter, IntegrationFilter, IntegrationJiraFindingsFilter, InvitationFilter, LatestFindingFilter, + LatestFindingGroupFilter, + LatestFindingGroupSummaryFilter, LatestResourceFilter, LighthouseProviderConfigFilter, LighthouseProviderModelsFilter, @@ -149,6 +156,7 @@ from api.models import ( ComplianceRequirementOverview, DailySeveritySummary, Finding, + FindingGroupDailySummary, Integration, Invitation, LighthouseConfiguration, @@ -210,6 +218,8 @@ from api.v1.serializers import ( ComplianceOverviewSerializer, ComplianceWatchlistOverviewSerializer, FindingDynamicFilterSerializer, + 
FindingGroupResourceSerializer, + FindingGroupSerializer, FindingMetadataSerializer, FindingSerializer, FindingsSeverityOverTimeSerializer, @@ -6547,3 +6557,660 @@ class MuteRuleViewSet(BaseRLSViewSet): data=serializer.data, status=status.HTTP_201_CREATED, ) + + +SEVERITY_ORDER_REVERSE = {v: k for k, v in SEVERITY_ORDER.items()} + + +@extend_schema_view( + list=extend_schema( + summary="List finding groups", + description=""" + Retrieve aggregated findings grouped by check_id. + + Each group shows: + - Aggregated status (FAIL if any non-muted failure) + - Maximum severity across all findings + - Resource counts (failing vs total) + - Finding counts by status and delta + - Affected provider types + + At least one date filter is required for performance reasons. + """, + tags=["Finding Groups"], + ), + retrieve=extend_schema(exclude=True), +) +class FindingGroupViewSet(BaseRLSViewSet): + """ + ViewSet for Finding Groups - aggregates findings by check_id. + + This endpoint provides a summary view of security checks, aggregating + metrics across all findings for each unique check_id. This enables + security analysts to see which checks are failing across their + infrastructure without scrolling through thousands of individual findings. + + Uses pre-aggregated FindingGroupDailySummary table for efficient queries. + Daily summaries are re-aggregated across the requested date range. 
+ """ + + queryset = FindingGroupDailySummary.objects.all() + serializer_class = FindingGroupSerializer + filterset_class = FindingGroupSummaryFilter + http_method_names = ["get"] + required_permissions = [] + + def get_filterset_class(self): + """Return appropriate filter based on action.""" + if self.action == "latest": + return LatestFindingGroupSummaryFilter + return FindingGroupSummaryFilter + + def get_queryset(self): + """Get the base FindingGroupDailySummary queryset with RLS filtering.""" + tenant_id = self.request.tenant_id + role = get_role(self.request.user) + queryset = FindingGroupDailySummary.objects.filter(tenant_id=tenant_id) + + if not role.unlimited_visibility: + queryset = queryset.filter(provider__in=get_providers(role)) + + return queryset + + def _get_finding_queryset(self): + """Get the Finding queryset for resources drill-down (with RBAC).""" + role = get_role(self.request.user) + providers = get_providers(role) + + tenant_id = self.request.tenant_id + queryset = Finding.all_objects.filter(tenant_id=tenant_id) + + # Apply RBAC provider filtering + if not role.unlimited_visibility: + queryset = queryset.filter(scan__provider_id__in=providers) + + return queryset + + def _normalize_jsonapi_params(self, query_params): + """Convert JSON:API filter params (filter[X]) to flat params (X).""" + normalized = QueryDict(mutable=True) + for key, values in query_params.lists(): + normalized_key = ( + key[7:-1] if key.startswith("filter[") and key.endswith("]") else key + ) + # Convert JSON:API dot notation to Django double underscore + normalized_key = normalized_key.replace(".", "__") + normalized.setlist(normalized_key, values) + return normalized + + @extend_schema(exclude=True) + def retrieve(self, request, *args, **kwargs): + raise MethodNotAllowed(method="GET") + + RESOURCE_FILTER_MAP = { + "resources": "id__in", + "resource_uid": "uid", + "resource_uid__in": "uid__in", + "resource_uid__icontains": "uid__icontains", + "resource_name": "name", + 
"resource_name__in": "name__in", + "resource_name__icontains": "name__icontains", + "resource_type": "type", + "resource_type__in": "type__in", + "resource_type__icontains": "type__icontains", + } + + def _split_resource_filters(self, params: QueryDict) -> tuple[QueryDict, QueryDict]: + resource_keys = set(self.RESOURCE_FILTER_MAP) + finding_params = QueryDict(mutable=True) + resource_params = QueryDict(mutable=True) + for key, values in params.lists(): + if key in resource_keys: + resource_params.setlist(key, values) + else: + finding_params.setlist(key, values) + return finding_params, resource_params + + def _resource_ids_from_params( + self, params: QueryDict, tenant_id: str | None + ) -> QuerySet | None: + if not params: + return None + + queryset = Resource.objects.all() + if tenant_id: + queryset = queryset.filter(tenant_id=tenant_id) + + filter_params = QueryDict(mutable=True) + for key, mapped_key in self.RESOURCE_FILTER_MAP.items(): + if key not in params: + continue + if key == "resources" or key.endswith("__in"): + values = params.getlist(key) + items: list[str] = [] + for value in values: + if value is None: + continue + for part in value.split(","): + part = part.strip() + if part: + items.append(part) + if items: + filter_params.setlist(mapped_key, [",".join(items)]) + else: + value = params.get(key) + if value: + filter_params.setlist(mapped_key, [value]) + + if not filter_params: + return None + + filterset = LatestResourceFilter(filter_params, queryset=queryset) + if not filterset.is_valid(): + raise ValidationError(filterset.errors) + + return filterset.qs.values("id") + + def _aggregate_daily_summaries(self, queryset): + """ + Re-aggregate daily summaries across the date range. + + Takes pre-computed daily summaries and aggregates them by check_id + to produce totals across the selected date range. 
+ """ + from django.db.models import CharField + from django.db.models.functions import Cast + + return queryset.values("check_id").annotate( + # Max severity across days + severity_order=Max("severity_order"), + # Sum counts across days + pass_count=Sum("pass_count"), + fail_count=Sum("fail_count"), + muted_count=Sum("muted_count"), + new_count=Sum("new_count"), + changed_count=Sum("changed_count"), + resources_total=Sum("resources_total"), + resources_fail=Sum("resources_fail"), + # Collect provider types using StringAgg (cast enum to text first) + impacted_providers_str=StringAgg( + Cast("provider__provider", CharField()), + delimiter=",", + distinct=True, + default="", + ), + # Min/Max timing across days + first_seen_at=Min("first_seen_at"), + last_seen_at=Max("last_seen_at"), + failing_since=Min("failing_since"), + # Get check metadata from first row (same for all days) + check_title=Max("check_title"), + check_description=Max("check_description"), + ) + + def _post_process_aggregation(self, aggregated_data): + """ + Post-process aggregation results to add computed fields. 
+ + - Converts severity integer back to string + - Computes aggregated status (FAIL > PASS > MUTED) + - Converts provider string to list + """ + results = [] + for row in aggregated_data: + # Convert severity order back to string + severity_order = row.get("severity_order", 1) + row["severity"] = SEVERITY_ORDER_REVERSE.get( + severity_order, "informational" + ) + + # Compute aggregated status + if row.get("fail_count", 0) > 0: + row["status"] = "FAIL" + elif row.get("pass_count", 0) > 0: + row["status"] = "PASS" + else: + row["status"] = "MUTED" + + # Convert provider string to list + providers_str = row.pop("impacted_providers_str", "") or "" + row["impacted_providers"] = [ + p.strip() for p in providers_str.split(",") if p.strip() + ] + + results.append(row) + + return results + + def _validate_sort_fields(self, sort_param): + """Validate and map JSON:API sort fields for aggregated finding groups.""" + sort_field_map = { + "check_id": "check_id", + "severity": "severity_order", + "fail_count": "fail_count", + "pass_count": "pass_count", + "muted_count": "muted_count", + "new_count": "new_count", + "changed_count": "changed_count", + "resources_total": "resources_total", + "resources_fail": "resources_fail", + "first_seen_at": "first_seen_at", + "last_seen_at": "last_seen_at", + "failing_since": "failing_since", + } + + ordering = [] + for field in sort_param.split(","): + field = field.strip() + if not field: + continue + is_desc = field.startswith("-") + raw_field = field[1:] if is_desc else field + if raw_field not in sort_field_map: + # Validate sort fields explicitly to return JSON:API 400 instead of FieldError. 
+ raise ValidationError( + [ + { + "detail": f"invalid sort parameter: {raw_field}", + "status": "400", + "source": {"pointer": "/data"}, + "code": "invalid", + } + ] + ) + mapped_field = sort_field_map[raw_field] + ordering.append(f"-{mapped_field}" if is_desc else mapped_field) + + return ordering + + def _build_resource_mapping_queryset( + self, filtered_queryset, resource_ids=None, tenant_id: str | None = None + ): + """ + Build resource mapping queryset using a filtered findings subquery. + + Starting from ResourceFindingMapping avoids scanning all mappings + before applying check_id/date filters on findings. + """ + finding_ids = filtered_queryset.order_by().values("id") + + mapping_queryset = ResourceFindingMapping.objects.filter( + finding_id__in=Subquery(finding_ids) + ) + if tenant_id: + mapping_queryset = mapping_queryset.filter(tenant_id=tenant_id) + if resource_ids is not None: + if isinstance(resource_ids, QuerySet): + mapping_queryset = mapping_queryset.filter( + resource_id__in=Subquery(resource_ids) + ) + else: + mapping_queryset = mapping_queryset.filter(resource_id__in=resource_ids) + + return mapping_queryset + + def _build_resource_aggregation( + self, filtered_queryset, resource_ids=None, tenant_id: str | None = None + ): + """Build resource aggregation using a filtered findings subquery.""" + mapping_queryset = self._build_resource_mapping_queryset( + filtered_queryset, resource_ids=resource_ids, tenant_id=tenant_id + ) + + return ( + mapping_queryset.values("resource_id") + .annotate( + resource_uid=Max("resource__uid"), + resource_name=Max("resource__name"), + resource_service=Max("resource__service"), + resource_region=Max("resource__region"), + resource_type=Max("resource__type"), + provider_type=Max("resource__provider__provider"), + provider_uid=Max("resource__provider__uid"), + provider_alias=Max("resource__provider__alias"), + status_order=Max( + Case( + When( + finding__status="FAIL", + finding__muted=False, + then=Value(3), + ), + 
When( + finding__status="PASS", + finding__muted=False, + then=Value(2), + ), + default=Value(1), + output_field=IntegerField(), + ) + ), + severity_order=Max( + Case( + *[ + When(finding__severity=severity, then=Value(order)) + for severity, order in SEVERITY_ORDER.items() + ], + output_field=IntegerField(), + ) + ), + first_seen_at=Min("finding__first_seen_at"), + last_seen_at=Max("finding__inserted_at"), + ) + .filter(resource_id__isnull=False) + .order_by("resource_id") + ) + + def _post_process_resources(self, resource_data): + """Convert resource aggregation rows to API output.""" + results = [] + for row in resource_data: + severity_order = row.get("severity_order", 1) + status_order = row.get("status_order", 1) + if status_order == 3: + status = "FAIL" + elif status_order == 2: + status = "PASS" + else: + status = "MUTED" + + results.append( + { + "resource_id": row["resource_id"], + "resource_uid": row["resource_uid"], + "resource_name": row["resource_name"], + "resource_service": row["resource_service"], + "resource_region": row["resource_region"], + "resource_type": row["resource_type"], + "provider_type": row["provider_type"], + "provider_uid": row["provider_uid"], + "provider_alias": row["provider_alias"], + "status": status, + "severity": SEVERITY_ORDER_REVERSE.get( + severity_order, "informational" + ), + "first_seen_at": row["first_seen_at"], + "last_seen_at": row["last_seen_at"], + } + ) + + return results + + def list(self, request, *args, **kwargs): + """ + List finding groups with aggregation and filtering. + + Returns findings grouped by check_id with aggregated metrics. + Requires at least one date filter for performance. + Uses pre-aggregated daily summaries for efficient queries. 
+ """ + queryset = self.get_queryset() + + # Apply filters + normalized_params = self._normalize_jsonapi_params(request.query_params) + filterset = self.filterset_class(normalized_params, queryset=queryset) + if not filterset.is_valid(): + raise ValidationError(filterset.errors) + filtered_queryset = filterset.qs + + # Re-aggregate daily summaries across the date range + aggregated_queryset = self._aggregate_daily_summaries(filtered_queryset) + + # Apply ordering (respect JSON:API sort param or use default) + sort_param = request.query_params.get("sort") + if sort_param: + # Convert JSON:API sort notation (prefix '-' for descending) + ordering = self._validate_sort_fields(sort_param) + if ordering: + aggregated_queryset = aggregated_queryset.order_by(*ordering) + else: + # Default ordering: failures first, then severity, then check_id + aggregated_queryset = aggregated_queryset.order_by( + "-fail_count", "-severity_order", "check_id" + ) + + # Paginate + page = self.paginate_queryset(aggregated_queryset) + if page is not None: + # Post-process the page + processed_data = self._post_process_aggregation(page) + serializer = self.get_serializer(processed_data, many=True) + return self.get_paginated_response(serializer.data) + + # Post-process all results (no pagination) + processed_data = self._post_process_aggregation(aggregated_queryset) + serializer = self.get_serializer(processed_data, many=True) + return Response(serializer.data) + + @extend_schema( + summary="List latest finding groups", + description=""" + Retrieve the latest available state for each finding group (check_id). + + This endpoint returns finding groups without requiring date filters, + automatically using the latest available data per check_id. + All other filters (provider_id, provider_type, check_id) are still supported. 
+ """, + tags=["Finding Groups"], + ) + @action(detail=False, methods=["get"], url_name="latest") + def latest(self, request): + """ + List the latest finding group state per check_id. + + Returns findings grouped by check_id using the latest available + inserted_at date per check_id, without requiring date filters. + """ + queryset = self.get_queryset() + + # Apply other filters (provider_id, provider_type, check_id, etc.) + normalized_params = self._normalize_jsonapi_params(request.query_params) + # Remove date filters since we're using latest + for key in list(normalized_params.keys()): + if key.startswith("inserted_at"): + del normalized_params[key] + + filterset_class = self.get_filterset_class() + filterset = filterset_class(normalized_params, queryset=queryset) + if not filterset.is_valid(): + raise ValidationError(filterset.errors) + filtered_queryset = filterset.qs + + # Keep only rows from the latest inserted_at date per check_id + latest_per_check = filtered_queryset.annotate( + latest_inserted_at=Window( + expression=Max("inserted_at"), + partition_by=[F("check_id")], + ) + ).filter(inserted_at=F("latest_inserted_at")) + + # Re-aggregate daily summaries + aggregated_queryset = self._aggregate_daily_summaries(latest_per_check) + + # Apply ordering + sort_param = request.query_params.get("sort") + if sort_param: + ordering = self._validate_sort_fields(sort_param) + if ordering: + aggregated_queryset = aggregated_queryset.order_by(*ordering) + else: + aggregated_queryset = aggregated_queryset.order_by( + "-fail_count", "-severity_order", "check_id" + ) + + # Paginate + page = self.paginate_queryset(aggregated_queryset) + if page is not None: + processed_data = self._post_process_aggregation(page) + serializer = self.get_serializer(processed_data, many=True) + return self.get_paginated_response(serializer.data) + + processed_data = self._post_process_aggregation(aggregated_queryset) + serializer = self.get_serializer(processed_data, many=True) + return 
Response(serializer.data) + + @extend_schema( + summary="List resources for a finding group", + description=""" + Retrieve resources affected by a specific check (finding group). + + Returns individual resources with their current status, severity, + and timing information including how long they have been failing. + """, + tags=["Finding Groups"], + ) + @action(detail=True, methods=["get"], url_path="resources") + def resources(self, request, pk=None): + """ + List resources for a specific finding group (check_id). + + Returns resources with their status, severity, and provider info + for the specified check_id. Uses Finding table for resource details. + """ + check_id = pk + queryset = self._get_finding_queryset() + + # Apply date filters from request to Finding queryset + normalized_params = self._normalize_jsonapi_params(request.query_params) + finding_params, resource_params = self._split_resource_filters( + normalized_params + ) + + filterset = FindingGroupFilter(finding_params, queryset=queryset) + if not filterset.is_valid(): + raise ValidationError(filterset.errors) + filtered_queryset = filterset.qs + + # Filter by check_id + filtered_queryset = filtered_queryset.filter(check_id=check_id) + + # Check if any findings exist for this check_id + if not filtered_queryset.exists(): + raise NotFound(f"Finding group '{check_id}' not found.") + + resource_ids = self._resource_ids_from_params( + resource_params, request.tenant_id + ) + mapping_queryset = self._build_resource_mapping_queryset( + filtered_queryset, + resource_ids=resource_ids, + tenant_id=request.tenant_id, + ) + resource_id_queryset = ( + mapping_queryset.values_list("resource_id", flat=True) + .distinct() + .order_by("resource_id") + ) + + page_ids = self.paginate_queryset(resource_id_queryset) + if page_ids is not None: + resource_data = self._build_resource_aggregation( + filtered_queryset, + resource_ids=page_ids, + tenant_id=request.tenant_id, + ) + results = 
self._post_process_resources(resource_data) + serializer = FindingGroupResourceSerializer(results, many=True) + return self.get_paginated_response(serializer.data) + + resource_data = self._build_resource_aggregation( + filtered_queryset, + resource_ids=resource_ids, + tenant_id=request.tenant_id, + ) + results = self._post_process_resources(resource_data) + serializer = FindingGroupResourceSerializer(results, many=True) + return Response(serializer.data) + + @extend_schema( + summary="List resources for a finding group from latest scans", + description=""" + Retrieve resources affected by a specific check (finding group) from the + latest completed scan for each provider. + + Returns individual resources with their current status, severity, + and timing information. No date filters required. + """, + tags=["Finding Groups"], + ) + @action( + detail=False, + methods=["get"], + url_path="latest/(?P<check_id>[^/.]+)/resources", + url_name="latest_resources", + ) + def latest_resources(self, request, check_id=None): + """ + List resources for a specific finding group from the latest scan. + + Similar to `resources` but automatically filters to only include + findings from the most recent completed scan for each provider. 
+ """ + tenant_id = request.tenant_id + queryset = self._get_finding_queryset() + + # Get latest completed scan for each provider + latest_scan_ids = ( + Scan.objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED) + .order_by("provider_id", "-inserted_at") + .distinct("provider_id") + .values_list("id", flat=True) + ) + + normalized_params = self._normalize_jsonapi_params(request.query_params) + # Remove date filters since we're using latest + for key in list(normalized_params.keys()): + if key.startswith("inserted_at"): + del normalized_params[key] + + finding_params, resource_params = self._split_resource_filters( + normalized_params + ) + + filterset = LatestFindingGroupFilter(finding_params, queryset=queryset) + if not filterset.is_valid(): + raise ValidationError(filterset.errors) + filtered_queryset = filterset.qs + + # Filter to latest scans and check_id + filtered_queryset = filtered_queryset.filter( + scan_id__in=latest_scan_ids, + check_id=check_id, + ) + + # Check if any findings exist for this check_id + if not filtered_queryset.exists(): + raise NotFound(f"Finding group '{check_id}' not found.") + + resource_ids = self._resource_ids_from_params( + resource_params, request.tenant_id + ) + mapping_queryset = self._build_resource_mapping_queryset( + filtered_queryset, + resource_ids=resource_ids, + tenant_id=request.tenant_id, + ) + resource_id_queryset = ( + mapping_queryset.values_list("resource_id", flat=True) + .distinct() + .order_by("resource_id") + ) + + page_ids = self.paginate_queryset(resource_id_queryset) + if page_ids is not None: + resource_data = self._build_resource_aggregation( + filtered_queryset, + resource_ids=page_ids, + tenant_id=request.tenant_id, + ) + results = self._post_process_resources(resource_data) + serializer = FindingGroupResourceSerializer(results, many=True) + return self.get_paginated_response(serializer.data) + + resource_data = self._build_resource_aggregation( + filtered_queryset, + 
resource_ids=resource_ids, + tenant_id=request.tenant_id, + ) + results = self._post_process_resources(resource_data) + serializer = FindingGroupResourceSerializer(results, many=True) + return Response(serializer.data) diff --git a/api/src/backend/conftest.py b/api/src/backend/conftest.py index 189d5a31d5..209292ffad 100644 --- a/api/src/backend/conftest.py +++ b/api/src/backend/conftest.py @@ -678,21 +678,25 @@ def scans_fixture(tenants_fixture, providers_fixture): tenant, *_ = tenants_fixture provider, provider2, *_ = providers_fixture + now = datetime.now(timezone.utc) + scan1 = Scan.objects.create( name="Scan 1", provider=provider, trigger=Scan.TriggerChoices.MANUAL, state=StateChoices.COMPLETED, tenant_id=tenant.id, - started_at="2024-01-02T00:00:00Z", + started_at=now, + completed_at=now, ) scan2 = Scan.objects.create( name="Scan 2", - provider=provider, + provider=provider2, trigger=Scan.TriggerChoices.SCHEDULED, - state=StateChoices.FAILED, + state=StateChoices.COMPLETED, tenant_id=tenant.id, - started_at="2024-01-02T00:00:00Z", + started_at=now, + completed_at=now, ) scan3 = Scan.objects.create( name="Scan 3", @@ -1954,6 +1958,275 @@ def tenant_compliance_summary_fixture(tenants_fixture): return summaries +@pytest.fixture +def finding_groups_fixture( + tenants_fixture, providers_fixture, scans_fixture, resources_fixture +): + """ + Create a comprehensive set of findings for testing Finding Groups aggregation. 
+ + Creates findings for multiple check_ids with varying: + - Statuses (PASS, FAIL) + - Severities (critical, high, medium, low) + - Deltas (new, changed, None) + - Muted states (True, False) + + This fixture tests aggregation logic for: + - Multiple findings per check_id + - Status aggregation (FAIL > PASS > MUTED) + - Severity aggregation (max severity) + - Provider aggregation (distinct list) + - Resource counts + - Finding counts (pass, fail, muted, new, changed) + """ + tenant = tenants_fixture[0] + provider1, provider2, *_ = providers_fixture + scan1, scan2, *_ = scans_fixture + resource1, resource2, *_ = resources_fixture + + findings = [] + + # Check 1: s3_bucket_public_access - Multiple FAIL findings (critical) + # Should aggregate to: status=FAIL, severity=critical, fail_count=2, pass_count=0 + finding1a = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_s3_check_1a", + scan=scan1, + delta="new", + status=Status.FAIL, + status_extended="S3 bucket allows public access", + impact=Severity.critical, + impact_extended="Critical security risk", + severity=Severity.critical, + raw_result={"status": Status.FAIL, "severity": Severity.critical}, + tags={"env": "prod"}, + check_id="s3_bucket_public_access", + check_metadata={ + "CheckId": "s3_bucket_public_access", + "checktitle": "Ensure S3 buckets do not allow public access", + "Description": "S3 buckets should be configured to restrict public access.", + }, + first_seen_at="2024-01-02T00:00:00Z", + muted=False, + ) + finding1a.add_resources([resource1]) + findings.append(finding1a) + + finding1b = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_s3_check_1b", + scan=scan1, + delta="changed", + status=Status.FAIL, + status_extended="S3 bucket allows public read", + impact=Severity.high, + impact_extended="High security risk", + severity=Severity.high, + raw_result={"status": Status.FAIL, "severity": Severity.high}, + tags={"env": "staging"}, + check_id="s3_bucket_public_access", + check_metadata={ 
+ "CheckId": "s3_bucket_public_access", + "checktitle": "Ensure S3 buckets do not allow public access", + "Description": "S3 buckets should be configured to restrict public access.", + }, + first_seen_at="2024-01-03T00:00:00Z", + muted=False, + ) + finding1b.add_resources([resource2]) + findings.append(finding1b) + + # Check 2: ec2_instance_public_ip - Mixed PASS/FAIL (high severity max) + # Should aggregate to: status=FAIL, severity=high, fail_count=1, pass_count=1 + finding2a = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_ec2_check_2a", + scan=scan1, + delta=None, + status=Status.PASS, + status_extended="EC2 instance has no public IP", + impact=Severity.medium, + impact_extended="Medium risk", + severity=Severity.medium, + raw_result={"status": Status.PASS, "severity": Severity.medium}, + tags={"env": "dev"}, + check_id="ec2_instance_public_ip", + check_metadata={ + "CheckId": "ec2_instance_public_ip", + "checktitle": "Ensure EC2 instances do not have public IPs", + "Description": "EC2 instances should use private IPs only.", + }, + first_seen_at="2024-01-04T00:00:00Z", + muted=False, + ) + finding2a.add_resources([resource1]) + findings.append(finding2a) + + finding2b = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_ec2_check_2b", + scan=scan1, + delta="new", + status=Status.FAIL, + status_extended="EC2 instance has public IP assigned", + impact=Severity.high, + impact_extended="High risk", + severity=Severity.high, + raw_result={"status": Status.FAIL, "severity": Severity.high}, + tags={"env": "prod"}, + check_id="ec2_instance_public_ip", + check_metadata={ + "CheckId": "ec2_instance_public_ip", + "checktitle": "Ensure EC2 instances do not have public IPs", + "Description": "EC2 instances should use private IPs only.", + }, + first_seen_at="2024-01-05T00:00:00Z", + muted=False, + ) + finding2b.add_resources([resource2]) + findings.append(finding2b) + + # Check 3: iam_password_policy - All PASS (low severity) + # Should aggregate to: 
status=PASS, severity=low, fail_count=0, pass_count=2 + finding3a = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_iam_check_3a", + scan=scan1, + delta=None, + status=Status.PASS, + status_extended="Password policy is compliant", + impact=Severity.low, + impact_extended="Low risk", + severity=Severity.low, + raw_result={"status": Status.PASS, "severity": Severity.low}, + tags={"env": "prod"}, + check_id="iam_password_policy", + check_metadata={ + "CheckId": "iam_password_policy", + "checktitle": "Ensure IAM password policy is strong", + "Description": "IAM password policy should enforce complexity.", + }, + first_seen_at="2024-01-06T00:00:00Z", + muted=False, + ) + finding3a.add_resources([resource1]) + findings.append(finding3a) + + finding3b = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_iam_check_3b", + scan=scan1, + delta=None, + status=Status.PASS, + status_extended="Password policy meets requirements", + impact=Severity.low, + impact_extended="Low risk", + severity=Severity.low, + raw_result={"status": Status.PASS, "severity": Severity.low}, + tags={"env": "staging"}, + check_id="iam_password_policy", + check_metadata={ + "CheckId": "iam_password_policy", + "checktitle": "Ensure IAM password policy is strong", + "Description": "IAM password policy should enforce complexity.", + }, + first_seen_at="2024-01-07T00:00:00Z", + muted=False, + ) + finding3b.add_resources([resource2]) + findings.append(finding3b) + + # Check 4: rds_encryption - All muted (medium severity) + # Should aggregate to: status=MUTED, severity=medium, fail_count=0, pass_count=0, muted_count=2 + finding4a = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_rds_check_4a", + scan=scan1, + delta=None, + status=Status.FAIL, + status_extended="RDS instance not encrypted", + impact=Severity.medium, + impact_extended="Medium risk", + severity=Severity.medium, + raw_result={"status": Status.FAIL, "severity": Severity.medium}, + tags={"env": "dev"}, + 
check_id="rds_encryption", + check_metadata={ + "CheckId": "rds_encryption", + "checktitle": "Ensure RDS instances are encrypted", + "Description": "RDS instances should use encryption at rest.", + }, + first_seen_at="2024-01-08T00:00:00Z", + muted=True, + ) + finding4a.add_resources([resource1]) + findings.append(finding4a) + + finding4b = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_rds_check_4b", + scan=scan1, + delta=None, + status=Status.FAIL, + status_extended="RDS encryption disabled", + impact=Severity.medium, + impact_extended="Medium risk", + severity=Severity.medium, + raw_result={"status": Status.FAIL, "severity": Severity.medium}, + tags={"env": "test"}, + check_id="rds_encryption", + check_metadata={ + "CheckId": "rds_encryption", + "checktitle": "Ensure RDS instances are encrypted", + "Description": "RDS instances should use encryption at rest.", + }, + first_seen_at="2024-01-09T00:00:00Z", + muted=True, + ) + finding4b.add_resources([resource2]) + findings.append(finding4b) + + # Check 5: cloudtrail_enabled - Multiple providers (from scan2 which uses provider2) + # Should aggregate to: impacted_providers contains both provider types + finding5 = Finding.objects.create( + tenant_id=tenant.id, + uid="fg_cloudtrail_check_5", + scan=scan2, + delta="new", + status=Status.FAIL, + status_extended="CloudTrail not enabled", + impact=Severity.critical, + impact_extended="Critical risk", + severity=Severity.critical, + raw_result={"status": Status.FAIL, "severity": Severity.critical}, + tags={"env": "prod"}, + check_id="cloudtrail_enabled", + check_metadata={ + "CheckId": "cloudtrail_enabled", + "checktitle": "Ensure CloudTrail is enabled", + "Description": "CloudTrail should be enabled for audit logging.", + }, + first_seen_at="2024-01-10T00:00:00Z", + muted=False, + ) + finding5.add_resources([resource1]) + findings.append(finding5) + + # Aggregate findings into FindingGroupDailySummary for the endpoint to read + from tasks.jobs.scan import 
def backfill_finding_group_summaries(tenant_id: str, days: int = None):
    """
    Backfill FindingGroupDailySummary from completed scans.

    Picks the most recently completed scan for every (provider, calendar day)
    pair and runs ``aggregate_finding_group_summaries`` on it. Summaries are
    stored per provider, so the latest scan has to be chosen per provider and
    day; choosing it per day alone would silently drop every other provider
    that was scanned on the same day.

    Args:
        tenant_id: Tenant that owns the scans.
        days: Optional limit on how many days back to backfill. ``None``
            considers every completed scan of the tenant.

    Returns:
        dict: ``{"status": "no scans to backfill"}`` when no completed scan
        matches; otherwise ``{"status": "backfilled", ...}`` carrying the
        ``scans_processed``, ``scans_skipped``, ``total_created`` and
        ``total_updated`` counters.
    """
    scans_processed = 0
    scans_skipped = 0
    total_created = 0
    total_updated = 0

    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        scan_filter = {
            "tenant_id": tenant_id,
            "state": StateChoices.COMPLETED,
            "completed_at__isnull": False,
        }

        if days is not None:
            cutoff_date = timezone.now() - timedelta(days=days)
            scan_filter["completed_at__gte"] = cutoff_date

        completed_scans = (
            Scan.objects.filter(**scan_filter)
            .order_by("-completed_at")
            .values("id", "provider_id", "completed_at")
        )

        if not completed_scans:
            return {"status": "no scans to backfill"}

        # Rows arrive newest first, so the first scan seen for a given
        # (provider, day) key is the latest one; later rows are ignored.
        latest_scans_by_provider_day = {}
        for scan in completed_scans:
            key = (scan["provider_id"], scan["completed_at"].date())
            latest_scans_by_provider_day.setdefault(key, scan)

    # Aggregate each selected scan outside the read transaction.
    for scan in latest_scans_by_provider_day.values():
        scan_id = str(scan["id"])

        try:
            result = aggregate_finding_group_summaries(tenant_id, scan_id)
        except Exception as e:
            # Best effort: one failing scan must not abort the whole backfill.
            logger.warning(
                f"Failed to backfill finding group summaries for scan {scan_id}: {e}"
            )
            scans_skipped += 1
            continue

        if result.get("status") == "completed":
            scans_processed += 1
            total_created += result.get("created", 0)
            total_updated += result.get("updated", 0)
        else:
            scans_skipped += 1

    logger.info(
        f"Backfilled finding group summaries for tenant {tenant_id}: "
        f"{scans_processed} scans processed, {scans_skipped} skipped, "
        f"{total_created} created, {total_updated} updated"
    )

    return {
        "status": "backfilled",
        "scans_processed": scans_processed,
        "scans_skipped": scans_skipped,
        "total_created": total_created,
        "total_updated": total_updated,
    }
def aggregate_finding_group_summaries(tenant_id: str, scan_id: str):
    """
    Aggregate finding group summaries for a completed scan.

    Creates or updates one FindingGroupDailySummary record per unique check_id
    found in the scan's findings, keyed by tenant, provider, check_id and the
    scan's completion day (midnight of ``completed_at``). These pre-aggregated
    summaries enable efficient queries over date ranges without scanning
    millions of findings.

    Args:
        tenant_id: Tenant that owns the scan.
        scan_id: Scan UUID whose findings should be aggregated.

    Returns:
        dict: ``{"status": "skipped", "reason": ...}`` when the scan is
        missing, not completed, has no provider or has no completion time;
        otherwise ``{"status": "completed", "scan_id", "date", "created",
        "updated"}``.
    """
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        scan = Scan.objects.filter(
            tenant_id=tenant_id,
            id=scan_id,
            state=StateChoices.COMPLETED,
        ).first()

        if not scan:
            logger.warning(
                f"Scan {scan_id} not found or not completed for finding group summary"
            )
            return {"status": "skipped", "reason": "scan not completed"}

        if not scan.provider:
            logger.warning(f"Scan {scan_id} has no provider for finding group summary")
            return {"status": "skipped", "reason": "scan has no provider"}

        # The summary day is derived from completed_at; without it there is no
        # bucket to write to (and is_naive(None) would raise).
        if scan.completed_at is None:
            logger.warning(
                f"Scan {scan_id} has no completion time for finding group summary"
            )
            return {"status": "skipped", "reason": "scan has no completion time"}

        # Truncate the completion time to midnight so every scan completed on
        # the same day maps to the same summary row.
        summary_timestamp = scan.completed_at
        if django_timezone.is_naive(summary_timestamp):
            summary_timestamp = django_timezone.make_aware(
                summary_timestamp, timezone.utc
            )
        summary_timestamp = summary_timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        provider_id = scan.provider_id

        # Map each severity label to its numeric rank so Max() can pick the
        # highest severity within a check.
        severity_case = Case(
            *[
                When(severity=severity, then=order)
                for severity, order in SEVERITY_ORDER.items()
            ],
            output_field=IntegerField(),
        )

        # Aggregate findings by check_id for this scan.
        # The resources__id aggregates join the findings<->resources relation,
        # which repeats a finding once per linked resource. The finding
        # counters therefore need distinct=True, otherwise a finding with N
        # resources would be counted N times.
        aggregated = (
            Finding.objects.filter(
                tenant_id=tenant_id,
                scan_id=scan_id,
            )
            .values("check_id")
            .annotate(
                severity_order=Max(severity_case),
                pass_count=Count(
                    "id", distinct=True, filter=Q(status="PASS", muted=False)
                ),
                fail_count=Count(
                    "id", distinct=True, filter=Q(status="FAIL", muted=False)
                ),
                muted_count=Count("id", distinct=True, filter=Q(muted=True)),
                new_count=Count(
                    "id", distinct=True, filter=Q(delta="new", muted=False)
                ),
                changed_count=Count(
                    "id", distinct=True, filter=Q(delta="changed", muted=False)
                ),
                resources_total=Count("resources__id", distinct=True),
                resources_fail=Count(
                    "resources__id",
                    distinct=True,
                    filter=Q(status="FAIL", muted=False),
                ),
                # Use prefixed names to avoid conflict with model field names
                agg_first_seen_at=Min("first_seen_at"),
                agg_last_seen_at=Max("inserted_at"),
                agg_failing_since=Min(
                    "first_seen_at", filter=Q(status="FAIL", muted=False)
                ),
            )
        )

        # Force evaluate queryset while inside RLS transaction (prevents lazy
        # re-query issues)
        aggregated_list = list(aggregated)
        check_ids = [row["check_id"] for row in aggregated_list]

        # Fetch one check_metadata payload per check_id in a single query.
        check_metadata_map = {}
        if check_ids:
            findings_with_metadata = (
                Finding.objects.filter(
                    tenant_id=tenant_id,
                    scan_id=scan_id,
                    check_id__in=check_ids,
                )
                .order_by("check_id")
                .distinct("check_id")
                .values("check_id", "check_metadata")
            )

            for f in findings_with_metadata:
                if f["check_id"] not in check_metadata_map and f["check_metadata"]:
                    check_metadata_map[f["check_id"]] = f["check_metadata"]

    # Upsert summaries in bulk for performance
    created_count = 0
    updated_count = 0

    with rls_transaction(tenant_id):
        # Look up which rows already exist so the created/updated split can be
        # reported; the upsert itself does not return that information here.
        existing_check_ids = set()
        if check_ids:
            existing_check_ids = set(
                FindingGroupDailySummary.objects.filter(
                    tenant_id=tenant_id,
                    provider_id=provider_id,
                    check_id__in=check_ids,
                    inserted_at=summary_timestamp,
                ).values_list("check_id", flat=True)
            )

        created_count = len(check_ids) - len(existing_check_ids)
        updated_count = len(existing_check_ids)

        summaries_to_upsert = []
        updated_at = django_timezone.now()
        for row in aggregated_list:
            check_id = row["check_id"]
            metadata = check_metadata_map.get(check_id, {})

            summaries_to_upsert.append(
                FindingGroupDailySummary(
                    tenant_id=tenant_id,
                    provider_id=provider_id,
                    check_id=check_id,
                    inserted_at=summary_timestamp,
                    updated_at=updated_at,
                    check_title=metadata.get("checktitle", ""),
                    check_description=metadata.get("Description", ""),
                    # Severities missing from SEVERITY_ORDER yield NULL; fall
                    # back to the lowest rank.
                    severity_order=row["severity_order"] or 1,
                    pass_count=row["pass_count"],
                    fail_count=row["fail_count"],
                    muted_count=row["muted_count"],
                    new_count=row["new_count"],
                    changed_count=row["changed_count"],
                    resources_total=row["resources_total"],
                    resources_fail=row["resources_fail"],
                    first_seen_at=row["agg_first_seen_at"],
                    last_seen_at=row["agg_last_seen_at"],
                    failing_since=row["agg_failing_since"],
                )
            )

        if summaries_to_upsert:
            FindingGroupDailySummary.objects.bulk_create(
                summaries_to_upsert,
                update_conflicts=True,
                unique_fields=["tenant_id", "provider", "check_id", "inserted_at"],
                update_fields=[
                    "check_title",
                    "check_description",
                    "severity_order",
                    "pass_count",
                    "fail_count",
                    "muted_count",
                    "new_count",
                    "changed_count",
                    "resources_total",
                    "resources_fail",
                    "first_seen_at",
                    "last_seen_at",
                    "failing_since",
                    "updated_at",
                ],
            )

    logger.info(
        f"Finding group summaries aggregated for scan {scan_id}: "
        f"{created_count} created, {updated_count} updated"
    )

    return {
        "status": "completed",
        "scan_id": str(scan_id),
        "date": str(summary_timestamp.date()),
        "created": created_count,
        "updated": updated_count,
    }
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id ), @@ -642,6 +647,12 @@ def backfill_daily_severity_summaries_task(tenant_id: str, days: int = None): return backfill_daily_severity_summaries(tenant_id=tenant_id, days=days) +@shared_task(name="backfill-finding-group-summaries", queue="backfill") +def backfill_finding_group_summaries_task(tenant_id: str, days: int = None): + """Backfill FindingGroupDailySummary from historical scans. Use days param to limit scope.""" + return backfill_finding_group_summaries(tenant_id=tenant_id, days=days) + + @shared_task(name="backfill-scan-category-summaries", queue="backfill") @handle_provider_deletion def backfill_scan_category_summaries_task(tenant_id: str, scan_id: str): @@ -741,6 +752,14 @@ def aggregate_daily_severity_task(tenant_id: str, scan_id: str): return aggregate_daily_severity(tenant_id=tenant_id, scan_id=scan_id) +@shared_task(base=RLSTask, name="scan-finding-group-summaries", queue="overview") +@set_tenant(keep_tenant=True) +@handle_provider_deletion +def aggregate_finding_group_summaries_task(tenant_id: str, scan_id: str): + """Aggregate findings by check_id into FindingGroupDailySummary for finding-groups endpoint.""" + return aggregate_finding_group_summaries(tenant_id=tenant_id, scan_id=scan_id) + + @shared_task(base=RLSTask, name="lighthouse-connection-check") @set_tenant def check_lighthouse_connection_task(lighthouse_config_id: str, tenant_id: str = None): diff --git a/api/src/backend/tasks/tests/test_backfill.py b/api/src/backend/tasks/tests/test_backfill.py index 04b3158d22..469b0a393b 100644 --- a/api/src/backend/tasks/tests/test_backfill.py +++ b/api/src/backend/tasks/tests/test_backfill.py @@ -14,11 +14,13 @@ from tasks.jobs.backfill import ( from api.models import ( ComplianceOverviewSummary, Finding, + ProviderComplianceScore, ResourceScanSummary, Scan, ScanCategorySummary, ScanGroupSummary, StateChoices, + StatusChoices, ) from prowler.lib.check.models import Severity from 
prowler.lib.outputs.finding import Status @@ -364,12 +366,29 @@ class TestBackfillProviderComplianceScores: def test_no_scans_to_process(self, tenants_fixture, scans_fixture): tenant = tenants_fixture[0] - scan = scans_fixture[0] - scan.completed_at = None - scan.save() + scan1, scan2, _ = scans_fixture + + ProviderComplianceScore.objects.create( + tenant_id=tenant.id, + scan=scan1, + provider=scan1.provider, + compliance_id="aws_cis_1.0", + requirement_id="1.1", + requirement_status=StatusChoices.PASS, + scan_completed_at=scan1.completed_at, + ) + ProviderComplianceScore.objects.create( + tenant_id=tenant.id, + scan=scan2, + provider=scan2.provider, + compliance_id="aws_cis_1.0", + requirement_id="1.1", + requirement_status=StatusChoices.PASS, + scan_completed_at=scan2.completed_at, + ) result = backfill_provider_compliance_scores(str(tenant.id)) - assert result == {"status": "no completed scans"} + assert result == {"status": "no scans to process"} @patch("tasks.jobs.backfill.psycopg_connection") def test_successful_backfill_executes_sql_queries( @@ -383,10 +402,14 @@ class TestBackfillProviderComplianceScores: settings.DATABASES.setdefault("admin", settings.DATABASES["default"]) tenant = tenants_fixture[0] scan = scans_fixture[0] + scan2 = scans_fixture[1] # Set completed_at to make the scan eligible for backfill scan.completed_at = datetime.now(timezone.utc) scan.save() + scan2.state = StateChoices.AVAILABLE + scan2.completed_at = None + scan2.save() connection = MagicMock() cursor = MagicMock() diff --git a/api/src/backend/tasks/tests/test_scan.py b/api/src/backend/tasks/tests/test_scan.py index 5f244e0103..ac4d5474dc 100644 --- a/api/src/backend/tasks/tests/test_scan.py +++ b/api/src/backend/tasks/tests/test_scan.py @@ -4093,6 +4093,10 @@ class TestUpdateProviderComplianceScores: tenant_id = str(tenant.id) scan_id = str(scan.id) + scan.state = StateChoices.AVAILABLE + scan.completed_at = None + scan.save() + result = 
update_provider_compliance_scores(tenant_id, scan_id) assert result["status"] == "skipped"