feat(api): add finding groups summaries (#9961)

Co-authored-by: Alan Buscaglia <gentlemanprogramming@gmail.com>
This commit is contained in:
Adrián Peña
2026-02-23 13:44:45 +01:00
committed by GitHub
parent 5830cb63c9
commit 584455a12a
17 changed files with 3305 additions and 45 deletions
+1
View File
@@ -6,6 +6,7 @@ All notable changes to the **Prowler API** are documented in this file.
### 🚀 Added
- Finding group summaries and resources endpoints for hierarchical findings views [(#9961)](https://github.com/prowler-cloud/prowler/pull/9961)
- OpenStack provider support [(#10003)](https://github.com/prowler-cloud/prowler/pull/10003)
- PDF report for the CSA CCM compliance framework [(#10088)](https://github.com/prowler-cloud/prowler/pull/10088)
+7
View File
@@ -0,0 +1,7 @@
# Numeric rank for each severity label; a larger number means a more severe
# finding, so the ranks can be compared or aggregated as plain integers.
SEVERITY_ORDER = {
    "critical": 5,
    "high": 4,
    "medium": 3,
    "low": 2,
    "informational": 1,
}
+279 -16
View File
@@ -23,13 +23,14 @@ from api.db_utils import (
StatusEnumField,
)
from api.models import (
AttackPathsScan,
AttackSurfaceOverview,
ComplianceRequirementOverview,
DailySeveritySummary,
Finding,
FindingGroupDailySummary,
Integration,
Invitation,
AttackPathsScan,
LighthouseProviderConfiguration,
LighthouseProviderModels,
Membership,
@@ -181,7 +182,7 @@ class CommonFindingFilters(FilterSet):
help_text="If this filter is not provided, muted and non-muted findings will be returned."
)
resources = UUIDInFilter(field_name="resource__id", lookup_expr="in")
resources = UUIDInFilter(field_name="resources__id", lookup_expr="in")
region = CharFilter(method="filter_resource_region")
region__in = CharInFilter(field_name="resource_regions", lookup_expr="overlap")
@@ -469,9 +470,10 @@ class ResourceFilter(ProviderRelationshipFilterSet):
class Meta:
model = Resource
fields = {
"id": ["exact", "in"],
"provider": ["exact", "in"],
"uid": ["exact", "icontains"],
"name": ["exact", "icontains"],
"uid": ["exact", "icontains", "in"],
"name": ["exact", "icontains", "in"],
"region": ["exact", "icontains", "in"],
"service": ["exact", "icontains", "in"],
"type": ["exact", "icontains", "in"],
@@ -554,9 +556,10 @@ class LatestResourceFilter(ProviderRelationshipFilterSet):
class Meta:
model = Resource
fields = {
"id": ["exact", "in"],
"provider": ["exact", "in"],
"uid": ["exact", "icontains"],
"name": ["exact", "icontains"],
"uid": ["exact", "icontains", "in"],
"name": ["exact", "icontains", "in"],
"region": ["exact", "icontains", "in"],
"service": ["exact", "icontains", "in"],
"type": ["exact", "icontains", "in"],
@@ -647,16 +650,15 @@ class FindingFilter(CommonFindingFilters):
]
)
gte_date = (
datetime.strptime(self.data.get("inserted_at__gte"), "%Y-%m-%d").date()
if self.data.get("inserted_at__gte")
else datetime.now(timezone.utc).date()
)
lte_date = (
datetime.strptime(self.data.get("inserted_at__lte"), "%Y-%m-%d").date()
if self.data.get("inserted_at__lte")
else datetime.now(timezone.utc).date()
)
cleaned = self.form.cleaned_data
exact_date = cleaned.get("inserted_at") or cleaned.get("inserted_at__date")
gte_date = cleaned.get("inserted_at__gte") or exact_date
lte_date = cleaned.get("inserted_at__lte") or exact_date
if gte_date is None:
gte_date = datetime.now(timezone.utc).date()
if lte_date is None:
lte_date = datetime.now(timezone.utc).date()
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -779,6 +781,267 @@ class LatestFindingFilter(CommonFindingFilters):
}
class FindingGroupFilter(CommonFindingFilters):
    """
    Filterset for FindingGroup aggregations.

    At least one ``inserted_at`` date filter must be supplied (needed for
    partition pruning), and the resulting window may not span more than
    ``settings.FINDINGS_MAX_DAYS_IN_RANGE`` days. Date filters are translated
    into bounds on the UUIDv7 ``id`` column rather than on ``inserted_at``.
    Provider, status, severity, region and service filters are inherited from
    CommonFindingFilters.
    """

    inserted_at = DateFilter(method="filter_inserted_at", lookup_expr="date")
    inserted_at__date = DateFilter(method="filter_inserted_at", lookup_expr="date")
    inserted_at__gte = DateFilter(
        method="filter_inserted_at_gte",
        help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
    )
    inserted_at__lte = DateFilter(
        method="filter_inserted_at_lte",
        help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
    )

    check_id = CharFilter(field_name="check_id", lookup_expr="exact")
    check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
    check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")

    class Meta:
        model = Finding
        fields = {
            "check_id": ["exact", "in", "icontains"],
        }

    def filter_queryset(self, queryset):
        """
        Validate the date filters, then delegate to the parent filterset.

        Raises:
            ValidationError: if no date filter was sent, or if the requested
                window is wider than ``settings.FINDINGS_MAX_DAYS_IN_RANGE`` days.
        """
        self._require_date_filter()
        self._require_bounded_range()
        return super().filter_queryset(queryset)

    def _require_date_filter(self):
        """Reject requests whose raw query data carries no date filter at all."""
        date_params = (
            "inserted_at",
            "inserted_at__date",
            "inserted_at__gte",
            "inserted_at__lte",
        )
        if any(self.data.get(param) for param in date_params):
            return

        raise ValidationError(
            [
                {
                    "detail": "At least one date filter is required: filter[inserted_at], filter[inserted_at.gte], "
                    "or filter[inserted_at.lte].",
                    "status": 400,
                    "source": {"pointer": "/data/attributes/inserted_at"},
                    "code": "required",
                }
            ]
        )

    def _require_bounded_range(self):
        """Reject windows wider than the configured maximum number of days."""
        form_values = self.form.cleaned_data
        single_day = form_values.get("inserted_at") or form_values.get(
            "inserted_at__date"
        )
        window_start = form_values.get("inserted_at__gte") or single_day
        window_end = form_values.get("inserted_at__lte") or single_day

        # A missing bound is treated as "today" (UTC) for validation purposes.
        if window_start is None:
            window_start = datetime.now(timezone.utc).date()
        if window_end is None:
            window_end = datetime.now(timezone.utc).date()

        window_size = abs(window_end - window_start)
        if window_size > timedelta(days=settings.FINDINGS_MAX_DAYS_IN_RANGE):
            raise ValidationError(
                [
                    {
                        "detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
                        "status": 400,
                        "source": {"pointer": "/data/attributes/inserted_at"},
                        "code": "invalid",
                    }
                ]
            )

    def filter_inserted_at(self, queryset, name, value):
        """Keep rows whose UUIDv7 id falls inside the given day [day, day + 1)."""
        day_start = self._maybe_date_to_datetime(value)
        next_day = day_start + timedelta(days=1)
        lower_id = uuid7_start(datetime_to_uuid7(day_start))
        upper_id = uuid7_start(datetime_to_uuid7(next_day))
        return queryset.filter(id__gte=lower_id, id__lt=upper_id)

    def filter_inserted_at_gte(self, queryset, name, value):
        """Keep rows whose UUIDv7 id is at or after the start of the given day."""
        day_start = self._maybe_date_to_datetime(value)
        lower_id = uuid7_start(datetime_to_uuid7(day_start))
        return queryset.filter(id__gte=lower_id)

    def filter_inserted_at_lte(self, queryset, name, value):
        """Keep rows whose UUIDv7 id is before the start of the following day."""
        next_day = self._maybe_date_to_datetime(value) + timedelta(days=1)
        upper_id = uuid7_start(datetime_to_uuid7(next_day))
        return queryset.filter(id__lt=upper_id)

    @staticmethod
    def _maybe_date_to_datetime(value):
        """Return midnight UTC of ``value`` when it is a date; otherwise ``value`` unchanged."""
        if not isinstance(value, date):
            return value
        return datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
class LatestFindingGroupFilter(CommonFindingFilters):
"""
Filter for FindingGroup resources in /latest endpoint.
Declares the same check_id filters as FindingGroupFilter, but no
inserted_at filters of its own and no filter_queryset override, so no
date filter is required and no date range is validated here.
"""
# check_id lookups: exact match, membership in a comma-separated list,
# and case-insensitive substring.
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
class Meta:
model = Finding
fields = {
"check_id": ["exact", "in", "icontains"],
}
class FindingGroupSummaryFilter(FilterSet):
    """
    Filterset for FindingGroupDailySummary queries.

    Narrows the pre-aggregated summary table by date range, check_id and
    provider. At least one ``inserted_at`` date filter must be supplied, and
    the resulting window may not span more than
    ``settings.FINDINGS_MAX_DAYS_IN_RANGE`` days.
    """

    inserted_at = DateFilter(method="filter_inserted_at", lookup_expr="date")
    inserted_at__date = DateFilter(method="filter_inserted_at", lookup_expr="date")
    inserted_at__gte = DateFilter(
        method="filter_inserted_at_gte",
        help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
    )
    inserted_at__lte = DateFilter(
        method="filter_inserted_at_lte",
        help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
    )

    # Check ID filters
    check_id = CharFilter(field_name="check_id", lookup_expr="exact")
    check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
    check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")

    # Provider filters
    provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact")
    provider_id__in = UUIDInFilter(field_name="provider_id", lookup_expr="in")
    provider_type = ChoiceFilter(
        field_name="provider__provider", choices=Provider.ProviderChoices.choices
    )
    provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")

    class Meta:
        model = FindingGroupDailySummary
        fields = {
            "check_id": ["exact", "in", "icontains"],
            "inserted_at": ["date", "gte", "lte"],
            "provider_id": ["exact", "in"],
        }

    def filter_queryset(self, queryset):
        """
        Validate the date filters, then delegate to the parent filterset.

        Raises:
            ValidationError: if no date filter was sent, or if the requested
                window is wider than ``settings.FINDINGS_MAX_DAYS_IN_RANGE`` days.
        """
        self._require_date_filter()
        self._require_bounded_range()
        return super().filter_queryset(queryset)

    def _require_date_filter(self):
        """Reject requests whose raw query data carries no date filter at all."""
        date_params = (
            "inserted_at",
            "inserted_at__date",
            "inserted_at__gte",
            "inserted_at__lte",
        )
        if any(self.data.get(param) for param in date_params):
            return

        raise ValidationError(
            [
                {
                    "detail": "At least one date filter is required: filter[inserted_at], filter[inserted_at.gte], "
                    "or filter[inserted_at.lte].",
                    "status": 400,
                    "source": {"pointer": "/data/attributes/inserted_at"},
                    "code": "required",
                }
            ]
        )

    def _require_bounded_range(self):
        """Reject windows wider than the configured maximum number of days."""
        form_values = self.form.cleaned_data
        single_day = form_values.get("inserted_at") or form_values.get(
            "inserted_at__date"
        )
        window_start = form_values.get("inserted_at__gte") or single_day
        window_end = form_values.get("inserted_at__lte") or single_day

        # A missing bound is treated as "today" (UTC) for validation purposes.
        if window_start is None:
            window_start = datetime.now(timezone.utc).date()
        if window_end is None:
            window_end = datetime.now(timezone.utc).date()

        window_size = abs(window_end - window_start)
        if window_size > timedelta(days=settings.FINDINGS_MAX_DAYS_IN_RANGE):
            raise ValidationError(
                [
                    {
                        "detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
                        "status": 400,
                        "source": {"pointer": "/data/attributes/inserted_at"},
                        "code": "invalid",
                    }
                ]
            )

    def filter_inserted_at(self, queryset, name, value):
        """Keep rows whose inserted_at falls inside the given day [day, day + 1)."""
        day_start = self._maybe_date_to_datetime(value)
        next_day = day_start + timedelta(days=1)
        return queryset.filter(inserted_at__gte=day_start, inserted_at__lt=next_day)

    def filter_inserted_at_gte(self, queryset, name, value):
        """Keep rows inserted at or after the start of the given day."""
        day_start = self._maybe_date_to_datetime(value)
        return queryset.filter(inserted_at__gte=day_start)

    def filter_inserted_at_lte(self, queryset, name, value):
        """Keep rows inserted before the start of the following day (inclusive of the given day)."""
        next_day = self._maybe_date_to_datetime(value) + timedelta(days=1)
        return queryset.filter(inserted_at__lt=next_day)

    @staticmethod
    def _maybe_date_to_datetime(value):
        """Return midnight UTC of ``value`` when it is a date; otherwise ``value`` unchanged."""
        if not isinstance(value, date):
            return value
        return datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
class LatestFindingGroupSummaryFilter(FilterSet):
"""
Filter for FindingGroupDailySummary /latest endpoint.
Declares the same check_id and provider filters as
FindingGroupSummaryFilter, but no inserted_at filters and no
filter_queryset override, so no date filter is required or validated.
Used when the endpoint automatically determines the date.
"""
# Check ID filters
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
# Provider filters
provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact")
provider_id__in = UUIDInFilter(field_name="provider_id", lookup_expr="in")
# provider_type follows the summary's provider FK to the Provider.provider
# column; the single-value form is restricted to Provider.ProviderChoices.
provider_type = ChoiceFilter(
field_name="provider__provider", choices=Provider.ProviderChoices.choices
)
provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
class Meta:
model = FindingGroupDailySummary
fields = {
"check_id": ["exact", "in", "icontains"],
"provider_id": ["exact", "in"],
}
class ProviderSecretFilter(FilterSet):
inserted_at = DateFilter(
field_name="inserted_at",
@@ -0,0 +1,132 @@
# Generated by Django 5.1.15 on 2026-01-26
import uuid
import django.db.models.deletion
from django.contrib.postgres.indexes import GinIndex, OpClass
from django.db import migrations, models
from django.db.models.functions import Upper
from django.utils import timezone
import api.rls
class Migration(migrations.Migration):
# Schema migration for finding-group summaries: creates the
# finding_group_daily_summaries table with its indexes and constraints, and
# adds supporting indexes on the existing resource and finding models.
dependencies = [
("api", "0080_backfill_attack_paths_graph_data_ready"),
]
operations = [
# New table: one row of pre-aggregated counters per tenant, provider,
# check_id and inserted_at value.
migrations.CreateModel(
name="FindingGroupDailySummary",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
(
"inserted_at",
models.DateTimeField(default=timezone.now, editable=False),
),
("updated_at", models.DateTimeField(auto_now=True, editable=False)),
("check_id", models.CharField(db_index=True, max_length=255)),
(
"check_title",
models.CharField(blank=True, max_length=500, null=True),
),
("check_description", models.TextField(blank=True, null=True)),
("severity_order", models.SmallIntegerField(default=1)),
("pass_count", models.IntegerField(default=0)),
("fail_count", models.IntegerField(default=0)),
("muted_count", models.IntegerField(default=0)),
("new_count", models.IntegerField(default=0)),
("changed_count", models.IntegerField(default=0)),
("resources_fail", models.IntegerField(default=0)),
("resources_total", models.IntegerField(default=0)),
("first_seen_at", models.DateTimeField(blank=True, null=True)),
("last_seen_at", models.DateTimeField(blank=True, null=True)),
("failing_since", models.DateTimeField(blank=True, null=True)),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="api.tenant",
),
),
(
"provider",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="finding_group_summaries",
to="api.provider",
),
),
],
options={
"db_table": "finding_group_daily_summaries",
"abstract": False,
},
),
# Composite indexes on the summary table, all led by tenant_id and
# ending in inserted_at (plain, per-provider and per-check variants).
migrations.AddIndex(
model_name="findinggroupdailysummary",
index=models.Index(
fields=["tenant_id", "inserted_at"],
name="fgds_tenant_inserted_at_idx",
),
),
migrations.AddIndex(
model_name="findinggroupdailysummary",
index=models.Index(
fields=["tenant_id", "provider", "inserted_at"],
name="fgds_tenant_prov_ins_idx",
),
),
migrations.AddIndex(
model_name="findinggroupdailysummary",
index=models.Index(
fields=["tenant_id", "check_id", "inserted_at"],
name="fgds_tenant_chk_ins_idx",
),
),
# Trigram GIN indexes over UPPER(uid) and UPPER(name) on resource.
# NOTE(review): gin_trgm_ops presumably relies on the pg_trgm extension
# being installed by an earlier migration - confirm.
# NOTE(review): these are plain AddIndex operations on an existing
# table; confirm the build time / locking is acceptable on large tenants.
migrations.AddIndex(
model_name="resource",
index=GinIndex(
OpClass(Upper("uid"), name="gin_trgm_ops"),
name="res_uid_trgm_idx",
),
),
migrations.AddIndex(
model_name="resource",
index=GinIndex(
OpClass(Upper("name"), name="gin_trgm_ops"),
name="res_name_trgm_idx",
),
),
# At most one summary row per (tenant, provider, check_id, inserted_at).
migrations.AddConstraint(
model_name="findinggroupdailysummary",
constraint=models.UniqueConstraint(
fields=("tenant_id", "provider", "check_id", "inserted_at"),
name="unique_finding_group_daily_summary",
),
),
# Row-level security constraint keyed on tenant_id, covering all four
# listed statement types.
migrations.AddConstraint(
model_name="findinggroupdailysummary",
constraint=api.rls.RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_findinggroupdailysummary",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
# Composite index on the existing finding model for per-check lookups
# ordered by insertion time.
# NOTE(review): same plain-AddIndex concern as above for a table that
# may already be large - confirm.
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "check_id", "inserted_at"],
name="find_tenant_check_ins_idx",
),
),
]
@@ -0,0 +1,30 @@
# Generated by Django 5.1.14 on 2026-02-02
from django.db import migrations
from tasks.tasks import backfill_finding_group_summaries_task
from api.db_router import MainRouter
from api.rls import Tenant
def trigger_backfill_task(apps, schema_editor):
    """
    Queue one finding-group summary backfill per tenant.

    Reads every tenant id through the admin database alias and calls
    ``backfill_finding_group_summaries_task.delay`` for each one with
    ``days=30``. The ``apps`` and ``schema_editor`` arguments are part of the
    RunPython callable signature and are not used here.
    """
    tenants = Tenant.objects.using(MainRouter.admin_db)
    for tenant_pk in tenants.values_list("id", flat=True):
        # The id is stringified before being handed to the task.
        backfill_finding_group_summaries_task.delay(
            tenant_id=str(tenant_pk),
            days=30,
        )
class Migration(migrations.Migration):
    """Data migration that dispatches the finding-group summary backfill; the reverse step is a no-op."""

    dependencies = [("api", "0081_finding_group_daily_summary")]

    operations = [
        migrations.RunPython(
            code=trigger_backfill_task,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
+100
View File
@@ -12,12 +12,15 @@ from cryptography.fernet import Fernet, InvalidToken
from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex, OpClass
from django.contrib.postgres.search import SearchVector, SearchVectorField
from django.contrib.sites.models import Site
from django.core.exceptions import ValidationError
from django.core.validators import MinLengthValidator
from django.db import models
from django.db.models import Q
from django.db.models.functions import Upper
from django.utils import timezone as django_timezone
from django.utils.translation import gettext_lazy as _
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
@@ -855,6 +858,16 @@ class Resource(RowLevelSecurityProtectedModel):
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
# icontains compiles to UPPER(field) LIKE, so index the same expression
GinIndex(
OpClass(Upper("uid"), name="gin_trgm_ops"),
name="res_uid_trgm_idx",
),
GinIndex(
OpClass(Upper("name"), name="gin_trgm_ops"),
name="res_name_trgm_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"),
models.Index(
fields=["tenant_id", "provider_id"],
@@ -1052,6 +1065,10 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
fields=["tenant_id", "uid", "-inserted_at"],
name="find_tenant_uid_inserted_idx",
),
models.Index(
fields=["tenant_id", "check_id", "inserted_at"],
name="find_tenant_check_ins_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "check_id"],
name="find_tenant_scan_check_idx",
@@ -1669,6 +1686,89 @@ class DailySeveritySummary(RowLevelSecurityProtectedModel):
]
class FindingGroupDailySummary(RowLevelSecurityProtectedModel):
"""
Pre-aggregated daily finding counts per check_id per provider.
Used by finding-groups endpoint for efficient queries over date ranges.
Instead of aggregating millions of findings on-the-fly, we pre-compute
daily summaries and re-aggregate them when querying date ranges.
This reduces query complexity from O(findings) to O(days × checks × providers).

Rows are unique per (tenant_id, provider, check_id, inserted_at).
NOTE(review): inserted_at is a DateTimeField defaulting to "now", so the
"one row per day" guarantee presumably depends on the writer normalising
inserted_at to a day boundary - confirm against the task that fills this table.
"""
# NOTE(review): presumably limits the default queryset to active
# providers - confirm against ActiveProviderManager.
objects = ActiveProviderManager()
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(default=django_timezone.now, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
check_id = models.CharField(max_length=255, db_index=True)
# Provider FK for filtering by specific provider; deleting the provider
# cascades to its summary rows.
provider = models.ForeignKey(
"Provider",
on_delete=models.CASCADE,
related_name="finding_group_summaries",
)
# Check metadata (denormalized for performance)
check_title = models.CharField(max_length=500, blank=True, null=True)
check_description = models.TextField(blank=True, null=True)
# Severity stored as integer for MAX aggregation (5=critical, 4=high, etc.)
severity_order = models.SmallIntegerField(default=1)
# Finding counts
pass_count = models.IntegerField(default=0)
fail_count = models.IntegerField(default=0)
muted_count = models.IntegerField(default=0)
# Delta counts
new_count = models.IntegerField(default=0)
changed_count = models.IntegerField(default=0)
# Resource counts
resources_fail = models.IntegerField(default=0)
resources_total = models.IntegerField(default=0)
# Timing (all optional)
first_seen_at = models.DateTimeField(null=True, blank=True)
last_seen_at = models.DateTimeField(null=True, blank=True)
failing_since = models.DateTimeField(null=True, blank=True)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "finding_group_daily_summaries"
constraints = [
# One row per tenant, provider, check and inserted_at value.
models.UniqueConstraint(
fields=("tenant_id", "provider", "check_id", "inserted_at"),
name="unique_finding_group_daily_summary",
),
# Row-level security keyed on tenant_id for all listed statements.
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]
# Composite indexes, all led by tenant_id and ending in inserted_at.
indexes = [
models.Index(
fields=["tenant_id", "inserted_at"],
name="fgds_tenant_inserted_at_idx",
),
models.Index(
fields=["tenant_id", "check_id", "inserted_at"],
name="fgds_tenant_chk_ins_idx",
),
models.Index(
fields=["tenant_id", "provider", "inserted_at"],
name="fgds_tenant_prov_ins_idx",
),
]
class JSONAPIMeta:
resource_name = "finding-group-daily-summaries"
class Integration(RowLevelSecurityProtectedModel):
class IntegrationChoices(models.TextChoices):
AMAZON_S3 = "amazon_s3", _("Amazon S3")
+581
View File
@@ -1134,6 +1134,365 @@ paths:
description: The task is in progress
'500':
description: Compliance overviews generation task failed
/api/v1/finding-groups:
get:
operationId: finding_groups_list
description: "\n Retrieve aggregated findings grouped by check_id.\n\n\
\ Each group shows:\n - Aggregated status (FAIL if any non-muted\
\ failure)\n - Maximum severity across all findings\n - Resource\
\ counts (failing vs total)\n - Finding counts by status and delta\n\
\ - Affected provider types\n\n At least one date filter is\
\ required for performance reasons.\n "
summary: List finding groups
parameters:
- in: query
name: fields[finding-groups]
schema:
type: array
items:
type: string
enum:
- id
- check_id
- check_title
- check_description
- severity
- status
- impacted_providers
- resources_fail
- resources_total
- pass_count
- fail_count
- muted_count
- new_count
- changed_count
- first_seen_at
- last_seen_at
- failing_since
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
- in: query
name: filter[check_id]
schema:
type: string
- in: query
name: filter[check_id__icontains]
schema:
type: string
- in: query
name: filter[check_id__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[inserted_at]
schema:
type: string
format: date
- in: query
name: filter[inserted_at__date]
schema:
type: string
format: date
- in: query
name: filter[inserted_at__gte]
schema:
type: string
format: date
description: Maximum date range is 7 days.
- in: query
name: filter[inserted_at__lte]
schema:
type: string
format: date
description: Maximum date range is 7 days.
- in: query
name: filter[provider_id]
schema:
type: string
format: uuid
- in: query
name: filter[provider_id__in]
schema:
type: array
items:
type: string
format: uuid
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[provider_type]
schema:
type: string
x-spec-enum-id: 4b8815b179aa7216
enum:
- alibabacloud
- aws
- azure
- cloudflare
- gcp
- github
- iac
- kubernetes
- m365
- mongodbatlas
- openstack
- oraclecloud
description: |-
* `aws` - AWS
* `azure` - Azure
* `gcp` - GCP
* `kubernetes` - Kubernetes
* `m365` - M365
* `github` - GitHub
* `mongodbatlas` - MongoDB Atlas
* `iac` - IaC
* `oraclecloud` - Oracle Cloud Infrastructure
* `alibabacloud` - Alibaba Cloud
* `cloudflare` - Cloudflare
* `openstack` - OpenStack
- in: query
name: filter[provider_type__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- name: filter[search]
required: false
in: query
description: A search term.
schema:
type: string
- name: page[number]
required: false
in: query
description: A page number within the paginated result set.
schema:
type: integer
- name: page[size]
required: false
in: query
description: Number of results to return per page.
schema:
type: integer
- name: sort
required: false
in: query
description: '[list of fields to sort by](https://jsonapi.org/format/#fetching-sorting)'
schema:
type: array
items:
type: string
enum:
- id
- -id
- check_id
- -check_id
- check_title
- -check_title
- check_description
- -check_description
- severity
- -severity
- status
- -status
- impacted_providers
- -impacted_providers
- resources_fail
- -resources_fail
- resources_total
- -resources_total
- pass_count
- -pass_count
- fail_count
- -fail_count
- muted_count
- -muted_count
- new_count
- -new_count
- changed_count
- -changed_count
- first_seen_at
- -first_seen_at
- last_seen_at
- -last_seen_at
- failing_since
- -failing_since
explode: false
tags:
- Finding Groups
security:
- JWT or API Key: []
responses:
'200':
content:
application/vnd.api+json:
schema:
$ref: '#/components/schemas/PaginatedFindingGroupList'
description: ''
/api/v1/finding-groups/{id}/resources:
get:
operationId: finding_groups_resources_retrieve
description: "\n Retrieve resources affected by a specific check (finding\
\ group).\n\n Returns individual resources with their current status,\
\ severity,\n and timing information including how long they have been\
\ failing.\n "
summary: List resources for a finding group
parameters:
- in: query
name: fields[finding-groups]
schema:
type: array
items:
type: string
enum:
- id
- check_id
- check_title
- check_description
- severity
- status
- impacted_providers
- resources_fail
- resources_total
- pass_count
- fail_count
- muted_count
- new_count
- changed_count
- first_seen_at
- last_seen_at
- failing_since
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
- in: path
name: id
schema:
type: string
format: uuid
description: A UUID string identifying this finding group daily summary.
required: true
tags:
- Finding Groups
security:
- JWT or API Key: []
responses:
'200':
content:
application/vnd.api+json:
schema:
$ref: '#/components/schemas/FindingGroupResponse'
description: ''
/api/v1/finding-groups/latest:
get:
operationId: finding_groups_latest_retrieve
description: "\n Retrieve the latest available state for each finding\
\ group (check_id).\n\n This endpoint returns finding groups without\
\ requiring date filters,\n automatically using the latest available\
\ data per check_id.\n All other filters (provider_id, provider_type,\
\ check_id) are still supported.\n "
summary: List latest finding groups
parameters:
- in: query
name: fields[finding-groups]
schema:
type: array
items:
type: string
enum:
- id
- check_id
- check_title
- check_description
- severity
- status
- impacted_providers
- resources_fail
- resources_total
- pass_count
- fail_count
- muted_count
- new_count
- changed_count
- first_seen_at
- last_seen_at
- failing_since
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
tags:
- Finding Groups
security:
- JWT or API Key: []
responses:
'200':
content:
application/vnd.api+json:
schema:
$ref: '#/components/schemas/FindingGroupResponse'
description: ''
/api/v1/finding-groups/latest/{check_id}/resources:
get:
operationId: finding_groups_latest_resources_retrieve
description: "\n Retrieve resources affected by a specific check (finding\
\ group) from the\n latest completed scan for each provider.\n\n \
\ Returns individual resources with their current status, severity,\n\
\ and timing information. No date filters required.\n "
summary: List resources for a finding group from latest scans
parameters:
- in: path
name: check_id
schema:
type: string
required: true
- in: query
name: fields[finding-groups]
schema:
type: array
items:
type: string
enum:
- id
- check_id
- check_title
- check_description
- severity
- status
- impacted_providers
- resources_fail
- resources_total
- pass_count
- fail_count
- muted_count
- new_count
- changed_count
- first_seen_at
- last_seen_at
- failing_since
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
tags:
- Finding Groups
security:
- JWT or API Key: []
responses:
'200':
content:
application/vnd.api+json:
schema:
$ref: '#/components/schemas/FindingGroupResponse'
description: ''
/api/v1/findings:
get:
operationId: findings_list
@@ -8270,6 +8629,21 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[id]
schema:
type: string
format: uuid
- in: query
name: filter[id__in]
schema:
type: array
items:
type: string
format: uuid
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[inserted_at]
schema:
@@ -8293,6 +8667,15 @@ paths:
name: filter[name__icontains]
schema:
type: string
- in: query
name: filter[name__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[provider]
schema:
@@ -8521,6 +8904,15 @@ paths:
name: filter[uid__icontains]
schema:
type: string
- in: query
name: filter[uid__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[updated_at]
schema:
@@ -8791,6 +9183,21 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[id]
schema:
type: string
format: uuid
- in: query
name: filter[id__in]
schema:
type: array
items:
type: string
format: uuid
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[name]
schema:
@@ -8799,6 +9206,15 @@ paths:
name: filter[name__icontains]
schema:
type: string
- in: query
name: filter[name__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[provider]
schema:
@@ -9012,6 +9428,15 @@ paths:
name: filter[uid__icontains]
schema:
type: string
- in: query
name: filter[uid__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: include
schema:
@@ -9095,6 +9520,21 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[id]
schema:
type: string
format: uuid
- in: query
name: filter[id__in]
schema:
type: array
items:
type: string
format: uuid
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[inserted_at]
schema:
@@ -9118,6 +9558,15 @@ paths:
name: filter[name__icontains]
schema:
type: string
- in: query
name: filter[name__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[provider]
schema:
@@ -9346,6 +9795,15 @@ paths:
name: filter[uid__icontains]
schema:
type: string
- in: query
name: filter[uid__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[updated_at]
schema:
@@ -9435,6 +9893,21 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[id]
schema:
type: string
format: uuid
- in: query
name: filter[id__in]
schema:
type: array
items:
type: string
format: uuid
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[name]
schema:
@@ -9443,6 +9916,15 @@ paths:
name: filter[name__icontains]
schema:
type: string
- in: query
name: filter[name__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[provider]
schema:
@@ -9656,6 +10138,15 @@ paths:
name: filter[uid__icontains]
schema:
type: string
- in: query
name: filter[uid__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- name: sort
required: false
in: query
@@ -13371,6 +13862,87 @@ components:
$ref: '#/components/schemas/FindingDynamicFilter'
required:
- data
FindingGroup:
type: object
required:
- type
- id
additionalProperties: false
properties:
type:
type: string
description: The [type](https://jsonapi.org/format/#document-resource-object-identification)
member is used to describe resource objects that share common attributes
and relationships.
enum:
- finding-groups
id: {}
attributes:
type: object
properties:
id:
type: string
check_id:
type: string
check_title:
type: string
nullable: true
check_description:
type: string
nullable: true
severity:
type: string
status:
type: string
impacted_providers:
type: array
items:
type: string
resources_fail:
type: integer
resources_total:
type: integer
pass_count:
type: integer
fail_count:
type: integer
muted_count:
type: integer
new_count:
type: integer
changed_count:
type: integer
first_seen_at:
type: string
format: date-time
nullable: true
last_seen_at:
type: string
format: date-time
nullable: true
failing_since:
type: string
format: date-time
nullable: true
required:
- id
- check_id
- severity
- status
- resources_fail
- resources_total
- pass_count
- fail_count
- muted_count
- new_count
- changed_count
FindingGroupResponse:
type: object
properties:
data:
$ref: '#/components/schemas/FindingGroup'
required:
- data
FindingMetadata:
type: object
required:
@@ -16188,6 +16760,15 @@ components:
$ref: '#/components/schemas/ComplianceWatchlistOverview'
required:
- data
PaginatedFindingGroupList:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/FindingGroup'
required:
- data
PaginatedFindingList:
type: object
properties:
+807 -18
View File
@@ -3045,21 +3045,21 @@ class TestScanViewSet:
[
("provider_type", "aws", 3),
("provider_type.in", "gcp,azure", 0),
("provider_uid", "123456789012", 2),
("provider_uid", "123456789012", 1),
("provider_uid.icontains", "1", 3),
("provider_uid.in", "123456789012,123456789013", 3),
("provider_alias", "aws_testing_1", 2),
("provider_alias", "aws_testing_1", 1),
("provider_alias.icontains", "aws", 3),
("provider_alias.in", "aws_testing_1,aws_testing_2", 3),
("name", "Scan 1", 1),
("name.icontains", "Scan", 3),
("started_at", "2024-01-02", 3),
("started_at", "2024-01-02", 1),
("started_at.gte", "2024-01-01", 3),
("started_at.lte", "2024-01-01", 0),
("trigger", Scan.TriggerChoices.MANUAL, 1),
("state", StateChoices.AVAILABLE, 1),
("state", StateChoices.FAILED, 1),
("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 2),
("state", StateChoices.FAILED, 0),
("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 1),
("trigger", Scan.TriggerChoices.MANUAL, 1),
]
),
@@ -3102,20 +3102,52 @@ class TestScanViewSet:
{"filter[provider]": scans_fixture[0].provider.id},
)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 2
assert len(response.json()["data"]) == 1
def test_scan_filter_by_provider_id_in(self, authenticated_client, scans_fixture):
response = authenticated_client.get(
reverse("scan-list"),
{
"filter[provider.in]": [
scans_fixture[0].provider.id,
scans_fixture[1].provider.id,
]
"filter[provider.in]": f"{scans_fixture[0].provider.id},{scans_fixture[1].provider.id}",
},
)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 2
assert len(response.json()["data"]) == 3
def test_scans_filter_state_failed(self, authenticated_client, scans_fixture):
    """Filtering scans by state=FAILED returns only the scan created as FAILED."""
    reference_scan = scans_fixture[0]
    # Add one FAILED scan on the same provider/tenant as the first fixture scan.
    failed_scan = Scan.objects.create(
        name="Scan Failed",
        provider=reference_scan.provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.FAILED,
        tenant_id=reference_scan.tenant_id,
    )

    query = {"filter[state]": StateChoices.FAILED}
    resp = authenticated_client.get(reverse("scan-list"), query)

    assert resp.status_code == status.HTTP_200_OK
    scans = resp.json()["data"]
    assert len(scans) == 1
    assert scans[0]["id"] == str(failed_scan.id)
def test_scans_filter_provider_alias_exact(self, authenticated_client, scans_fixture):
    """An exact provider_alias filter yields the single scan of that provider."""
    first_scan = scans_fixture[0]
    expected_provider_id = str(first_scan.provider.id)

    query = {"filter[provider_alias]": first_scan.provider.alias}
    resp = authenticated_client.get(reverse("scan-list"), query)

    assert resp.status_code == status.HTTP_200_OK
    scans = resp.json()["data"]
    assert len(scans) == 1
    provider_ref = scans[0]["relationships"]["provider"]["data"]
    assert provider_ref["id"] == expected_provider_id
@pytest.mark.parametrize(
"sort_field",
@@ -4365,15 +4397,10 @@ class TestResourceViewSet:
):
response = authenticated_client.get(
reverse("resource-list"),
{
"filter[scan.in]": [
scans_fixture[0].id,
scans_fixture[1].id,
]
},
{"filter[scan.in]": f"{scans_fixture[0].id},{scans_fixture[1].id}"},
)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 2
assert len(response.json()["data"]) == 3
def test_resource_filter_by_provider_id_in(
self, authenticated_client, resources_fixture
@@ -14265,3 +14292,765 @@ class TestMuteRuleViewSet:
assert len(data) == len(mute_rules_fixture)
for rule_data in data:
assert rule_data["id"] != str(other_rule.id)
@pytest.mark.django_db
class TestFindingGroupViewSet:
"""Tests for Finding Groups API - aggregates findings by check_id."""
def test_finding_groups_requires_date_filter(self, authenticated_client):
    """Listing finding groups with no date filter is rejected as a 400 'required' error."""
    resp = authenticated_client.get(reverse("finding-group-list"))
    assert resp.status_code == status.HTTP_400_BAD_REQUEST
    first_error = resp.json()["errors"][0]
    assert first_error["code"] == "required"
def test_finding_groups_empty(self, authenticated_client):
    """With no fixture data loaded, the list endpoint answers 200 with no groups."""
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_single_check(self, authenticated_client, finding_groups_fixture):
    """Findings sharing one check_id collapse into a single group keyed by that check_id."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    group = groups[0]
    assert group["id"] == "s3_bucket_public_access"
    assert group["attributes"]["check_id"] == "s3_bucket_public_access"
def test_finding_groups_multiple_checks(self, authenticated_client, finding_groups_fixture):
    """Each distinct check_id in the fixture is returned as its own finding group."""
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    # The fixture contributes 5 distinct check_ids.
    assert len(groups) == 5

    check_ids = {group["id"] for group in groups}
    for expected_check in (
        "s3_bucket_public_access",
        "ec2_instance_public_ip",
        "iam_password_policy",
        "rds_encryption",
        "cloudtrail_enabled",
    ):
        assert expected_check in check_ids
def test_finding_groups_severity_max(self, authenticated_client, finding_groups_fixture):
    """A group reports the highest severity found among its findings."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    # The fixture mixes critical and high findings for this check; critical must win.
    assert groups[0]["attributes"]["severity"] == "critical"
def test_finding_groups_status_fail_priority(
    self, authenticated_client, finding_groups_fixture
):
    """A group with any non-muted FAIL is reported as FAIL even if PASS findings exist."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "ec2_instance_public_ip",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    # Fixture: one PASS plus one FAIL for this check, so the aggregate is FAIL.
    assert groups[0]["attributes"]["status"] == "FAIL"
def test_finding_groups_status_pass_when_no_fail(
    self, authenticated_client, finding_groups_fixture
):
    """A group without any non-muted FAIL finding is reported as PASS."""
    query = {"filter[inserted_at]": TODAY, "filter[check_id]": "iam_password_policy"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    # Fixture: this check only has PASS findings.
    assert groups[0]["attributes"]["status"] == "PASS"
def test_finding_groups_status_muted_all(
    self, authenticated_client, finding_groups_fixture
):
    """A group whose findings are all muted is reported with status MUTED."""
    query = {"filter[inserted_at]": TODAY, "filter[check_id]": "rds_encryption"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    # Fixture: every rds_encryption finding is muted.
    assert groups[0]["attributes"]["status"] == "MUTED"
def test_finding_groups_provider_aggregation(
    self, authenticated_client, finding_groups_fixture
):
    """The s3_bucket_public_access group lists 'aws' among its impacted providers."""
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    # Locate the s3_bucket_public_access group in the unfiltered listing.
    s3_group = None
    for group in resp.json()["data"]:
        if group["id"] == "s3_bucket_public_access":
            s3_group = group
            break

    assert s3_group is not None
    assert "aws" in s3_group["attributes"]["impacted_providers"]
def test_finding_groups_resource_counts(
    self, authenticated_client, finding_groups_fixture
):
    """resources_fail and resources_total match the fixture's failing resources."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    attributes = groups[0]["attributes"]
    # Fixture: two FAIL findings, each attached to a different resource.
    assert attributes["resources_fail"] == 2
    assert attributes["resources_total"] == 2
def test_finding_groups_finding_counts(
    self, authenticated_client, finding_groups_fixture
):
    """pass_count, fail_count and muted_count reflect the fixture's findings."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "ec2_instance_public_ip",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    attributes = groups[0]["attributes"]
    # Fixture: one PASS and one non-muted FAIL for this check.
    assert attributes["pass_count"] == 1
    assert attributes["fail_count"] == 1
    assert attributes["muted_count"] == 0
def test_finding_groups_delta_counts(self, authenticated_client, finding_groups_fixture):
    """new_count and changed_count reflect the deltas of the grouped findings."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    attributes = groups[0]["attributes"]
    # Fixture: one "new" and one "changed" finding for this check.
    assert attributes["new_count"] == 1
    assert attributes["changed_count"] == 1
def test_finding_groups_timing(self, authenticated_client, finding_groups_fixture):
    """A failing group exposes non-null first_seen_at, last_seen_at and failing_since."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    attributes = groups[0]["attributes"]

    # All three timing attributes must be part of the payload.
    for timing_key in ("first_seen_at", "last_seen_at", "failing_since"):
        assert timing_key in attributes

    assert attributes["first_seen_at"] is not None
    assert attributes["last_seen_at"] is not None
    # This check has FAIL findings in the fixture, so failing_since is populated.
    assert attributes["failing_since"] is not None
# Test failing_since for checks without failures
def test_finding_groups_failing_since_null_when_passing(
    self, authenticated_client, finding_groups_fixture
):
    """failing_since is null for a check whose findings are all PASS."""
    query = {"filter[inserted_at]": TODAY, "filter[check_id]": "iam_password_policy"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    assert len(groups) == 1
    attributes = groups[0]["attributes"]
    # Fixture: iam_password_policy never fails, so there is no failing_since date.
    assert attributes["failing_since"] is None
def test_finding_groups_rls_isolation(
    self, authenticated_client, finding_groups_fixture, tenants_fixture
):
    """Finding groups belonging to a different tenant never appear in the listing."""
    from api.models import Finding, Provider, Resource, Scan
    from api.rls import Tenant

    # Build a complete provider -> scan -> resource -> finding chain in a foreign tenant.
    foreign_tenant = Tenant.objects.create(name="Other Tenant")
    foreign_tenant_id = foreign_tenant.id
    foreign_provider = Provider.objects.create(
        tenant_id=foreign_tenant_id,
        provider="aws",
        uid="999999999999",  # 12-digit value, the AWS account ID format
        alias="Other Account",
    )
    foreign_scan = Scan.objects.create(
        tenant_id=foreign_tenant_id,
        name="Other scan",
        provider=foreign_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
    )
    foreign_resource = Resource.objects.create(
        tenant_id=foreign_tenant_id,
        provider=foreign_provider,
        uid="other-resource-uid",
        name="Other Resource",
        region="us-west-2",
        service="s3",
        type="bucket",
    )
    foreign_finding = Finding.objects.create(
        tenant_id=foreign_tenant_id,
        uid="other_tenant_finding",
        scan=foreign_scan,
        delta=None,
        status="FAIL",
        severity="critical",
        impact="critical",
        check_id="other_tenant_check",
        check_metadata={"CheckId": "other_tenant_check"},
        first_seen_at="2024-01-02T00:00:00Z",
    )
    foreign_finding.add_resources([foreign_resource])

    # The authenticated user's listing must not leak the foreign tenant's check.
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    visible_check_ids = {group["id"] for group in resp.json()["data"]}
    assert "other_tenant_check" not in visible_check_ids
def test_finding_groups_rbac_unlimited(
    self, authenticated_client, finding_groups_fixture
):
    """The authenticated client (unlimited visibility) sees every fixture group."""
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    # All 5 fixture check_ids are visible.
    assert len(groups) == 5
def test_finding_groups_date_filter_gte(
    self, authenticated_client, finding_groups_fixture
):
    """A lower bound of yesterday on inserted_at still includes all fixture groups."""
    since = today_after_n_days(-1)
    resp = authenticated_client.get(
        reverse("finding-group-list"), {"filter[inserted_at.gte]": since}
    )
    assert resp.status_code == status.HTTP_200_OK
    # Every fixture finding was created today, so all 5 groups match.
    groups = resp.json()["data"]
    assert len(groups) == 5
def test_finding_groups_date_filter_lte(
    self, authenticated_client, finding_groups_fixture
):
    """An upper bound of tomorrow on inserted_at still includes all fixture groups."""
    until = today_after_n_days(1)
    resp = authenticated_client.get(
        reverse("finding-group-list"), {"filter[inserted_at.lte]": until}
    )
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 5
def test_finding_groups_date_filter_range(
    self, authenticated_client, finding_groups_fixture
):
    """A gte/lte window ending today returns all fixture groups."""
    # A 6-day window keeps the request inside the 7-day maximum range.
    window = {
        "filter[inserted_at.gte]": today_after_n_days(-6),
        "filter[inserted_at.lte]": today_after_n_days(0),
    }
    resp = authenticated_client.get(reverse("finding-group-list"), window)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 5
def test_finding_groups_date_filter_outside_backfill_range_returns_empty(
    self, authenticated_client, finding_groups_fixture
):
    """Asking for a date 60 days back succeeds and simply yields no groups."""
    old_date = today_after_n_days(-60)
    resp = authenticated_client.get(
        reverse("finding-group-list"), {"filter[inserted_at]": old_date}
    )
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_date_filter_max_range(self, authenticated_client):
    """An inserted_at.lte bound beyond the allowed range is rejected as 400 'invalid'."""
    # One day further back than settings.FINDINGS_MAX_DAYS_IN_RANGE.
    days_back = settings.FINDINGS_MAX_DAYS_IN_RANGE + 1
    query = {"filter[inserted_at.lte]": today_after_n_days(-days_back)}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_400_BAD_REQUEST
    first_error = resp.json()["errors"][0]
    assert first_error["code"] == "invalid"
def test_finding_groups_provider_filter(
    self, authenticated_client, finding_groups_fixture, providers_fixture
):
    """Filtering by the first provider's UUID returns only that provider's groups."""
    first_provider = providers_fixture[0]
    query = {
        "filter[inserted_at]": TODAY,
        "filter[provider_id]": str(first_provider.id),
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    # Fixture: the first provider's scan covers 4 checks (s3_bucket_public_access,
    # ec2_instance_public_ip, iam_password_policy, rds_encryption).
    groups = resp.json()["data"]
    assert len(groups) == 4
def test_finding_groups_provider_type_filter(
    self, authenticated_client, finding_groups_fixture
):
    """Filtering by provider_type=aws returns every fixture group."""
    query = {"filter[inserted_at]": TODAY, "filter[provider_type]": "aws"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    # Every fixture finding comes from an AWS provider.
    groups = resp.json()["data"]
    assert len(groups) == 5
def test_finding_groups_check_id_filter(
    self, authenticated_client, finding_groups_fixture
):
    """An exact check_id filter returns just the matching group."""
    query = {
        "filter[inserted_at]": TODAY,
        "filter[check_id]": "s3_bucket_public_access",
    }
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1
    assert groups[0]["id"] == "s3_bucket_public_access"
def test_finding_groups_check_id_icontains(
    self, authenticated_client, finding_groups_fixture
):
    """The check_id.icontains filter matches groups by substring."""
    query = {"filter[inserted_at]": TODAY, "filter[check_id.icontains]": "bucket"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1
    matched_id = groups[0]["id"]
    assert "bucket" in matched_id.lower()
def test_resources_not_found(self, authenticated_client):
    """Requesting resources for an unknown check_id answers 404."""
    url = reverse("finding-group-resources", kwargs={"pk": "nonexistent_check"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_404_NOT_FOUND
def test_resources_list(self, authenticated_client, finding_groups_fixture):
    """The resources endpoint lists each resource of the requested finding group."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_200_OK
    resources = resp.json()["data"]
    # Fixture: two findings on two different resources for this check.
    assert len(resources) == 2
def test_resources_fields(self, authenticated_client, finding_groups_fixture):
    """Every returned item carries a nested resource with non-empty descriptive fields."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_200_OK

    items = resp.json()["data"]
    assert len(items) == 2
    for entry in items:
        nested = entry["attributes"]["resource"]
        # Each field must be present and truthy (not empty / null).
        assert nested.get("uid"), "resource.uid must not be empty"
        assert nested.get("name"), "resource.name must not be empty"
        assert nested.get("service"), "resource.service must not be empty"
        assert nested.get("region"), "resource.region must not be empty"
        assert nested.get("type"), "resource.type must not be empty"
def test_resources_provider_info(self, authenticated_client, finding_groups_fixture):
    """Every returned item carries a nested provider with type 'aws', a uid and an alias."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_200_OK

    items = resp.json()["data"]
    assert len(items) == 2
    for entry in items:
        nested = entry["attributes"]["provider"]
        assert nested.get("type") == "aws", "provider.type must be 'aws'"
        assert nested.get("uid"), "provider.uid must not be empty"
        assert nested.get("alias"), "provider.alias must not be empty"
def test_resources_status_severity(
    self, authenticated_client, finding_groups_fixture
):
    """Each resource row reports status FAIL and a recognised severity value."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_200_OK

    items = resp.json()["data"]
    assert len(items) == 2
    allowed_severities = [
        "critical",
        "high",
        "medium",
        "low",
        "informational",
    ]
    for entry in items:
        attributes = entry["attributes"]
        # Fixture: the findings of this check are FAIL.
        assert attributes["status"] == "FAIL", "status must be 'FAIL'"
        assert attributes["severity"] in allowed_severities
def test_resources_timing(self, authenticated_client, finding_groups_fixture):
    """Each resource row has non-null first_seen_at and last_seen_at values."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    resp = authenticated_client.get(url, {"filter[inserted_at]": TODAY})
    assert resp.status_code == status.HTTP_200_OK

    items = resp.json()["data"]
    assert len(items) == 2
    for entry in items:
        attributes = entry["attributes"]
        assert attributes["first_seen_at"] is not None, "first_seen_at must not be null"
        assert attributes["last_seen_at"] is not None, "last_seen_at must not be null"
def test_resources_filters_applied(
    self, authenticated_client, finding_groups_fixture
):
    """The resources endpoint accepts an inserted_at gte/lte window."""
    url = reverse("finding-group-resources", kwargs={"pk": "s3_bucket_public_access"})
    window = {
        "filter[inserted_at.gte]": today_after_n_days(-6),
        "filter[inserted_at.lte]": today_after_n_days(0),
    }
    resp = authenticated_client.get(url, window)
    assert resp.status_code == status.HTTP_200_OK
    # Both fixture resources fall inside the window.
    resources = resp.json()["data"]
    assert len(resources) == 2
# Test provider_id filter actually filters data
def test_finding_groups_provider_id_filter_actually_filters(
    self, authenticated_client, finding_groups_fixture, providers_fixture
):
    """
    Verify that provider_id narrows the listing to the chosen provider's checks.

    The unfiltered listing is compared against one filtered listing per
    provider, so a filter that is accepted but ignored would be detected.
    """

    def fetch_check_ids(params):
        # Return the set of group ids for one successful list request.
        resp = authenticated_client.get(reverse("finding-group-list"), params)
        assert resp.status_code == status.HTTP_200_OK
        return {item["id"] for item in resp.json()["data"]}

    provider1 = providers_fixture[0]  # scan1: 4 checks
    provider2 = providers_fixture[1]  # scan2: 1 check (cloudtrail_enabled)

    # Baseline: no provider filter.
    all_check_ids = fetch_check_ids({"filter[inserted_at]": TODAY})
    assert len(all_check_ids) == 5, "Should have 5 total check_ids"

    # Restricted to provider1.
    p1_check_ids = fetch_check_ids(
        {"filter[inserted_at]": TODAY, "filter[provider_id]": str(provider1.id)}
    )
    assert (
        len(p1_check_ids) == 4
    ), f"Provider1 should have 4 checks, got {len(p1_check_ids)}"
    assert (
        "cloudtrail_enabled" not in p1_check_ids
    ), "cloudtrail_enabled should NOT be in provider1"

    # Restricted to provider2.
    p2_check_ids = fetch_check_ids(
        {"filter[inserted_at]": TODAY, "filter[provider_id]": str(provider2.id)}
    )
    assert (
        len(p2_check_ids) == 1
    ), f"Provider2 should have 1 check, got {len(p2_check_ids)}"
    assert (
        "cloudtrail_enabled" in p2_check_ids
    ), "cloudtrail_enabled should be in provider2"
# Test provider_type filter actually filters data
def test_finding_groups_provider_type_filter_actually_filters(
    self, authenticated_client, finding_groups_fixture
):
    """Verify provider_type keeps matching providers and excludes all others."""
    list_url = reverse("finding-group-list")

    # The fixture is AWS-only, so the aws filter keeps all 5 groups.
    aws_resp = authenticated_client.get(
        list_url, {"filter[inserted_at]": TODAY, "filter[provider_type]": "aws"}
    )
    assert aws_resp.status_code == status.HTTP_200_OK
    aws_groups = aws_resp.json()["data"]
    assert len(aws_groups) == 5

    # There are no GCP findings in the fixture, so the gcp filter leaves nothing.
    gcp_resp = authenticated_client.get(
        list_url, {"filter[inserted_at]": TODAY, "filter[provider_type]": "gcp"}
    )
    assert gcp_resp.status_code == status.HTTP_200_OK
    gcp_groups = gcp_resp.json()["data"]
    assert len(gcp_groups) == 0, "GCP filter should return 0 results"
def test_finding_groups_pagination(
    self, authenticated_client, finding_groups_fixture
):
    """Check that page[size] limits the page and that pagination metadata is returned."""
    response = authenticated_client.get(
        reverse("finding-group-list"),
        {"filter[inserted_at]": TODAY, "page[size]": 2},
    )
    assert response.status_code == status.HTTP_200_OK
    body = response.json()

    # The fixture produces 5 groups, so a page size of 2 must truncate the
    # first page to exactly 2 items; without this check a view that ignores
    # page[size] would still pass.
    assert len(body["data"]) == 2

    # Pagination metadata must accompany the page.
    assert "meta" in body
    meta = body["meta"]
    assert "pagination" in meta
    assert "count" in meta["pagination"]
def test_resources_pagination(self, authenticated_client, finding_groups_fixture):
    """Check that page[size] limits the resources page and metadata is returned."""
    response = authenticated_client.get(
        reverse(
            "finding-group-resources", kwargs={"pk": "s3_bucket_public_access"}
        ),
        {"filter[inserted_at]": TODAY, "page[size]": 1},
    )
    assert response.status_code == status.HTTP_200_OK
    body = response.json()

    # The group has 2 resources in the fixture, so a page size of 1 must
    # return a single item; previously only the presence of "meta" was checked.
    assert len(body["data"]) == 1
    assert "meta" in body
def test_finding_groups_ordering_default(
    self, authenticated_client, finding_groups_fixture
):
    """With no sort parameter, the first group is one of the expected top-ranked checks."""
    query = {"filter[inserted_at]": TODAY}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    groups = resp.json()["data"]
    # Intended default order is (-fail_count, -severity, check_id);
    # s3_bucket_public_access has 2 critical fails in the fixture.
    top_group_id = groups[0]["id"]
    assert top_group_id in ["s3_bucket_public_access", "cloudtrail_enabled"]
def test_finding_groups_ordering_custom(
    self, authenticated_client, finding_groups_fixture
):
    """sort=check_id returns the groups in ascending check_id order."""
    query = {"filter[inserted_at]": TODAY, "sort": "check_id"}
    resp = authenticated_client.get(reverse("finding-group-list"), query)
    assert resp.status_code == status.HTTP_200_OK

    returned_order = [group["id"] for group in resp.json()["data"]]
    # The response order must already equal the alphabetically sorted order.
    assert returned_order == sorted(returned_order)
def test_finding_groups_latest_no_date_filter_required(
    self, authenticated_client, finding_groups_fixture
):
    """The /latest endpoint answers 200 without any date filter."""
    resp = authenticated_client.get(reverse("finding-group-latest"))
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    # All 5 fixture checks are returned.
    assert len(groups) == 5
def test_finding_groups_latest_empty(self, authenticated_client):
    """With no fixture data loaded, /latest answers 200 with an empty list."""
    resp = authenticated_client.get(reverse("finding-group-latest"))
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 0
def test_finding_groups_latest_provider_id_filter(
    self, authenticated_client, finding_groups_fixture, providers_fixture
):
    """On /latest, provider_id restricts the groups to the selected provider."""
    latest_url = reverse("finding-group-latest")
    four_check_provider = providers_fixture[0]
    one_check_provider = providers_fixture[1]

    # First provider: 4 checks, none of them cloudtrail_enabled.
    resp = authenticated_client.get(
        latest_url, {"filter[provider_id]": str(four_check_provider.id)}
    )
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 4
    returned_ids = {group["id"] for group in groups}
    assert "cloudtrail_enabled" not in returned_ids

    # Second provider: only cloudtrail_enabled.
    resp = authenticated_client.get(
        latest_url, {"filter[provider_id]": str(one_check_provider.id)}
    )
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1
    assert groups[0]["id"] == "cloudtrail_enabled"
def test_finding_groups_latest_provider_type_filter(
    self, authenticated_client, finding_groups_fixture
):
    """On /latest, provider_type=aws returns every fixture group."""
    query = {"filter[provider_type]": "aws"}
    resp = authenticated_client.get(reverse("finding-group-latest"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    # The fixture only contains AWS providers.
    assert len(groups) == 5
def test_finding_groups_latest_check_id_filter(
    self, authenticated_client, finding_groups_fixture
):
    """On /latest, an exact check_id filter returns just the matching group."""
    query = {"filter[check_id]": "s3_bucket_public_access"}
    resp = authenticated_client.get(reverse("finding-group-latest"), query)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    assert len(groups) == 1
    assert groups[0]["id"] == "s3_bucket_public_access"
def test_finding_groups_latest_custom_sort(
    self, authenticated_client, finding_groups_fixture
):
    """On /latest, sort=check_id returns the groups in ascending check_id order."""
    resp = authenticated_client.get(
        reverse("finding-group-latest"), {"sort": "check_id"}
    )
    assert resp.status_code == status.HTTP_200_OK
    returned_order = [group["id"] for group in resp.json()["data"]]
    assert returned_order == sorted(returned_order)
def test_finding_groups_latest_ignores_date_filters(
    self, authenticated_client, finding_groups_fixture
):
    """A date filter sent to /latest has no effect on the returned groups."""
    # An old date would match nothing if the filter were honoured.
    stale_filter = {"filter[inserted_at]": "2020-01-01"}
    resp = authenticated_client.get(reverse("finding-group-latest"), stale_filter)
    assert resp.status_code == status.HTTP_200_OK
    groups = resp.json()["data"]
    # All 5 fixture groups are still returned.
    assert len(groups) == 5
+95
View File
@@ -4051,3 +4051,98 @@ class ResourceEventSerializer(BaseSerializerV1):
class Meta:
resource_name = "resource-events"
# Finding Groups - Virtual aggregation entities
class FindingGroupSerializer(BaseSerializerV1):
    """
    Serializer for Finding Groups - aggregated findings by check_id.

    This is a non-model serializer since FindingGroup is a virtual entity
    created by aggregating the Finding model. It declares every output
    field explicitly and is exposed under the "finding-groups" resource name.
    """

    # The resource id is read from check_id, so each group is identified by its check.
    id = serializers.CharField(source="check_id")
    check_id = serializers.CharField()
    # Check title/description are optional and may be null.
    check_title = serializers.CharField(required=False, allow_null=True)
    check_description = serializers.CharField(required=False, allow_null=True)
    # Group-level severity and status, serialized as plain strings.
    severity = serializers.CharField()
    status = serializers.CharField()
    # Optional list of strings naming the providers impacted by this check.
    impacted_providers = serializers.ListField(
        child=serializers.CharField(), required=False
    )
    # Integer counters for resources and findings in the group.
    resources_fail = serializers.IntegerField()
    resources_total = serializers.IntegerField()
    pass_count = serializers.IntegerField()
    fail_count = serializers.IntegerField()
    muted_count = serializers.IntegerField()
    new_count = serializers.IntegerField()
    changed_count = serializers.IntegerField()
    # Timestamps are optional and nullable.
    first_seen_at = serializers.DateTimeField(required=False, allow_null=True)
    last_seen_at = serializers.DateTimeField(required=False, allow_null=True)
    failing_since = serializers.DateTimeField(required=False, allow_null=True)

    class JSONAPIMeta:
        # JSON:API "type" member emitted for each serialized group.
        resource_name = "finding-groups"
class FindingGroupResourceSerializer(BaseSerializerV1):
    """
    Serialize one resource row belonging to a finding group.

    Each input object is a mapping (it is read with ``.get``) holding flat
    ``resource_*`` and ``provider_*`` keys, which are regrouped into nested
    ``resource`` and ``provider`` objects next to the row's status, severity
    and timing fields.
    """

    id = serializers.UUIDField(source="resource_id")
    resource = serializers.SerializerMethodField()
    provider = serializers.SerializerMethodField()
    status = serializers.CharField()
    severity = serializers.CharField()
    first_seen_at = serializers.DateTimeField(required=False, allow_null=True)
    last_seen_at = serializers.DateTimeField(required=False, allow_null=True)

    class JSONAPIMeta:
        resource_name = "finding-group-resources"

    @extend_schema_field(
        {
            "type": "object",
            "properties": {
                "uid": {"type": "string"},
                "name": {"type": "string"},
                "service": {"type": "string"},
                "region": {"type": "string"},
                "type": {"type": "string"},
            },
        }
    )
    def get_resource(self, obj):
        """Build the nested resource object; missing keys become ""."""
        nested = {}
        for attribute in ("uid", "name", "service", "region", "type"):
            nested[attribute] = obj.get(f"resource_{attribute}", "")
        return nested

    @extend_schema_field(
        {
            "type": "object",
            "properties": {
                "type": {"type": "string"},
                "uid": {"type": "string"},
                "alias": {"type": "string"},
            },
        }
    )
    def get_provider(self, obj):
        """Build the nested provider object; missing keys become ""."""
        nested = {}
        for attribute in ("type", "uid", "alias"):
            nested[attribute] = obj.get(f"provider_{attribute}", "")
        return nested
+2
View File
@@ -10,6 +10,7 @@ from api.v1.views import (
CustomTokenObtainView,
CustomTokenRefreshView,
CustomTokenSwitchTenantView,
FindingGroupViewSet,
FindingViewSet,
GithubSocialLoginView,
GoogleSocialLoginView,
@@ -60,6 +61,7 @@ router.register(
router.register(r"tasks", TaskViewSet, basename="task")
router.register(r"resources", ResourceViewSet, basename="resource")
router.register(r"findings", FindingViewSet, basename="finding")
router.register(r"finding-groups", FindingGroupViewSet, basename="finding-group")
router.register(r"roles", RoleViewSet, basename="role")
router.register(
r"compliance-overviews", ComplianceOverviewViewSet, basename="complianceoverview"
+668 -1
View File
@@ -24,7 +24,7 @@ from config.settings.social_login import (
)
from dj_rest_auth.registration.views import SocialLoginView
from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.aggregates import ArrayAgg, StringAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import (
@@ -35,8 +35,10 @@ from django.db.models import (
F,
IntegerField,
Max,
Min,
Prefetch,
Q,
QuerySet,
Subquery,
Sum,
Value,
@@ -99,6 +101,7 @@ from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
get_compliance_frameworks,
)
from api.constants import SEVERITY_ORDER
from api.db_router import MainRouter
from api.db_utils import rls_transaction
from api.exceptions import (
@@ -117,10 +120,14 @@ from api.filters import (
CustomDjangoFilterBackend,
DailySeveritySummaryFilter,
FindingFilter,
FindingGroupFilter,
FindingGroupSummaryFilter,
IntegrationFilter,
IntegrationJiraFindingsFilter,
InvitationFilter,
LatestFindingFilter,
LatestFindingGroupFilter,
LatestFindingGroupSummaryFilter,
LatestResourceFilter,
LighthouseProviderConfigFilter,
LighthouseProviderModelsFilter,
@@ -149,6 +156,7 @@ from api.models import (
ComplianceRequirementOverview,
DailySeveritySummary,
Finding,
FindingGroupDailySummary,
Integration,
Invitation,
LighthouseConfiguration,
@@ -210,6 +218,8 @@ from api.v1.serializers import (
ComplianceOverviewSerializer,
ComplianceWatchlistOverviewSerializer,
FindingDynamicFilterSerializer,
FindingGroupResourceSerializer,
FindingGroupSerializer,
FindingMetadataSerializer,
FindingSerializer,
FindingsSeverityOverTimeSerializer,
@@ -6547,3 +6557,660 @@ class MuteRuleViewSet(BaseRLSViewSet):
data=serializer.data,
status=status.HTTP_201_CREATED,
)
# Inverse of SEVERITY_ORDER: maps the numeric rank (e.g. 5) back to the
# severity name (e.g. "critical") after ranks have been aggregated with Max().
SEVERITY_ORDER_REVERSE = {v: k for k, v in SEVERITY_ORDER.items()}
@extend_schema_view(
list=extend_schema(
summary="List finding groups",
description="""
Retrieve aggregated findings grouped by check_id.
Each group shows:
- Aggregated status (FAIL if any non-muted failure)
- Maximum severity across all findings
- Resource counts (failing vs total)
- Finding counts by status and delta
- Affected provider types
At least one date filter is required for performance reasons.
""",
tags=["Finding Groups"],
),
retrieve=extend_schema(exclude=True),
)
class FindingGroupViewSet(BaseRLSViewSet):
"""
ViewSet for Finding Groups - aggregates findings by check_id.
This endpoint provides a summary view of security checks, aggregating
metrics across all findings for each unique check_id. This enables
security analysts to see which checks are failing across their
infrastructure without scrolling through thousands of individual findings.
Uses pre-aggregated FindingGroupDailySummary table for efficient queries.
Daily summaries are re-aggregated across the requested date range.
"""
queryset = FindingGroupDailySummary.objects.all()
serializer_class = FindingGroupSerializer
filterset_class = FindingGroupSummaryFilter
http_method_names = ["get"]
required_permissions = []
def get_filterset_class(self):
    """Select the filterset: the `latest` action has its own, all others share one."""
    is_latest_action = self.action == "latest"
    return (
        LatestFindingGroupSummaryFilter
        if is_latest_action
        else FindingGroupSummaryFilter
    )
def get_queryset(self):
    """
    Return the daily-summary rows the current user is allowed to see.

    Rows are always restricted to the request's tenant; roles without
    unlimited visibility are further restricted to their providers.
    """
    tenant_id = self.request.tenant_id
    role = get_role(self.request.user)
    tenant_summaries = FindingGroupDailySummary.objects.filter(tenant_id=tenant_id)
    if role.unlimited_visibility:
        return tenant_summaries
    return tenant_summaries.filter(provider__in=get_providers(role))
def _get_finding_queryset(self):
    """
    Return the tenant's findings for the resources drill-down.

    Roles without unlimited visibility only see findings whose scan belongs
    to one of their providers.
    """
    role = get_role(self.request.user)
    visible_providers = get_providers(role)
    tenant_id = self.request.tenant_id

    findings = Finding.all_objects.filter(tenant_id=tenant_id)
    if role.unlimited_visibility:
        return findings
    # RBAC: narrow to the providers this role may access.
    return findings.filter(scan__provider_id__in=visible_providers)
def _normalize_jsonapi_params(self, query_params):
    """
    Flatten JSON:API filter params into plain filterset params.

    ``filter[X]`` becomes ``X`` and dots become double underscores
    (``filter[a.b]`` -> ``a__b``). Keys not wrapped in ``filter[...]`` keep
    their name but still get the dot replacement. All values of a key are
    preserved.
    """
    prefix, suffix = "filter[", "]"
    flat_params = QueryDict(mutable=True)
    for raw_key, raw_values in query_params.lists():
        if raw_key.startswith(prefix) and raw_key.endswith(suffix):
            name = raw_key[len(prefix) : -len(suffix)]
        else:
            name = raw_key
        # JSON:API dot notation -> Django lookup separator.
        flat_params.setlist(name.replace(".", "__"), raw_values)
    return flat_params
@extend_schema(exclude=True)
def retrieve(self, request, *args, **kwargs):
    """Finding groups have no detail representation; always raise MethodNotAllowed."""
    not_allowed = MethodNotAllowed(method="GET")
    raise not_allowed
# Maps the resource-related filter names accepted by the drill-down endpoints
# (left) to the parameter names passed to the Resource filterset (right).
# Keys listed here are split away from the finding filters and applied to
# Resource instead.
RESOURCE_FILTER_MAP = {
    "resources": "id__in",
    "resource_uid": "uid",
    "resource_uid__in": "uid__in",
    "resource_uid__icontains": "uid__icontains",
    "resource_name": "name",
    "resource_name__in": "name__in",
    "resource_name__icontains": "name__icontains",
    "resource_type": "type",
    "resource_type__in": "type__in",
    "resource_type__icontains": "type__icontains",
}
def _split_resource_filters(self, params: QueryDict) -> tuple[QueryDict, QueryDict]:
    """
    Partition params into ``(finding_params, resource_params)``.

    A key goes to ``resource_params`` when it is listed in
    ``RESOURCE_FILTER_MAP``; every other key goes to ``finding_params``.
    """
    finding_params = QueryDict(mutable=True)
    resource_params = QueryDict(mutable=True)
    for name, values in params.lists():
        is_resource_filter = name in self.RESOURCE_FILTER_MAP
        target = resource_params if is_resource_filter else finding_params
        target.setlist(name, values)
    return finding_params, resource_params
def _resource_ids_from_params(
    self, params: QueryDict, tenant_id: str | None
) -> QuerySet | None:
    """
    Translate resource filters into a queryset of matching resource ids.

    Returns ``None`` when no usable resource filter was supplied, otherwise
    ``Resource`` rows reduced to ``values("id")``.

    Raises:
        ValidationError: if the translated filters are rejected by
            ``LatestResourceFilter``.
    """
    if not params:
        return None

    resources = Resource.objects.all()
    if tenant_id:
        resources = resources.filter(tenant_id=tenant_id)

    translated = QueryDict(mutable=True)
    for source_key, target_key in self.RESOURCE_FILTER_MAP.items():
        if source_key not in params:
            continue

        accepts_many = source_key == "resources" or source_key.endswith("__in")
        if not accepts_many:
            single_value = params.get(source_key)
            if single_value:
                translated.setlist(target_key, [single_value])
            continue

        # Multi-value filters may arrive repeated and/or comma separated;
        # collapse them into one clean comma-separated value.
        tokens = []
        for raw_value in params.getlist(source_key):
            if raw_value is None:
                continue
            stripped_parts = (piece.strip() for piece in raw_value.split(","))
            tokens.extend(piece for piece in stripped_parts if piece)
        if tokens:
            translated.setlist(target_key, [",".join(tokens)])

    if not translated:
        return None

    resource_filterset = LatestResourceFilter(translated, queryset=resources)
    if not resource_filterset.is_valid():
        raise ValidationError(resource_filterset.errors)
    return resource_filterset.qs.values("id")
def _aggregate_daily_summaries(self, queryset):
    """
    Collapse daily summary rows into one row per check_id.

    Per group: severity rank is the maximum, counters are summed, provider
    types are joined into a distinct comma-separated string, timestamps use
    the earliest/latest value, and title/description are reduced with Max()
    so a single value is picked per group.
    """
    from django.db.models import CharField
    from django.db.models.functions import Cast

    summed_counters = (
        "pass_count",
        "fail_count",
        "muted_count",
        "new_count",
        "changed_count",
        "resources_total",
        "resources_fail",
    )

    annotations = {"severity_order": Max("severity_order")}
    for counter in summed_counters:
        annotations[counter] = Sum(counter)
    # The provider type is cast to text before being string-aggregated.
    annotations["impacted_providers_str"] = StringAgg(
        Cast("provider__provider", CharField()),
        delimiter=",",
        distinct=True,
        default="",
    )
    annotations["first_seen_at"] = Min("first_seen_at")
    annotations["last_seen_at"] = Max("last_seen_at")
    annotations["failing_since"] = Min("failing_since")
    annotations["check_title"] = Max("check_title")
    annotations["check_description"] = Max("check_description")

    return queryset.values("check_id").annotate(**annotations)
def _post_process_aggregation(self, aggregated_data):
    """
    Add the computed, API-facing fields to each aggregated row.

    Each row (a dict) is updated in place:
    - ``severity``: the severity name for ``severity_order``
      ("informational" when the rank is missing or unknown).
    - ``status``: "FAIL" if any failure was counted, else "PASS" if any pass
      was counted, else "MUTED".
    - ``impacted_providers``: list built from the comma-separated
      ``impacted_providers_str``, which is removed from the row.

    Returns:
        list: the same row dicts, in input order.
    """
    results = []
    for row in aggregated_data:
        severity_order = row.get("severity_order", 1)
        row["severity"] = SEVERITY_ORDER_REVERSE.get(
            severity_order, "informational"
        )

        # An aggregated count can be None (SUM over no non-null values);
        # treat that as zero instead of failing on `None > 0`.
        fail_count = row.get("fail_count") or 0
        pass_count = row.get("pass_count") or 0
        if fail_count > 0:
            row["status"] = "FAIL"
        elif pass_count > 0:
            row["status"] = "PASS"
        else:
            row["status"] = "MUTED"

        providers_str = row.pop("impacted_providers_str", "") or ""
        row["impacted_providers"] = [
            provider.strip()
            for provider in providers_str.split(",")
            if provider.strip()
        ]
        results.append(row)
    return results
def _validate_sort_fields(self, sort_param):
"""Validate and map JSON:API sort fields for aggregated finding groups."""
sort_field_map = {
"check_id": "check_id",
"severity": "severity_order",
"fail_count": "fail_count",
"pass_count": "pass_count",
"muted_count": "muted_count",
"new_count": "new_count",
"changed_count": "changed_count",
"resources_total": "resources_total",
"resources_fail": "resources_fail",
"first_seen_at": "first_seen_at",
"last_seen_at": "last_seen_at",
"failing_since": "failing_since",
}
ordering = []
for field in sort_param.split(","):
field = field.strip()
if not field:
continue
is_desc = field.startswith("-")
raw_field = field[1:] if is_desc else field
if raw_field not in sort_field_map:
# Validate sort fields explicitly to return JSON:API 400 instead of FieldError.
raise ValidationError(
[
{
"detail": f"invalid sort parameter: {raw_field}",
"status": "400",
"source": {"pointer": "/data"},
"code": "invalid",
}
]
)
mapped_field = sort_field_map[raw_field]
ordering.append(f"-{mapped_field}" if is_desc else mapped_field)
return ordering
def _build_resource_mapping_queryset(
    self, filtered_queryset, resource_ids=None, tenant_id: str | None = None
):
    """
    Return the resource/finding mappings that point at the given findings.

    The findings are supplied as a subquery of ids, so the check_id/date
    filters already applied to ``filtered_queryset`` constrain the mappings.
    ``tenant_id`` and ``resource_ids`` (a QuerySet or a plain iterable of
    ids) narrow the result further when provided.
    """
    # Drop any ordering: only the ids matter inside the subquery.
    matching_finding_ids = filtered_queryset.order_by().values("id")
    mappings = ResourceFindingMapping.objects.filter(
        finding_id__in=Subquery(matching_finding_ids)
    )
    if tenant_id:
        mappings = mappings.filter(tenant_id=tenant_id)
    if resource_ids is None:
        return mappings

    allowed_ids = (
        Subquery(resource_ids) if isinstance(resource_ids, QuerySet) else resource_ids
    )
    return mappings.filter(resource_id__in=allowed_ids)
def _build_resource_aggregation(
    self, filtered_queryset, resource_ids=None, tenant_id: str | None = None
):
    """
    Aggregate the selected findings into one row per resource.

    Each row carries the resource and provider attributes plus:
    - ``status_order``: 3 if any non-muted FAIL, else 2 if any non-muted
      PASS, else 1.
    - ``severity_order``: the highest SEVERITY_ORDER rank among the findings.
    - ``first_seen_at`` / ``last_seen_at``: earliest ``first_seen_at`` and
      latest ``inserted_at`` of the findings.

    Rows are ordered by ``resource_id``.
    """
    mappings = self._build_resource_mapping_queryset(
        filtered_queryset, resource_ids=resource_ids, tenant_id=tenant_id
    )

    status_rank = Case(
        When(finding__status="FAIL", finding__muted=False, then=Value(3)),
        When(finding__status="PASS", finding__muted=False, then=Value(2)),
        default=Value(1),
        output_field=IntegerField(),
    )
    severity_branches = [
        When(finding__severity=severity, then=Value(order))
        for severity, order in SEVERITY_ORDER.items()
    ]
    severity_rank = Case(*severity_branches, output_field=IntegerField())

    per_resource = mappings.values("resource_id").annotate(
        resource_uid=Max("resource__uid"),
        resource_name=Max("resource__name"),
        resource_service=Max("resource__service"),
        resource_region=Max("resource__region"),
        resource_type=Max("resource__type"),
        provider_type=Max("resource__provider__provider"),
        provider_uid=Max("resource__provider__uid"),
        provider_alias=Max("resource__provider__alias"),
        status_order=Max(status_rank),
        severity_order=Max(severity_rank),
        first_seen_at=Min("finding__first_seen_at"),
        last_seen_at=Max("finding__inserted_at"),
    )
    return per_resource.filter(resource_id__isnull=False).order_by("resource_id")
def _post_process_resources(self, resource_data):
    """
    Turn resource aggregation rows into the dicts handed to the serializer.

    The numeric ``status_order`` and ``severity_order`` ranks are replaced
    by their names ("MUTED" / "informational" when the rank is not
    recognised); all other values are copied through unchanged.
    """
    status_names = {3: "FAIL", 2: "PASS"}
    copied_keys = (
        "resource_id",
        "resource_uid",
        "resource_name",
        "resource_service",
        "resource_region",
        "resource_type",
        "provider_type",
        "provider_uid",
        "provider_alias",
    )

    results = []
    for row in resource_data:
        severity_rank = row.get("severity_order", 1)
        status_rank = row.get("status_order", 1)

        item = {key: row[key] for key in copied_keys}
        item["status"] = status_names.get(status_rank, "MUTED")
        item["severity"] = SEVERITY_ORDER_REVERSE.get(
            severity_rank, "informational"
        )
        item["first_seen_at"] = row["first_seen_at"]
        item["last_seen_at"] = row["last_seen_at"]
        results.append(item)
    return results
def _respond_with_groups(self, request, summary_queryset):
    """
    Aggregate, order, paginate and serialize finding-group summaries.

    Shared tail of the `list` and `latest` actions.

    Ordering rules:
    - With a usable ``sort`` query param, its validated fields are applied,
      and ``check_id`` is appended as a final tiebreaker when not already
      present so that pages are stable when the sort keys have ties.
    - Otherwise (no ``sort``, or one that only contains blank entries) the
      default applies: failures first, then severity, then check_id.

    Raises:
        ValidationError: if ``sort`` names a field that is not sortable.
    """
    aggregated_queryset = self._aggregate_daily_summaries(summary_queryset)

    sort_param = request.query_params.get("sort")
    ordering = self._validate_sort_fields(sort_param) if sort_param else []
    if not ordering:
        ordering = ["-fail_count", "-severity_order", "check_id"]
    elif not any(term.lstrip("-") == "check_id" for term in ordering):
        # check_id is unique per aggregated row, so it fully breaks ties.
        ordering.append("check_id")
    aggregated_queryset = aggregated_queryset.order_by(*ordering)

    page = self.paginate_queryset(aggregated_queryset)
    if page is not None:
        # Only the rows on the current page are post-processed.
        processed_data = self._post_process_aggregation(page)
        serializer = self.get_serializer(processed_data, many=True)
        return self.get_paginated_response(serializer.data)

    processed_data = self._post_process_aggregation(aggregated_queryset)
    serializer = self.get_serializer(processed_data, many=True)
    return Response(serializer.data)

def list(self, request, *args, **kwargs):
    """
    List finding groups with aggregation and filtering.

    Returns findings grouped by check_id with aggregated metrics, computed
    from the pre-aggregated daily summaries that match the request filters.

    Raises:
        ValidationError: if the filters or the sort parameter are invalid.
    """
    queryset = self.get_queryset()

    normalized_params = self._normalize_jsonapi_params(request.query_params)
    filterset = self.filterset_class(normalized_params, queryset=queryset)
    if not filterset.is_valid():
        raise ValidationError(filterset.errors)

    return self._respond_with_groups(request, filterset.qs)

@extend_schema(
    summary="List latest finding groups",
    description="""
    Retrieve the latest available state for each finding group (check_id).
    This endpoint returns finding groups without requiring date filters,
    automatically using the latest available data per check_id.
    All other filters (provider_id, provider_type, check_id) are still supported.
    """,
    tags=["Finding Groups"],
)
@action(detail=False, methods=["get"], url_name="latest")
def latest(self, request):
    """
    List the latest finding group state per check_id.

    Any ``inserted_at*`` filter sent by the client is discarded; instead,
    only the summary rows carrying the most recent ``inserted_at`` of each
    check_id are aggregated.

    Raises:
        ValidationError: if the filters or the sort parameter are invalid.
    """
    queryset = self.get_queryset()

    normalized_params = self._normalize_jsonapi_params(request.query_params)
    # Date filters are meaningless here: the latest data is always used.
    for key in list(normalized_params.keys()):
        if key.startswith("inserted_at"):
            del normalized_params[key]

    filterset_class = self.get_filterset_class()
    filterset = filterset_class(normalized_params, queryset=queryset)
    if not filterset.is_valid():
        raise ValidationError(filterset.errors)

    # Keep only rows from the latest inserted_at value per check_id.
    latest_per_check = filterset.qs.annotate(
        latest_inserted_at=Window(
            expression=Max("inserted_at"),
            partition_by=[F("check_id")],
        )
    ).filter(inserted_at=F("latest_inserted_at"))

    return self._respond_with_groups(request, latest_per_check)
def _respond_with_group_resources(
    self, request, check_findings, check_id, resource_params
):
    """
    Build the resources response for one finding group.

    Shared tail of the `resources` and `latest_resources` actions.
    ``check_findings`` must already be restricted to ``check_id``.

    Pagination is applied to the distinct resource ids first, and the
    per-resource aggregation is then computed only for the ids on the
    current page. Without pagination, the aggregation covers every resource
    matching ``resource_params``.

    Raises:
        NotFound: if ``check_findings`` is empty.
        ValidationError: if the resource filters are invalid.
    """
    if not check_findings.exists():
        raise NotFound(f"Finding group '{check_id}' not found.")

    tenant_id = request.tenant_id
    resource_ids = self._resource_ids_from_params(resource_params, tenant_id)

    mapping_queryset = self._build_resource_mapping_queryset(
        check_findings,
        resource_ids=resource_ids,
        tenant_id=tenant_id,
    )
    resource_id_queryset = (
        mapping_queryset.values_list("resource_id", flat=True)
        .distinct()
        .order_by("resource_id")
    )

    page_ids = self.paginate_queryset(resource_id_queryset)
    selected_ids = resource_ids if page_ids is None else page_ids

    resource_data = self._build_resource_aggregation(
        check_findings,
        resource_ids=selected_ids,
        tenant_id=tenant_id,
    )
    results = self._post_process_resources(resource_data)
    serializer = FindingGroupResourceSerializer(results, many=True)
    if page_ids is not None:
        return self.get_paginated_response(serializer.data)
    return Response(serializer.data)

@extend_schema(
    summary="List resources for a finding group",
    description="""
    Retrieve resources affected by a specific check (finding group).
    Returns individual resources with their current status, severity,
    and timing information including how long they have been failing.
    """,
    tags=["Finding Groups"],
)
@action(detail=True, methods=["get"], url_path="resources")
def resources(self, request, pk=None):
    """
    List resources for a specific finding group (check_id).

    ``pk`` is the check_id. Finding filters from the request are applied to
    the Finding table, while the resource filters listed in
    ``RESOURCE_FILTER_MAP`` are applied to the resources.

    Raises:
        ValidationError: if any filter is invalid.
        NotFound: if no finding matches the check_id and filters.
    """
    check_id = pk
    queryset = self._get_finding_queryset()

    normalized_params = self._normalize_jsonapi_params(request.query_params)
    finding_params, resource_params = self._split_resource_filters(
        normalized_params
    )
    filterset = FindingGroupFilter(finding_params, queryset=queryset)
    if not filterset.is_valid():
        raise ValidationError(filterset.errors)

    filtered_queryset = filterset.qs.filter(check_id=check_id)
    return self._respond_with_group_resources(
        request, filtered_queryset, check_id, resource_params
    )

@extend_schema(
    summary="List resources for a finding group from latest scans",
    description="""
    Retrieve resources affected by a specific check (finding group) from the
    latest completed scan for each provider.
    Returns individual resources with their current status, severity,
    and timing information. No date filters required.
    """,
    tags=["Finding Groups"],
)
@action(
    detail=False,
    methods=["get"],
    url_path="latest/(?P<check_id>[^/.]+)/resources",
    url_name="latest_resources",
)
def latest_resources(self, request, check_id=None):
    """
    List resources for a specific finding group from the latest scans.

    Works like `resources`, but any ``inserted_at*`` filter is discarded and
    only findings from the most recent completed scan of each provider are
    considered.

    Raises:
        ValidationError: if any filter is invalid.
        NotFound: if no finding matches the check_id and filters.
    """
    tenant_id = request.tenant_id
    queryset = self._get_finding_queryset()

    # Most recent completed scan per provider (DISTINCT ON provider_id).
    latest_scan_ids = (
        Scan.objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
        .order_by("provider_id", "-inserted_at")
        .distinct("provider_id")
        .values_list("id", flat=True)
    )

    normalized_params = self._normalize_jsonapi_params(request.query_params)
    # Date filters are meaningless here: the latest scans are always used.
    for key in list(normalized_params.keys()):
        if key.startswith("inserted_at"):
            del normalized_params[key]

    finding_params, resource_params = self._split_resource_filters(
        normalized_params
    )
    filterset = LatestFindingGroupFilter(finding_params, queryset=queryset)
    if not filterset.is_valid():
        raise ValidationError(filterset.errors)

    filtered_queryset = filterset.qs.filter(
        scan_id__in=latest_scan_ids,
        check_id=check_id,
    )
    return self._respond_with_group_resources(
        request, filtered_queryset, check_id, resource_params
    )
+277 -4
View File
@@ -678,21 +678,25 @@ def scans_fixture(tenants_fixture, providers_fixture):
tenant, *_ = tenants_fixture
provider, provider2, *_ = providers_fixture
now = datetime.now(timezone.utc)
scan1 = Scan.objects.create(
name="Scan 1",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
tenant_id=tenant.id,
started_at="2024-01-02T00:00:00Z",
started_at=now,
completed_at=now,
)
scan2 = Scan.objects.create(
name="Scan 2",
provider=provider,
provider=provider2,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.FAILED,
state=StateChoices.COMPLETED,
tenant_id=tenant.id,
started_at="2024-01-02T00:00:00Z",
started_at=now,
completed_at=now,
)
scan3 = Scan.objects.create(
name="Scan 3",
@@ -1954,6 +1958,275 @@ def tenant_compliance_summary_fixture(tenants_fixture):
return summaries
@pytest.fixture
def finding_groups_fixture(
    tenants_fixture, providers_fixture, scans_fixture, resources_fixture
):
    """
    Create nine findings across five check_ids for Finding Groups tests.

    Groups created (all on scan1 unless noted):
    - s3_bucket_public_access: two unmuted FAILs (critical + high).
    - ec2_instance_public_ip: one PASS (medium) and one FAIL (high).
    - iam_password_policy: two PASSes (low).
    - rds_encryption: two muted FAILs (medium).
    - cloudtrail_enabled: one unmuted FAIL (critical) on scan2.

    After creating the findings, the daily summaries are aggregated for
    scan1 and scan2 so the endpoints have data to read. Returns the list of
    created findings in creation order.
    """
    tenant = tenants_fixture[0]
    provider1, provider2, *_ = providers_fixture
    scan1, scan2, *_ = scans_fixture
    resource1, resource2, *_ = resources_fixture

    # check_id -> (title, description) stored in check_metadata.
    check_catalog = {
        "s3_bucket_public_access": (
            "Ensure S3 buckets do not allow public access",
            "S3 buckets should be configured to restrict public access.",
        ),
        "ec2_instance_public_ip": (
            "Ensure EC2 instances do not have public IPs",
            "EC2 instances should use private IPs only.",
        ),
        "iam_password_policy": (
            "Ensure IAM password policy is strong",
            "IAM password policy should enforce complexity.",
        ),
        "rds_encryption": (
            "Ensure RDS instances are encrypted",
            "RDS instances should use encryption at rest.",
        ),
        "cloudtrail_enabled": (
            "Ensure CloudTrail is enabled",
            "CloudTrail should be enabled for audit logging.",
        ),
    }

    def create_finding(
        uid,
        scan,
        delta,
        finding_status,
        status_extended,
        severity,
        impact_extended,
        env,
        check_id,
        first_seen_at,
        muted,
        resource,
    ):
        """Create one finding (impact mirrors severity) linked to one resource."""
        title, description = check_catalog[check_id]
        finding = Finding.objects.create(
            tenant_id=tenant.id,
            uid=uid,
            scan=scan,
            delta=delta,
            status=finding_status,
            status_extended=status_extended,
            impact=severity,
            impact_extended=impact_extended,
            severity=severity,
            raw_result={"status": finding_status, "severity": severity},
            tags={"env": env},
            check_id=check_id,
            check_metadata={
                "CheckId": check_id,
                "checktitle": title,
                "Description": description,
            },
            first_seen_at=first_seen_at,
            muted=muted,
        )
        finding.add_resources([resource])
        return finding

    # Columns: uid, scan, delta, status, status_extended, severity,
    # impact_extended, env tag, check_id, first_seen_at, muted, resource.
    finding_specs = [
        # Check 1: two unmuted FAILs, highest severity critical.
        (
            "fg_s3_check_1a", scan1, "new", Status.FAIL,
            "S3 bucket allows public access", Severity.critical,
            "Critical security risk", "prod", "s3_bucket_public_access",
            "2024-01-02T00:00:00Z", False, resource1,
        ),
        (
            "fg_s3_check_1b", scan1, "changed", Status.FAIL,
            "S3 bucket allows public read", Severity.high,
            "High security risk", "staging", "s3_bucket_public_access",
            "2024-01-03T00:00:00Z", False, resource2,
        ),
        # Check 2: mixed PASS/FAIL, highest severity high.
        (
            "fg_ec2_check_2a", scan1, None, Status.PASS,
            "EC2 instance has no public IP", Severity.medium,
            "Medium risk", "dev", "ec2_instance_public_ip",
            "2024-01-04T00:00:00Z", False, resource1,
        ),
        (
            "fg_ec2_check_2b", scan1, "new", Status.FAIL,
            "EC2 instance has public IP assigned", Severity.high,
            "High risk", "prod", "ec2_instance_public_ip",
            "2024-01-05T00:00:00Z", False, resource2,
        ),
        # Check 3: all PASS, low severity.
        (
            "fg_iam_check_3a", scan1, None, Status.PASS,
            "Password policy is compliant", Severity.low,
            "Low risk", "prod", "iam_password_policy",
            "2024-01-06T00:00:00Z", False, resource1,
        ),
        (
            "fg_iam_check_3b", scan1, None, Status.PASS,
            "Password policy meets requirements", Severity.low,
            "Low risk", "staging", "iam_password_policy",
            "2024-01-07T00:00:00Z", False, resource2,
        ),
        # Check 4: all muted FAILs, medium severity.
        (
            "fg_rds_check_4a", scan1, None, Status.FAIL,
            "RDS instance not encrypted", Severity.medium,
            "Medium risk", "dev", "rds_encryption",
            "2024-01-08T00:00:00Z", True, resource1,
        ),
        (
            "fg_rds_check_4b", scan1, None, Status.FAIL,
            "RDS encryption disabled", Severity.medium,
            "Medium risk", "test", "rds_encryption",
            "2024-01-09T00:00:00Z", True, resource2,
        ),
        # Check 5: the only finding attached to scan2.
        (
            "fg_cloudtrail_check_5", scan2, "new", Status.FAIL,
            "CloudTrail not enabled", Severity.critical,
            "Critical risk", "prod", "cloudtrail_enabled",
            "2024-01-10T00:00:00Z", False, resource1,
        ),
    ]

    findings = []
    for spec in finding_specs:
        findings.append(create_finding(*spec))

    # Aggregate findings into FindingGroupDailySummary for the endpoint to read
    from tasks.jobs.scan import aggregate_finding_group_summaries

    for scan in (scan1, scan2):
        aggregate_finding_group_summaries(
            tenant_id=str(tenant.id),
            scan_id=str(scan.id),
        )

    return findings
def pytest_collection_modifyitems(items):
    """Reorder the collected tests in place so test_rbac.py items come first."""

    def _rank(test_item):
        # Rank 0 sorts ahead of rank 1; list.sort is stable, so the relative
        # order inside each group is left untouched.
        if "test_rbac.py" in test_item.nodeid:
            return 0
        return 1

    items.sort(key=_rank)
+84 -1
View File
@@ -8,7 +8,11 @@ from tasks.jobs.queries import (
COMPLIANCE_UPSERT_PROVIDER_SCORE_SQL,
COMPLIANCE_UPSERT_TENANT_SUMMARY_ALL_SQL,
)
from tasks.jobs.scan import aggregate_category_counts, aggregate_resource_group_counts
from tasks.jobs.scan import (
aggregate_category_counts,
aggregate_finding_group_summaries,
aggregate_resource_group_counts,
)
from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import (
@@ -552,3 +556,82 @@ def backfill_provider_compliance_scores(tenant_id: str) -> dict:
"total_upserted": total_upserted,
"tenant_summary_count": tenant_summary_count,
}
def backfill_finding_group_summaries(tenant_id: str, days: int = None):
    """
    Backfill FindingGroupDailySummary from completed scans.

    Picks the most recently completed scan for every (provider, day) pair and
    runs ``aggregate_finding_group_summaries`` on each of them. Summaries are
    stored per provider, so the de-duplication key must include the provider;
    keying on the day alone would drop every provider but one whenever several
    providers were scanned on the same date.

    Args:
        tenant_id: Tenant that owns the scans.
        days: Optional limit on how many days back to backfill. ``None``
            means every completed scan is considered.

    Returns:
        dict: ``{"status": "no scans to backfill"}`` when no completed scan
        matches, otherwise ``{"status": "backfilled", ...}`` with the number
        of scans processed/skipped and summaries created/updated.
    """
    scans_processed = 0
    scans_skipped = 0
    total_created = 0
    total_updated = 0

    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        scan_filter = {
            "tenant_id": tenant_id,
            "state": StateChoices.COMPLETED,
            "completed_at__isnull": False,
        }
        if days is not None:
            cutoff_date = timezone.now() - timedelta(days=days)
            scan_filter["completed_at__gte"] = cutoff_date

        completed_scans = (
            Scan.objects.filter(**scan_filter)
            .order_by("-completed_at")
            .values("id", "provider_id", "completed_at")
        )

        # Truth-testing evaluates (and caches) the queryset inside the
        # transaction; the loop below reuses the cached rows.
        if not completed_scans:
            return {"status": "no scans to backfill"}

        # Keep only the latest scan per provider and day. Rows arrive newest
        # first, so the first scan seen for a key is the one to keep.
        latest_scans_by_provider_day = {}
        for scan in completed_scans:
            key = (scan["provider_id"], scan["completed_at"].date())
            latest_scans_by_provider_day.setdefault(key, scan)

    # Process each selected scan; the aggregation opens its own transactions.
    for scan in latest_scans_by_provider_day.values():
        scan_id = str(scan["id"])
        try:
            result = aggregate_finding_group_summaries(tenant_id, scan_id)
            if result.get("status") == "completed":
                scans_processed += 1
                total_created += result.get("created", 0)
                total_updated += result.get("updated", 0)
            else:
                scans_skipped += 1
        except Exception as e:
            # Best effort: one failing scan must not abort the whole backfill.
            logger.warning(
                f"Failed to backfill finding group summaries for scan {scan_id}: {e}"
            )
            scans_skipped += 1

    logger.info(
        f"Backfilled finding group summaries for tenant {tenant_id}: "
        f"{scans_processed} scans processed, {scans_skipped} skipped, "
        f"{total_created} created, {total_updated} updated"
    )

    return {
        "status": "backfilled",
        "scans_processed": scans_processed,
        "scans_skipped": scans_skipped,
        "total_created": total_created,
        "total_updated": total_updated,
    }
+192 -1
View File
@@ -13,7 +13,8 @@ from celery.utils.log import get_task_logger
from config.env import env
from config.settings.celery import CELERY_DEADLOCK_ATTEMPTS
from django.db import IntegrityError, OperationalError
from django.db.models import Case, Count, IntegerField, Prefetch, Q, Sum, When
from django.db.models import Case, Count, IntegerField, Max, Min, Prefetch, Q, Sum, When
from django.utils import timezone as django_timezone
from tasks.jobs.queries import (
COMPLIANCE_UPSERT_PROVIDER_SCORE_SQL,
COMPLIANCE_UPSERT_TENANT_SUMMARY_SQL,
@@ -21,6 +22,7 @@ from tasks.jobs.queries import (
from tasks.utils import CustomEncoder
from api.compliance import PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE
from api.constants import SEVERITY_ORDER
from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import (
POSTGRES_TENANT_VAR,
@@ -36,6 +38,7 @@ from api.models import (
ComplianceRequirementOverview,
DailySeveritySummary,
Finding,
FindingGroupDailySummary,
MuteRule,
Processor,
Provider,
@@ -1746,3 +1749,191 @@ def update_provider_compliance_scores(tenant_id: str, scan_id: str):
f"Error updating provider compliance scores for scan {scan_id}: {e}"
)
raise
def aggregate_finding_group_summaries(tenant_id: str, scan_id: str):
    """
    Aggregate finding group summaries for a completed scan.

    Creates or updates FindingGroupDailySummary records for each unique check_id
    found in the scan's findings. These pre-aggregated summaries enable efficient
    queries over date ranges without scanning millions of findings.

    The aggregation is read through the read replica; the upsert runs in a
    separate transaction on the default connection. Each summary row is
    identified by (tenant, provider, check_id, day), where the day is the
    scan's completion timestamp truncated to midnight.

    Args:
        tenant_id: Tenant that owns the scan.
        scan_id: Scan UUID whose findings should be aggregated.

    Returns:
        dict: ``{"status": "skipped", "reason": ...}`` when the scan is not
        completed, has no provider or has no completion timestamp; otherwise
        ``{"status": "completed", "scan_id", "date", "created", "updated"}``.
    """
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        scan = Scan.objects.filter(
            tenant_id=tenant_id,
            id=scan_id,
            state=StateChoices.COMPLETED,
        ).first()

        if not scan:
            logger.warning(
                f"Scan {scan_id} not found or not completed for finding group summary"
            )
            return {"status": "skipped", "reason": "scan not completed"}

        if not scan.provider:
            logger.warning(f"Scan {scan_id} has no provider for finding group summary")
            return {"status": "skipped", "reason": "scan has no provider"}

        # Without a completion timestamp there is no day to file the summary
        # under; skip instead of failing in the timezone helpers below.
        if scan.completed_at is None:
            logger.warning(
                f"Scan {scan_id} has no completion timestamp for finding group summary"
            )
            return {"status": "skipped", "reason": "scan has no completion timestamp"}

        summary_timestamp = scan.completed_at
        if django_timezone.is_naive(summary_timestamp):
            summary_timestamp = django_timezone.make_aware(
                summary_timestamp, timezone.utc
            )
        # Truncate to midnight so every scan of the same day maps to one row.
        summary_timestamp = summary_timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0
        )

        provider_id = scan.provider_id

        # Map each severity to its numeric rank so Max() yields the worst one.
        severity_case = Case(
            *[
                When(severity=severity, then=order)
                for severity, order in SEVERITY_ORDER.items()
            ],
            output_field=IntegerField(),
        )

        # Aggregate findings by check_id for this scan.
        # The resources_* annotations join the resources relation, which
        # repeats a finding once per attached resource. The finding counters
        # therefore need distinct=True, or a finding linked to several
        # resources would be counted several times. Min/Max are unaffected.
        aggregated = (
            Finding.objects.filter(
                tenant_id=tenant_id,
                scan_id=scan_id,
            )
            .values("check_id")
            .annotate(
                severity_order=Max(severity_case),
                pass_count=Count(
                    "id", distinct=True, filter=Q(status="PASS", muted=False)
                ),
                fail_count=Count(
                    "id", distinct=True, filter=Q(status="FAIL", muted=False)
                ),
                muted_count=Count("id", distinct=True, filter=Q(muted=True)),
                new_count=Count(
                    "id", distinct=True, filter=Q(delta="new", muted=False)
                ),
                changed_count=Count(
                    "id", distinct=True, filter=Q(delta="changed", muted=False)
                ),
                resources_total=Count("resources__id", distinct=True),
                resources_fail=Count(
                    "resources__id",
                    distinct=True,
                    filter=Q(status="FAIL", muted=False),
                ),
                # Use prefixed names to avoid conflict with model field names
                agg_first_seen_at=Min("first_seen_at"),
                agg_last_seen_at=Max("inserted_at"),
                agg_failing_since=Min(
                    "first_seen_at", filter=Q(status="FAIL", muted=False)
                ),
            )
        )

        # Force evaluate queryset while inside RLS transaction (prevents lazy re-query issues)
        aggregated_list = list(aggregated)

        # Fetch check metadata for all check_ids in one query
        check_ids = [row["check_id"] for row in aggregated_list]
        check_metadata_map = {}
        if check_ids:
            findings_with_metadata = (
                Finding.objects.filter(
                    tenant_id=tenant_id,
                    scan_id=scan_id,
                    check_id__in=check_ids,
                )
                .order_by("check_id")
                .distinct("check_id")
                .values("check_id", "check_metadata")
            )
            # distinct("check_id") yields one row per check; rows with empty
            # metadata are left out so the defaults below apply.
            check_metadata_map = {
                row["check_id"]: row["check_metadata"]
                for row in findings_with_metadata
                if row["check_metadata"]
            }

    # Upsert summaries in bulk for performance
    with rls_transaction(tenant_id):
        existing_check_ids = set()
        if check_ids:
            # Rows already present for this provider/day are reported as
            # "updated"; the rest as "created".
            existing_check_ids = set(
                FindingGroupDailySummary.objects.filter(
                    tenant_id=tenant_id,
                    provider_id=provider_id,
                    check_id__in=check_ids,
                    inserted_at=summary_timestamp,
                ).values_list("check_id", flat=True)
            )
        created_count = len(check_ids) - len(existing_check_ids)
        updated_count = len(existing_check_ids)

        summaries_to_upsert = []
        updated_at = django_timezone.now()
        for row in aggregated_list:
            check_id = row["check_id"]
            metadata = check_metadata_map.get(check_id, {})
            summaries_to_upsert.append(
                FindingGroupDailySummary(
                    tenant_id=tenant_id,
                    provider_id=provider_id,
                    check_id=check_id,
                    inserted_at=summary_timestamp,
                    updated_at=updated_at,
                    check_title=metadata.get("checktitle", ""),
                    check_description=metadata.get("Description", ""),
                    # Falls back to the lowest rank when no severity matched.
                    severity_order=row["severity_order"] or 1,
                    pass_count=row["pass_count"],
                    fail_count=row["fail_count"],
                    muted_count=row["muted_count"],
                    new_count=row["new_count"],
                    changed_count=row["changed_count"],
                    resources_total=row["resources_total"],
                    resources_fail=row["resources_fail"],
                    first_seen_at=row["agg_first_seen_at"],
                    last_seen_at=row["agg_last_seen_at"],
                    failing_since=row["agg_failing_since"],
                )
            )

        if summaries_to_upsert:
            FindingGroupDailySummary.objects.bulk_create(
                summaries_to_upsert,
                update_conflicts=True,
                unique_fields=["tenant_id", "provider", "check_id", "inserted_at"],
                update_fields=[
                    "check_title",
                    "check_description",
                    "severity_order",
                    "pass_count",
                    "fail_count",
                    "muted_count",
                    "new_count",
                    "changed_count",
                    "resources_total",
                    "resources_fail",
                    "first_seen_at",
                    "last_seen_at",
                    "failing_since",
                    "updated_at",
                ],
            )

    logger.info(
        f"Finding group summaries aggregated for scan {scan_id}: "
        f"{created_count} created, {updated_count} updated"
    )

    return {
        "status": "completed",
        "scan_id": str(scan_id),
        "date": str(summary_timestamp.date()),
        "created": created_count,
        "updated": updated_count,
    }
+19
View File
@@ -16,6 +16,7 @@ from tasks.jobs.attack_paths import (
from tasks.jobs.backfill import (
backfill_compliance_summaries,
backfill_daily_severity_summaries,
backfill_finding_group_summaries,
backfill_provider_compliance_scores,
backfill_resource_scan_summaries,
backfill_scan_category_summaries,
@@ -48,6 +49,7 @@ from tasks.jobs.report import generate_compliance_reports_job
from tasks.jobs.scan import (
aggregate_attack_surface,
aggregate_daily_severity,
aggregate_finding_group_summaries,
aggregate_findings,
create_compliance_requirements,
perform_prowler_scan,
@@ -145,6 +147,9 @@ def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str)
perform_scan_summary_task.si(tenant_id=tenant_id, scan_id=scan_id),
group(
aggregate_daily_severity_task.si(tenant_id=tenant_id, scan_id=scan_id),
aggregate_finding_group_summaries_task.si(
tenant_id=tenant_id, scan_id=scan_id
),
generate_outputs_task.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
@@ -642,6 +647,12 @@ def backfill_daily_severity_summaries_task(tenant_id: str, days: int = None):
return backfill_daily_severity_summaries(tenant_id=tenant_id, days=days)
@shared_task(name="backfill-finding-group-summaries", queue="backfill")
def backfill_finding_group_summaries_task(tenant_id: str, days: int = None):
    """
    Celery entry point that rebuilds FindingGroupDailySummary from past scans.

    Args:
        tenant_id: Tenant whose scans are backfilled.
        days: Optional number of days to look back; forwarded unchanged.

    Returns:
        The statistics returned by ``backfill_finding_group_summaries``.
    """
    backfill_stats = backfill_finding_group_summaries(tenant_id=tenant_id, days=days)
    return backfill_stats
@shared_task(name="backfill-scan-category-summaries", queue="backfill")
@handle_provider_deletion
def backfill_scan_category_summaries_task(tenant_id: str, scan_id: str):
@@ -741,6 +752,14 @@ def aggregate_daily_severity_task(tenant_id: str, scan_id: str):
return aggregate_daily_severity(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(base=RLSTask, name="scan-finding-group-summaries", queue="overview")
@set_tenant(keep_tenant=True)
@handle_provider_deletion
def aggregate_finding_group_summaries_task(tenant_id: str, scan_id: str):
    """
    Celery entry point that builds the per-check summaries for one scan.

    Groups the scan's findings by check_id into FindingGroupDailySummary rows,
    which back the finding-groups endpoint.

    Args:
        tenant_id: Tenant that owns the scan.
        scan_id: Scan whose findings are aggregated.

    Returns:
        The statistics returned by ``aggregate_finding_group_summaries``.
    """
    aggregation_stats = aggregate_finding_group_summaries(
        tenant_id=tenant_id, scan_id=scan_id
    )
    return aggregation_stats
@shared_task(base=RLSTask, name="lighthouse-connection-check")
@set_tenant
def check_lighthouse_connection_task(lighthouse_config_id: str, tenant_id: str = None):
+27 -4
View File
@@ -14,11 +14,13 @@ from tasks.jobs.backfill import (
from api.models import (
ComplianceOverviewSummary,
Finding,
ProviderComplianceScore,
ResourceScanSummary,
Scan,
ScanCategorySummary,
ScanGroupSummary,
StateChoices,
StatusChoices,
)
from prowler.lib.check.models import Severity
from prowler.lib.outputs.finding import Status
@@ -364,12 +366,29 @@ class TestBackfillProviderComplianceScores:
def test_no_scans_to_process(self, tenants_fixture, scans_fixture):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
scan.completed_at = None
scan.save()
scan1, scan2, _ = scans_fixture
ProviderComplianceScore.objects.create(
tenant_id=tenant.id,
scan=scan1,
provider=scan1.provider,
compliance_id="aws_cis_1.0",
requirement_id="1.1",
requirement_status=StatusChoices.PASS,
scan_completed_at=scan1.completed_at,
)
ProviderComplianceScore.objects.create(
tenant_id=tenant.id,
scan=scan2,
provider=scan2.provider,
compliance_id="aws_cis_1.0",
requirement_id="1.1",
requirement_status=StatusChoices.PASS,
scan_completed_at=scan2.completed_at,
)
result = backfill_provider_compliance_scores(str(tenant.id))
assert result == {"status": "no completed scans"}
assert result == {"status": "no scans to process"}
@patch("tasks.jobs.backfill.psycopg_connection")
def test_successful_backfill_executes_sql_queries(
@@ -383,10 +402,14 @@ class TestBackfillProviderComplianceScores:
settings.DATABASES.setdefault("admin", settings.DATABASES["default"])
tenant = tenants_fixture[0]
scan = scans_fixture[0]
scan2 = scans_fixture[1]
# Set completed_at to make the scan eligible for backfill
scan.completed_at = datetime.now(timezone.utc)
scan.save()
scan2.state = StateChoices.AVAILABLE
scan2.completed_at = None
scan2.save()
connection = MagicMock()
cursor = MagicMock()
+4
View File
@@ -4093,6 +4093,10 @@ class TestUpdateProviderComplianceScores:
tenant_id = str(tenant.id)
scan_id = str(scan.id)
scan.state = StateChoices.AVAILABLE
scan.completed_at = None
scan.save()
result = update_provider_compliance_scores(tenant_id, scan_id)
assert result["status"] == "skipped"