feat(ens): support PDF reporting (#9158)

Co-authored-by: alejandrobailo <alejandrobailo94@gmail.com>
This commit is contained in:
Pedro Martín
2025-11-19 18:57:58 +01:00
committed by GitHub
parent 219bc12365
commit 94fe87b4a2
20 changed files with 3221 additions and 745 deletions

View File

@@ -14,6 +14,7 @@ All notable changes to the **Prowler API** are documented in this file.
- Support muting findings based on simple rules with custom reason [(#9051)](https://github.com/prowler-cloud/prowler/pull/9051)
- Support C5 compliance framework for the GCP provider [(#9097)](https://github.com/prowler-cloud/prowler/pull/9097)
- Support for Amazon Bedrock and OpenAI compatible providers in Lighthouse AI [(#8957)](https://github.com/prowler-cloud/prowler/pull/8957)
- Support PDF reporting for ENS compliance framework [(#9158)](https://github.com/prowler-cloud/prowler/pull/9158)
- Tenant-wide ThreatScore overview aggregation and snapshot persistence with backfill support [(#9148)](https://github.com/prowler-cloud/prowler/pull/9148)
- Added `metadata`, `details`, and `partition` attributes to the `/resources` endpoint, and `details` and `partition` attributes to the `/findings` endpoint [(#9098)](https://github.com/prowler-cloud/prowler/pull/9098)
- Support for MongoDB Atlas provider [(#9167)](https://github.com/prowler-cloud/prowler/pull/9167)

View File

@@ -8919,6 +8919,72 @@ paths:
'404':
description: The scan has no threatscore reports, or the threatscore report
generation task has not started yet
/api/v1/scans/{id}/ens:
get:
operationId: scans_ens_retrieve
description: Download ENS RD2022 compliance report (e.g., 'ens_rd2022_aws')
as a PDF file.
summary: Retrieve ENS RD2022 compliance report
parameters:
- in: query
name: fields[scans]
schema:
type: array
items:
type: string
enum:
- name
- trigger
- state
- unique_resource_count
- progress
- duration
- provider
- task
- inserted_at
- started_at
- completed_at
- scheduled_at
- next_scan_at
- processor
- url
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
- in: path
name: id
schema:
type: string
format: uuid
description: A UUID string identifying this scan.
required: true
- in: query
name: include
schema:
type: array
items:
type: string
enum:
- provider
description: include query parameter to allow the client to customize which
related resources should be returned.
explode: false
tags:
- Scan
security:
- JWT or API Key: []
responses:
'200':
description: PDF file containing the ENS compliance report
'202':
description: The task is in progress
'401':
description: API key missing or user not Authenticated
'403':
description: There is a problem with credentials
'404':
description: The scan has no ENS reports, or the ENS report generation task
has not started yet
/api/v1/schedules/daily:
post:
operationId: schedules_daily_create

View File

@@ -1661,6 +1661,25 @@ class ProviderViewSet(DisablePaginationMixin, BaseRLSViewSet):
),
},
),
ens=extend_schema(
tags=["Scan"],
summary="Retrieve ENS RD2022 compliance report",
description="Download ENS RD2022 compliance report (e.g., 'ens_rd2022_aws') as a PDF file.",
request=None,
responses={
200: OpenApiResponse(
description="PDF file containing the ENS compliance report"
),
202: OpenApiResponse(description="The task is in progress"),
401: OpenApiResponse(
description="API key missing or user not Authenticated"
),
403: OpenApiResponse(description="There is a problem with credentials"),
404: OpenApiResponse(
description="The scan has no ENS reports, or the ENS report generation task has not started yet"
),
},
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
@@ -1720,6 +1739,9 @@ class ScanViewSet(BaseRLSViewSet):
elif self.action == "threatscore":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "ens":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return super().get_serializer_class()
def partial_update(self, request, *args, **kwargs):
@@ -1973,6 +1995,7 @@ class ScanViewSet(BaseRLSViewSet):
if running_resp:
return running_resp
# TODO: add detailed response if the compliance framework is not supported for the provider
if not scan.output_location:
return Response(
{
@@ -2001,6 +2024,46 @@ class ScanViewSet(BaseRLSViewSet):
content, filename = loader
return self._serve_file(content, filename, "application/pdf")
@action(
    detail=True,
    methods=["get"],
    url_name="ens",
)
def ens(self, request, pk=None):
    # Serve the ENS compliance PDF report that belongs to this scan.
    #
    # Flow:
    #   1. Resolve the scan and return early with whatever
    #      `_get_task_status` yields when it is truthy (presumably the
    #      "task in progress" response -- confirm against the helper).
    #   2. Answer 404 when the scan has no output location recorded.
    #   3. Look for "<output dir>/ens/*_ens_report.pdf" either in S3 or on
    #      the local file system, depending on the output location scheme.
    #   4. `_load_file` hands back either a ready-made Response (returned
    #      as is) or a (content, filename) pair that is served as a PDF.
    scan = self.get_object()

    pending_response = self._get_task_status(scan)
    if pending_response:
        return pending_response

    # TODO: add detailed response if the compliance framework is not supported for the provider
    location = scan.output_location
    if not location:
        return Response(
            {
                "detail": "The scan has no reports, or the ENS report generation task has not started yet."
            },
            status=status.HTTP_404_NOT_FOUND,
        )

    if location.startswith("s3://"):
        # Strip the "s3://<bucket>/" prefix to obtain the object key, then
        # search the sibling "ens" folder of the scan output.
        bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
        object_key = location.removeprefix(f"s3://{bucket}/")
        report_glob = os.path.join(
            os.path.dirname(object_key),
            "ens",
            "*_ens_report.pdf",
        )
        result = self._load_file(
            report_glob, s3=True, bucket=bucket, list_objects=True
        )
    else:
        report_glob = os.path.join(
            os.path.dirname(location), "ens", "*_ens_report.pdf"
        )
        result = self._load_file(report_glob, s3=False)

    # A Response here is returned to the client unchanged.
    if isinstance(result, Response):
        return result

    pdf_content, pdf_filename = result
    return self._serve_file(pdf_content, pdf_filename, "application/pdf")
def create(self, request, *args, **kwargs):
input_serializer = self.get_serializer(data=request.data)
input_serializer.is_valid(raise_exception=True)

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

View File

@@ -241,36 +241,33 @@ def _upload_to_s3(
logger.error(f"S3 upload failed: {str(e)}")
def _generate_output_directory(
output_directory, prowler_provider: object, tenant_id: str, scan_id: str
) -> tuple[str, str, str]:
def _build_output_path(
output_directory: str,
prowler_provider: str,
tenant_id: str,
scan_id: str,
subdirectory: str = None,
) -> str:
"""
Generate a file system path for the output directory of a prowler scan.
This function constructs the output directory path by combining a base
temporary output directory, the tenant ID, the scan ID, and details about
the prowler provider along with a timestamp. The resulting path is used to
store the output files of a prowler scan.
Note:
This function depends on one external variable:
- `output_file_timestamp`: A timestamp (as a string) used to uniquely identify the output.
Build a file system path for the output directory of a prowler scan.
Args:
output_directory (str): The base output directory.
prowler_provider (object): An identifier or descriptor for the prowler provider.
Typically, this is a string indicating the provider (e.g., "aws").
prowler_provider (str): An identifier or descriptor for the prowler provider.
Typically, this is a string indicating the provider (e.g., "aws").
tenant_id (str): The unique identifier for the tenant.
scan_id (str): The unique identifier for the scan.
subdirectory (str, optional): Optional subdirectory to include in the path
(e.g., "compliance", "threatscore", "ens").
Returns:
str: The constructed file system path for the prowler scan output directory.
str: The constructed path. Its parent directory is created if it does not exist.
Example:
>>> _generate_output_directory("/tmp", "aws", "tenant-1234", "scan-5678")
'/tmp/tenant-1234/aws/scan-5678/prowler-output-2023-02-15T12:34:56',
'/tmp/tenant-1234/aws/scan-5678/compliance/prowler-output-2023-02-15T12:34:56'
'/tmp/tenant-1234/aws/scan-5678/threatscore/prowler-output-2023-02-15T12:34:56'
>>> _build_output_path("/tmp", "aws", "tenant-1234", "scan-5678")
'/tmp/tenant-1234/scan-5678/prowler-output-aws-20230215123456'
>>> _build_output_path("/tmp", "aws", "tenant-1234", "scan-5678", "threatscore")
'/tmp/tenant-1234/scan-5678/threatscore/prowler-output-aws-20230215123456'
"""
# Sanitize the prowler provider name to ensure it is a valid directory name
prowler_provider_sanitized = re.sub(r"[^\w\-]", "-", prowler_provider)
@@ -279,22 +276,102 @@ def _generate_output_directory(
started_at = Scan.objects.get(id=scan_id).started_at
timestamp = started_at.strftime("%Y%m%d%H%M%S")
path = (
f"{output_directory}/{tenant_id}/{scan_id}/prowler-output-"
f"{prowler_provider_sanitized}-{timestamp}"
)
if subdirectory:
path = (
f"{output_directory}/{tenant_id}/{scan_id}/{subdirectory}/prowler-output-"
f"{prowler_provider_sanitized}-{timestamp}"
)
else:
path = (
f"{output_directory}/{tenant_id}/{scan_id}/prowler-output-"
f"{prowler_provider_sanitized}-{timestamp}"
)
# Create directory for the path if it doesn't exist
os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
compliance_path = (
f"{output_directory}/{tenant_id}/{scan_id}/compliance/prowler-output-"
f"{prowler_provider_sanitized}-{timestamp}"
)
os.makedirs("/".join(compliance_path.split("/")[:-1]), exist_ok=True)
return path
threatscore_path = (
f"{output_directory}/{tenant_id}/{scan_id}/threatscore/prowler-output-"
f"{prowler_provider_sanitized}-{timestamp}"
)
os.makedirs("/".join(threatscore_path.split("/")[:-1]), exist_ok=True)
return path, compliance_path, threatscore_path
def _generate_compliance_output_directory(
    output_directory: str,
    prowler_provider: str,
    tenant_id: str,
    scan_id: str,
    compliance_framework: str,
) -> str:
    """
    Return the output path used for one compliance framework's reports.

    Thin wrapper around `_build_output_path` that uses the framework name
    (e.g., "threatscore", "ens") as the subdirectory placed between the scan
    ID and the `prowler-output-...` file prefix.

    Args:
        output_directory (str): The base output directory.
        prowler_provider (str): Provider identifier (e.g., "aws").
        tenant_id (str): The unique identifier for the tenant.
        scan_id (str): The unique identifier for the scan.
        compliance_framework (str): Framework name used as the subdirectory
            (e.g., "threatscore", "ens").

    Returns:
        str: The path returned by `_build_output_path` for that subdirectory.

    Example:
        >>> _generate_compliance_output_directory("/tmp", "aws", "tenant-1234", "scan-5678", "threatscore")
        '/tmp/tenant-1234/scan-5678/threatscore/prowler-output-aws-20230215123456'
        >>> _generate_compliance_output_directory("/tmp", "aws", "tenant-1234", "scan-5678", "ens")
        '/tmp/tenant-1234/scan-5678/ens/prowler-output-aws-20230215123456'
    """
    framework_path = _build_output_path(
        output_directory,
        prowler_provider,
        tenant_id,
        scan_id,
        subdirectory=compliance_framework,
    )
    return framework_path
def _generate_output_directory(
    output_directory: str,
    prowler_provider: str,
    tenant_id: str,
    scan_id: str,
) -> tuple[str, str]:
    """
    Return the standard and the compliance output paths for a prowler scan.

    Both paths are produced by `_build_output_path` from the same arguments.
    The first has no subdirectory; the second uses the "compliance"
    subdirectory.

    Args:
        output_directory (str): The base output directory.
        prowler_provider (str): Provider identifier (e.g., "aws").
        tenant_id (str): The unique identifier for the tenant.
        scan_id (str): The unique identifier for the scan.

    Returns:
        tuple[str, str]: `(standard_path, compliance_path)`, in that order.

    Example:
        >>> _generate_output_directory("/tmp", "aws", "tenant-1234", "scan-5678")
        ('/tmp/tenant-1234/scan-5678/prowler-output-aws-20230215123456',
        '/tmp/tenant-1234/scan-5678/compliance/prowler-output-aws-20230215123456')
    """
    shared_args = (output_directory, prowler_provider, tenant_id, scan_id)
    # The standard path is built first, then the compliance one, so any
    # directory creation happens in the same order as before.
    return (
        _build_output_path(*shared_args),
        _build_output_path(*shared_args, subdirectory="compliance"),
    )

File diff suppressed because it is too large Load Diff

View File

@@ -35,7 +35,7 @@ from tasks.jobs.lighthouse_providers import (
refresh_lighthouse_provider_models,
)
from tasks.jobs.muting import mute_historical_findings
from tasks.jobs.report import generate_threatscore_report_job
from tasks.jobs.report import generate_compliance_reports_job
from tasks.jobs.scan import (
aggregate_findings,
create_compliance_requirements,
@@ -75,7 +75,8 @@ def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str)
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
group(
generate_threatscore_report_task.si(
# Use optimized task that generates both reports with shared queries
generate_compliance_reports_task.si(
tenant_id=tenant_id, scan_id=scan_id, provider_id=provider_id
),
check_integrations_task.si(
@@ -319,7 +320,7 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
frameworks_bulk = Compliance.get_bulk(provider_type)
frameworks_avail = get_compliance_frameworks(provider_type)
out_dir, comp_dir, _ = _generate_output_directory(
out_dir, comp_dir = _generate_output_directory(
DJANGO_TMP_OUTPUT_DIRECTORY, provider_uid, tenant_id, scan_id
)
@@ -686,19 +687,33 @@ def jira_integration_task(
@shared_task(
base=RLSTask,
name="scan-threatscore-report",
name="scan-compliance-reports",
queue="scan-reports",
)
def generate_threatscore_report_task(tenant_id: str, scan_id: str, provider_id: str):
def generate_compliance_reports_task(tenant_id: str, scan_id: str, provider_id: str):
"""
Task to generate a threatscore report for a given scan.
Optimized task to generate both ThreatScore and ENS reports with shared queries.
Generating both reports in a single task is more efficient than running a
separate task per report because the database queries are shared:
- The Provider object is fetched once (instead of twice)
- Requirement statistics are aggregated once (instead of twice)
Sharing these queries can reduce database load by up to 50%.
Args:
tenant_id (str): The tenant identifier.
scan_id (str): The scan identifier.
provider_id (str): The provider identifier.
Returns:
dict: Results for both reports containing upload status and paths.
"""
return generate_threatscore_report_job(
tenant_id=tenant_id, scan_id=scan_id, provider_id=provider_id
return generate_compliance_reports_job(
tenant_id=tenant_id,
scan_id=scan_id,
provider_id=provider_id,
generate_threatscore=True,
generate_ens=True,
)

View File

@@ -9,6 +9,7 @@ import pytest
from botocore.exceptions import ClientError
from tasks.jobs.export import (
_compress_output_files,
_generate_compliance_output_directory,
_generate_output_directory,
_upload_to_s3,
get_s3_client,
@@ -168,17 +169,34 @@ class TestOutputs:
provider = "aws"
expected_timestamp = "20230615103045"
path, compliance, threatscore = _generate_output_directory(
# Test _generate_output_directory (returns standard and compliance paths)
path, compliance = _generate_output_directory(
base_dir, provider, tenant_id, scan_id
)
assert os.path.isdir(os.path.dirname(path))
assert os.path.isdir(os.path.dirname(compliance))
assert os.path.isdir(os.path.dirname(threatscore))
assert path.endswith(f"{provider}-{expected_timestamp}")
assert compliance.endswith(f"{provider}-{expected_timestamp}")
assert "/compliance/" in compliance
# Test _generate_compliance_output_directory with "threatscore"
threatscore = _generate_compliance_output_directory(
base_dir, provider, tenant_id, scan_id, compliance_framework="threatscore"
)
assert os.path.isdir(os.path.dirname(threatscore))
assert threatscore.endswith(f"{provider}-{expected_timestamp}")
assert "/threatscore/" in threatscore
# Test _generate_compliance_output_directory with "ens"
ens = _generate_compliance_output_directory(
base_dir, provider, tenant_id, scan_id, compliance_framework="ens"
)
assert os.path.isdir(os.path.dirname(ens))
assert ens.endswith(f"{provider}-{expected_timestamp}")
assert "/ens/" in ens
@patch("tasks.jobs.export.rls_transaction")
@patch("tasks.jobs.export.Scan")
@@ -201,14 +219,25 @@ class TestOutputs:
provider = "aws/test@check"
expected_timestamp = "20230615103045"
path, compliance, threatscore = _generate_output_directory(
# Test provider name sanitization with _generate_output_directory
path, compliance = _generate_output_directory(
base_dir, provider, tenant_id, scan_id
)
assert os.path.isdir(os.path.dirname(path))
assert os.path.isdir(os.path.dirname(compliance))
assert os.path.isdir(os.path.dirname(threatscore))
assert path.endswith(f"aws-test-check-{expected_timestamp}")
assert compliance.endswith(f"aws-test-check-{expected_timestamp}")
# Test provider name sanitization with _generate_compliance_output_directory
threatscore = _generate_compliance_output_directory(
base_dir, provider, tenant_id, scan_id, compliance_framework="threatscore"
)
ens = _generate_compliance_output_directory(
base_dir, provider, tenant_id, scan_id, compliance_framework="ens"
)
assert os.path.isdir(os.path.dirname(threatscore))
assert os.path.isdir(os.path.dirname(ens))
assert threatscore.endswith(f"aws-test-check-{expected_timestamp}")
assert ens.endswith(f"aws-test-check-{expected_timestamp}")

File diff suppressed because it is too large Load Diff

View File

@@ -109,7 +109,6 @@ class TestGenerateOutputs:
return_value=(
"/tmp/test/out-dir",
"/tmp/test/comp-dir",
"/tmp/test/threat-dir",
),
),
patch("tasks.tasks.Scan.all_objects.filter") as mock_scan_update,
@@ -139,7 +138,7 @@ class TestGenerateOutputs:
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory",
return_value=("/tmp/test/out", "/tmp/test/comp", "/tmp/test/threat"),
return_value=("/tmp/test/out", "/tmp/test/comp"),
),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
patch("tasks.tasks.FindingOutput.transform_api_finding"),
@@ -209,7 +208,7 @@ class TestGenerateOutputs:
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory",
return_value=("/tmp/test/out", "/tmp/test/comp", "/tmp/test/threat"),
return_value=("/tmp/test/out", "/tmp/test/comp"),
),
patch(
"tasks.tasks.FindingOutput._transform_findings_stats",
@@ -289,7 +288,6 @@ class TestGenerateOutputs:
return_value=(
"/tmp/test/outdir",
"/tmp/test/compdir",
"/tmp/test/threatdir",
),
),
patch("tasks.tasks._compress_output_files", return_value="outdir.zip"),
@@ -368,7 +366,6 @@ class TestGenerateOutputs:
return_value=(
"/tmp/test/outdir",
"/tmp/test/compdir",
"/tmp/test/threatdir",
),
),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
@@ -436,7 +433,7 @@ class TestGenerateOutputs:
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory",
return_value=("/tmp/test/out", "/tmp/test/comp", "/tmp/test/threat"),
return_value=("/tmp/test/out", "/tmp/test/comp"),
),
patch(
"tasks.tasks.FindingOutput._transform_findings_stats",
@@ -494,7 +491,7 @@ class TestGenerateOutputs:
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory",
return_value=("/tmp/test/out", "/tmp/test/comp", "/tmp/test/threat"),
return_value=("/tmp/test/out", "/tmp/test/comp"),
),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
patch("tasks.tasks.FindingOutput.transform_api_finding"),
@@ -535,34 +532,45 @@ class TestScanCompleteTasks:
@patch("tasks.tasks.create_compliance_requirements_task.apply_async")
@patch("tasks.tasks.perform_scan_summary_task.si")
@patch("tasks.tasks.generate_outputs_task.si")
@patch("tasks.tasks.generate_threatscore_report_task.si")
@patch("tasks.tasks.generate_compliance_reports_task.si")
@patch("tasks.tasks.check_integrations_task.si")
def test_scan_complete_tasks(
self,
mock_check_integrations_task,
mock_threatscore_task,
mock_compliance_reports_task,
mock_outputs_task,
mock_scan_summary_task,
mock_compliance_tasks,
mock_compliance_requirements_task,
):
"""Test that scan complete tasks are properly orchestrated with optimized reports."""
_perform_scan_complete_tasks("tenant-id", "scan-id", "provider-id")
mock_compliance_tasks.assert_called_once_with(
# Verify compliance requirements task is called
mock_compliance_requirements_task.assert_called_once_with(
kwargs={"tenant_id": "tenant-id", "scan_id": "scan-id"},
)
# Verify scan summary task is called
mock_scan_summary_task.assert_called_once_with(
scan_id="scan-id",
tenant_id="tenant-id",
)
# Verify outputs task is called
mock_outputs_task.assert_called_once_with(
scan_id="scan-id",
provider_id="provider-id",
tenant_id="tenant-id",
)
mock_threatscore_task.assert_called_once_with(
# Verify optimized compliance reports task is called (replaces individual tasks)
mock_compliance_reports_task.assert_called_once_with(
tenant_id="tenant-id",
scan_id="scan-id",
provider_id="provider-id",
)
# Verify integrations task is called
mock_check_integrations_task.assert_called_once_with(
tenant_id="tenant-id",
provider_id="provider-id",
@@ -738,7 +746,7 @@ class TestCheckIntegrationsTask:
mock_initialize_provider.return_value = MagicMock()
mock_compliance_bulk.return_value = {}
mock_get_frameworks.return_value = []
mock_generate_dir.return_value = ("out-dir", "comp-dir", "threat-dir")
mock_generate_dir.return_value = ("out-dir", "comp-dir")
mock_transform_stats.return_value = {"stats": "data"}
# Mock findings
@@ -863,7 +871,7 @@ class TestCheckIntegrationsTask:
mock_initialize_provider.return_value = MagicMock()
mock_compliance_bulk.return_value = {}
mock_get_frameworks.return_value = []
mock_generate_dir.return_value = ("out-dir", "comp-dir", "threat-dir")
mock_generate_dir.return_value = ("out-dir", "comp-dir")
mock_transform_stats.return_value = {"stats": "data"}
# Mock findings
@@ -979,7 +987,7 @@ class TestCheckIntegrationsTask:
mock_initialize_provider.return_value = MagicMock()
mock_compliance_bulk.return_value = {}
mock_get_frameworks.return_value = []
mock_generate_dir.return_value = ("out-dir", "comp-dir", "threat-dir")
mock_generate_dir.return_value = ("out-dir", "comp-dir")
mock_transform_stats.return_value = {"stats": "data"}
# Mock findings