fix(ocsf): serialization errors non-serializable resource meta (#10129)

This commit is contained in:
Pedro Martín
2026-02-20 14:44:03 +01:00
committed by GitHub
parent 9d4f68fa70
commit 9f6121bc05
3 changed files with 192 additions and 12 deletions

View File

@@ -56,7 +56,15 @@ All notable changes to the **Prowler SDK** are documented in this file.
---
## [5.18.3] (Prowler UNRELEASED)
## [5.18.4] (Prowler v5.18.4)
### 🐞 Fixed
- Handle serialization errors in OCSF output for non-serializable resource metadata [(#10129)](https://github.com/prowler-cloud/prowler/pull/10129)
---
## [5.18.3] (Prowler v5.18.3)
### 🐞 Fixed

View File

@@ -1,3 +1,4 @@
import json
import os
from datetime import datetime
from typing import List
@@ -115,10 +116,10 @@ class OCSF(Output):
# TODO: this should be included only if using the Cloud profile
cloud_partition=finding.partition,
region=finding.region,
data={
"details": finding.resource_details,
"metadata": finding.resource_metadata,
},
data=self._sanitize_resource_data(
finding.resource_details,
finding.resource_metadata,
),
)
]
if finding.metadata.Provider != "kubernetes"
@@ -129,10 +130,10 @@ class OCSF(Output):
uid=finding.resource_uid,
group=Group(name=finding.metadata.ServiceName),
type=finding.metadata.ResourceType,
data={
"details": finding.resource_details,
"metadata": finding.resource_metadata,
},
data=self._sanitize_resource_data(
finding.resource_details,
finding.resource_metadata,
),
namespace=finding.region.replace("namespace: ", ""),
)
]
@@ -200,9 +201,13 @@ class OCSF(Output):
self._file_descriptor.write("[")
for finding in self._data:
try:
self._file_descriptor.write(
finding.model_dump_json(exclude_none=True, indent=4)
)
if hasattr(finding, "model_dump_json"):
json_output = finding.model_dump_json(
exclude_none=True, indent=4
)
else:
json_output = finding.json(exclude_none=True, indent=4)
self._file_descriptor.write(json_output)
self._file_descriptor.write(",")
except Exception as error:
logger.error(
@@ -221,6 +226,40 @@ class OCSF(Output):
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
@staticmethod
def _sanitize_resource_data(resource_details: str, resource_metadata: dict) -> dict:
"""Ensures resource data is JSON-serializable.
The resource_metadata dict may contain non-serializable objects
(e.g., Pydantic models passed as raw dicts with model values)
from service resource conversion. This method converts them to
plain dicts and roundtrips through JSON to guarantee serializability.
"""
def _make_serializable(obj):
if hasattr(obj, "model_dump") and callable(obj.model_dump):
return _make_serializable(obj.model_dump())
if hasattr(obj, "dict") and callable(obj.dict):
return _make_serializable(obj.dict())
if isinstance(obj, dict):
return {str(k): _make_serializable(v) for k, v in obj.items()}
if isinstance(obj, (list, tuple)):
return [_make_serializable(v) for v in obj]
return obj
try:
converted = _make_serializable(resource_metadata)
sanitized_metadata = json.loads(json.dumps(converted, default=str))
except (TypeError, ValueError) as error:
logger.warning(
f"Failed to serialize resource metadata, defaulting to empty: {error}"
)
sanitized_metadata = {}
return {
"details": resource_details,
"metadata": sanitized_metadata,
}
@staticmethod
def get_account_type_id_by_provider(provider: str) -> TypeID:
"""

View File

@@ -1,6 +1,7 @@
import json
from datetime import datetime, timezone
from io import StringIO
from typing import Optional
import requests
from freezegun import freeze_time
@@ -19,6 +20,7 @@ from py_ocsf_models.objects.organization import Organization
from py_ocsf_models.objects.product import Product
from py_ocsf_models.objects.remediation import Remediation
from py_ocsf_models.objects.resource_details import ResourceDetails
from pydantic.v1 import BaseModel as V1BaseModel
from prowler.config.config import prowler_version
from prowler.lib.outputs.ocsf.ocsf import OCSF
@@ -461,3 +463,134 @@ class TestOCSF:
def test_suppressed_when_muted(self):
muted = True
assert OCSF.get_finding_status_id(muted) == StatusID.Suppressed
def test_sanitize_resource_data_plain_dict(self):
result = OCSF._sanitize_resource_data("details", {"key": "value"})
assert result == {
"details": "details",
"metadata": {"key": "value"},
}
def test_sanitize_resource_data_empty_dict(self):
result = OCSF._sanitize_resource_data("details", {})
assert result == {
"details": "details",
"metadata": {},
}
def test_sanitize_resource_data_with_pydantic_v1_models(self):
"""Reproduces the Trail serialization bug: resource_metadata is a
dict[str, PydanticModel] when checks pass cloudtrail_client.trails."""
class EventSelector(V1BaseModel):
name: str = None
is_all: bool = False
class Trail(V1BaseModel):
name: str = None
region: str = "us-east-1"
is_logging: bool = True
latest_cloudwatch_delivery_time: datetime = None
data_events: list = []
tags: Optional[list] = []
trails = {
"arn:aws:cloudtrail:us-east-1:123456:trail/main": Trail(
name="main",
latest_cloudwatch_delivery_time=datetime(2026, 1, 15, 10, 30),
data_events=[EventSelector(name="s3", is_all=True)],
),
"arn:aws:cloudtrail:eu-west-1:123456:trail/secondary": Trail(
name="secondary",
),
}
result = OCSF._sanitize_resource_data("resource details", trails)
assert result["details"] == "resource details"
metadata = result["metadata"]
# Trail objects are converted to dicts, not strings
main_trail = metadata["arn:aws:cloudtrail:us-east-1:123456:trail/main"]
assert isinstance(main_trail, dict)
assert main_trail["name"] == "main"
assert main_trail["region"] == "us-east-1"
assert main_trail["is_logging"] is True
# datetime converted to string
assert "2026-01-15" in main_trail["latest_cloudwatch_delivery_time"]
# Nested models are also converted
assert main_trail["data_events"] == [{"name": "s3", "is_all": True}]
secondary_trail = metadata[
"arn:aws:cloudtrail:eu-west-1:123456:trail/secondary"
]
assert isinstance(secondary_trail, dict)
assert secondary_trail["name"] == "secondary"
assert secondary_trail["latest_cloudwatch_delivery_time"] is None
# Entire result must be JSON-serializable
json.dumps(result)
def test_sanitize_resource_data_with_nested_non_serializable_types(self):
"""Ensures datetimes and enums nested in dicts are handled."""
resource_metadata = {
"created_at": datetime(2026, 6, 15, 12, 0, 0),
"nested": {
"timestamp": datetime(2026, 1, 1),
"values": [1, "two", datetime(2025, 12, 31)],
},
}
result = OCSF._sanitize_resource_data("details", resource_metadata)
assert "2026-06-15" in result["metadata"]["created_at"]
assert "2026-01-01" in result["metadata"]["nested"]["timestamp"]
assert result["metadata"]["nested"]["values"][0] == 1
assert result["metadata"]["nested"]["values"][1] == "two"
assert "2025-12-31" in result["metadata"]["nested"]["values"][2]
json.dumps(result)
@freeze_time(datetime.now())
def test_batch_write_data_to_file_with_pydantic_model_in_resource_metadata(self):
"""End-to-end test: OCSF output succeeds when resource_metadata
contains Pydantic v1 model objects (the Trail serialization bug)."""
class Trail(V1BaseModel):
name: str = None
region: str = "us-east-1"
is_logging: bool = True
finding = generate_finding_output(
status="FAIL",
severity="low",
muted=False,
region=AWS_REGION_EU_WEST_1,
timestamp=datetime.now(),
resource_details="trail details",
resource_name="main-trail",
resource_uid="arn:aws:cloudtrail:eu-west-1:123456:trail/main",
status_extended="CloudTrail trail is not logging",
)
# Simulate what happens when Check_Report receives
# resource=cloudtrail_client.trails (a dict of Trail models)
finding.resource_metadata = {
"arn:trail/main": Trail(name="main"),
"arn:trail/secondary": Trail(name="secondary", is_logging=False),
}
mock_file = StringIO()
output = OCSF([finding])
output._file_descriptor = mock_file
with patch.object(mock_file, "close", return_value=None):
output.batch_write_data_to_file()
mock_file.seek(0)
content = mock_file.read()
parsed = json.loads(content)
assert len(parsed) == 1
resource_data = parsed[0]["resources"][0]["data"]
assert resource_data["details"] == "trail details"
# Trail models should be serialized as proper dicts
assert resource_data["metadata"]["arn:trail/main"]["name"] == "main"
assert resource_data["metadata"]["arn:trail/secondary"]["is_logging"] is False