feat(sdk): add --export-ocsf flag for OCSF ingestion to Prowler Cloud (#10095)

This commit is contained in:
Adrián Peña
2026-02-24 17:47:35 +01:00
committed by GitHub
parent 247bde1ef4
commit 2a4ee830cc
9 changed files with 203 additions and 11 deletions

27
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.3.0 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -1908,6 +1908,7 @@ files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
[[package]]
name = "contextlib2"
@@ -3155,7 +3156,7 @@ files = [
[package.dependencies]
attrs = ">=22.2.0"
jsonschema-specifications = ">=2023.03.6"
jsonschema-specifications = ">=2023.3.6"
referencing = ">=0.28.4"
rpds-py = ">=0.7.1"
@@ -3264,7 +3265,7 @@ files = [
]
[package.dependencies]
certifi = ">=14.05.14"
certifi = ">=14.5.14"
durationpy = ">=0.7"
google-auth = ">=1.0.1"
oauthlib = ">=3.2.2"
@@ -5117,7 +5118,7 @@ files = [
]
[package.dependencies]
astroid = ">=3.3.8,<=3.4.0-dev0"
astroid = ">=3.3.8,<=3.4.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.2", markers = "python_version < \"3.11\""},
@@ -5964,10 +5965,10 @@ files = [
]
[package.dependencies]
botocore = ">=1.37.4,<2.0a.0"
botocore = ">=1.37.4,<2.0a0"
[package.extras]
crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
crt = ["botocore[crt] (>=1.37.4,<2.0a0)"]
[[package]]
name = "safety"
@@ -6430,6 +6431,18 @@ h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
[[package]]
name = "uuid6"
version = "2024.7.10"
description = "New time-based UUID formats which are suited for use as a database key"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "uuid6-2024.7.10-py3-none-any.whl", hash = "sha256:93432c00ba403751f722829ad21759ff9db051dea140bf81493271e8e4dd18b7"},
{file = "uuid6-2024.7.10.tar.gz", hash = "sha256:2d29d7f63f593caaeea0e0d0dd0ad8129c9c663b29e19bdf882e864bedf18fb0"},
]
[[package]]
name = "virtualenv"
version = "20.32.0"
@@ -6893,4 +6906,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">3.9.1,<3.13"
content-hash = "509440ff7a10d735686d330ac032f824fc92cf2dbacc66371e688ae1dd25dc2f"
content-hash = "386f6cf2bed49290cc4661aa2093ceb018aa6cdaf6864bdfab36f6c2c50e241e"

View File

@@ -22,6 +22,8 @@ All notable changes to the **Prowler SDK** are documented in this file.
- OpenStack compute 7 new checks [(#9944)](https://github.com/prowler-cloud/prowler/pull/9944)
- CSA CCM 4.0 for the Alibaba Cloud provider [(#10061)](https://github.com/prowler-cloud/prowler/pull/10061)
- ECS Exec (ECS-006) privilege escalation detection via `ecs:ExecuteCommand` + `ecs:DescribeTasks` [(#10066)](https://github.com/prowler-cloud/prowler/pull/10066)
- `--export-ocsf` CLI flag to upload OCSF scan results to Prowler Cloud [(#10095)](https://github.com/prowler-cloud/prowler/pull/10095)
- `scan_id` field in OCSF `unmapped` output for ingestion correlation [(#10095)](https://github.com/prowler-cloud/prowler/pull/10095)
- `defenderxdr_endpoint_privileged_user_exposed_credentials` check for M365 provider [(#10084)](https://github.com/prowler-cloud/prowler/pull/10084)
- `defenderxdr_critical_asset_management_pending_approvals` check for M365 provider [(#10085)](https://github.com/prowler-cloud/prowler/pull/10085)
- `entra_seamless_sso_disabled` check for m365 provider [(#10086)](https://github.com/prowler-cloud/prowler/pull/10086)

View File

@@ -2,13 +2,16 @@
# -*- coding: utf-8 -*-
import sys
import tempfile
from os import environ
import requests
from colorama import Fore, Style
from colorama import init as colorama_init
from prowler.config.config import (
EXTERNAL_TOOL_PROVIDERS,
cloud_api_base_url,
csv_file_suffix,
get_available_compliance_frameworks,
html_file_suffix,
@@ -110,6 +113,7 @@ from prowler.lib.outputs.compliance.prowler_threatscore.prowler_threatscore_m365
from prowler.lib.outputs.csv.csv import CSV
from prowler.lib.outputs.finding import Finding
from prowler.lib.outputs.html.html import HTML
from prowler.lib.outputs.ocsf.ingestion import send_ocsf_to_api
from prowler.lib.outputs.ocsf.ocsf import OCSF
from prowler.lib.outputs.outputs import extract_findings_statistics, report
from prowler.lib.outputs.slack.slack import Slack
@@ -477,6 +481,7 @@ def prowler():
sys.exit(1)
generated_outputs = {"regular": [], "compliance": []}
ocsf_output = None
if args.output_formats:
for mode in args.output_formats:
@@ -507,6 +512,7 @@ def prowler():
file_path=f"{filename}{json_ocsf_file_suffix}",
)
generated_outputs["regular"].append(json_output)
ocsf_output = json_output
json_output.batch_write_data_to_file()
if mode == "html":
html_output = HTML(
@@ -518,6 +524,57 @@ def prowler():
provider=global_provider, stats=stats
)
if getattr(args, "export_ocsf", False):
if not ocsf_output or not getattr(ocsf_output, "file_path", None):
tmp_ocsf = tempfile.NamedTemporaryFile(
suffix=json_ocsf_file_suffix, delete=False
)
ocsf_output = OCSF(
findings=finding_outputs,
file_path=tmp_ocsf.name,
)
tmp_ocsf.close()
ocsf_output.batch_write_data_to_file()
print(
f"{Style.BRIGHT}\nExporting OCSF to Prowler Cloud, please wait...{Style.RESET_ALL}"
)
try:
response = send_ocsf_to_api(ocsf_output.file_path)
except ValueError:
logger.warning(
"OCSF export skipped: no API key configured. "
"Set the PROWLER_API_KEY environment variable to enable it. "
f"Scan results were saved to {ocsf_output.file_path}"
)
except requests.ConnectionError:
logger.warning(
"OCSF export skipped: could not reach the Prowler Cloud API at "
f"{cloud_api_base_url}. Check the URL and your network connection. "
f"Scan results were saved to {ocsf_output.file_path}"
)
except requests.HTTPError as http_err:
logger.warning(
f"OCSF export failed: the API returned HTTP {http_err.response.status_code}. "
"Verify your API key is valid and has the right permissions. "
f"Scan results were saved to {ocsf_output.file_path}"
)
except Exception as error:
logger.warning(
f"OCSF export failed unexpectedly: {error}. "
f"Scan results were saved to {ocsf_output.file_path}"
)
else:
job_id = response.get("data", {}).get("id") if response else None
if job_id:
print(
f"{Style.BRIGHT}{Fore.GREEN}\nOCSF export accepted. Ingestion job: {job_id}{Style.RESET_ALL}"
)
else:
logger.warning(
"OCSF export: unexpected API response (missing ingestion job ID). "
f"Scan results were saved to {ocsf_output.file_path}"
)
# Compliance Frameworks
input_compliance_frameworks = set(output_options.output_modes).intersection(
get_available_compliance_frameworks(provider)

View File

@@ -120,6 +120,11 @@ default_redteam_config_file_path = (
encoding_format_utf_8 = "utf-8"
available_output_formats = ["csv", "json-asff", "json-ocsf", "html"]
# Prowler Cloud API settings
# Base URL of the Prowler Cloud API; the PROWLER_CLOUD_API_BASE environment
# variable overrides the default of https://api.prowler.com.
cloud_api_base_url = os.getenv("PROWLER_CLOUD_API_BASE", "https://api.prowler.com")
# API key taken from the PROWLER_API_KEY environment variable; an empty string
# when the variable is not set.
cloud_api_key = os.getenv("PROWLER_API_KEY", "")
# URL path of the ingestion endpoint, relative to the API base URL.
cloud_api_ingestion_path = "/api/v1/ingestions"
def set_output_timestamp(
new_timestamp: datetime,

View File

@@ -215,6 +215,14 @@ Detailed documentation at https://docs.prowler.com
default=False,
help="Set the output timestamp format as unix timestamps instead of iso format timestamps (default mode).",
)
common_outputs_parser.add_argument(
"--export-ocsf",
action="store_true",
help=(
"Send OCSF output to Prowler Cloud ingestion endpoint. "
"Requires PROWLER_API_KEY environment variable."
),
)
def __init_logging_parser__(self):
# Logging Options

View File

@@ -0,0 +1,65 @@
import os
from typing import Any, Dict, Optional
import requests
from prowler.config.config import (
cloud_api_base_url,
cloud_api_ingestion_path,
cloud_api_key,
)
def send_ocsf_to_api(
    file_path: str,
    *,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: int = 60,
) -> Dict[str, Any]:
    """Send an OCSF file to the Prowler Cloud ingestion endpoint.

    Args:
        file_path: Path to the OCSF JSON file to upload.
        base_url: API base URL. Falls back to the configured
            ``cloud_api_base_url`` (PROWLER_CLOUD_API_BASE env var, then
            https://api.prowler.com). ``https://`` is prepended when the value
            carries no http/https scheme.
        api_key: API key. Falls back to the configured ``cloud_api_key``
            (PROWLER_API_KEY env var).
        timeout: Request timeout in seconds.

    Returns:
        Parsed JSON:API response dict, or an empty dict when the response
        body is empty.

    Raises:
        ValueError: If ``file_path`` is empty or no API key is available.
        FileNotFoundError: If the OCSF file does not exist.
        requests.HTTPError: If the API returns an error status.
        requests.RequestException: If the request itself fails, or if a
            successful response carries a body that is not valid JSON.
    """
    if not file_path:
        raise ValueError("No OCSF file path provided.")
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"OCSF file not found: {file_path}")

    api_key = api_key or cloud_api_key
    if not api_key:
        raise ValueError("Missing API key. Set PROWLER_API_KEY environment variable.")

    base_url = (base_url or cloud_api_base_url).rstrip("/")
    if not base_url.lower().startswith(("http://", "https://")):
        base_url = f"https://{base_url}"
    url = f"{base_url}{cloud_api_ingestion_path}"

    with open(file_path, "rb") as fh:
        response = requests.post(
            url,
            headers={
                "Authorization": f"Api-Key {api_key}",
                "Accept": "application/vnd.api+json",
            },
            files={"file": (os.path.basename(file_path), fh, "application/json")},
            timeout=timeout,
        )
    response.raise_for_status()

    if not response.text:
        return {}
    try:
        return response.json()
    except ValueError as error:
        # JSON decoding errors are ValueError subclasses. Callers use
        # ValueError to detect a missing API key, so surface a malformed
        # response body as a request failure instead of letting it be
        # mistaken for a configuration problem.
        raise requests.RequestException(
            "Prowler Cloud API returned a response that is not valid JSON "
            f"(HTTP {response.status_code}).",
            response=response,
        ) from error

View File

@@ -1,6 +1,7 @@
import json
import os
from datetime import datetime
from datetime import datetime, timezone
from random import getrandbits
from typing import List
from py_ocsf_models.events.base_event import SeverityID, StatusID
@@ -17,6 +18,7 @@ from py_ocsf_models.objects.organization import Organization
from py_ocsf_models.objects.product import Product
from py_ocsf_models.objects.remediation import Remediation
from py_ocsf_models.objects.resource_details import ResourceDetails
from uuid6 import UUID
from prowler.lib.logger import logger
from prowler.lib.outputs.finding import Finding
@@ -52,6 +54,10 @@ class OCSF(Output):
findings (List[Finding]): a list of Finding objects
"""
try:
if not findings:
return
scan_id = _uuid7_from_timestamp(findings[0].timestamp)
for finding in findings:
finding_activity = ActivityID.Create
cloud_account_type = self.get_account_type_id_by_provider(
@@ -163,6 +169,7 @@ class OCSF(Output):
"additional_urls": finding.metadata.AdditionalURLs,
"notes": finding.metadata.Notes,
"compliance": finding.compliance,
"scan_id": str(scan_id),
},
)
if finding.provider != "kubernetes":
@@ -295,3 +302,26 @@ class OCSF(Output):
if muted:
status_id = StatusID.Suppressed
return status_id
# NOTE: Copied from api/src/backend/api/uuid_utils.py (datetime_to_uuid7)
# Adapted to accept datetime/epoch inputs.
def _uuid7_from_timestamp(value) -> UUID:
if isinstance(value, datetime):
dt = value
else:
dt = datetime.fromtimestamp(int(value), tz=timezone.utc)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
timestamp_ms = int(dt.timestamp() * 1000) & 0xFFFFFFFFFFFF
rand_seq = getrandbits(12)
rand_node = getrandbits(62)
uuid_int = timestamp_ms << 80
uuid_int |= 0x7 << 76
uuid_int |= rand_seq << 64
uuid_int |= 0x2 << 62
uuid_int |= rand_node
return UUID(int=uuid_int)

View File

@@ -70,6 +70,7 @@ dependencies = [
"slack-sdk==3.39.0",
"tabulate==0.9.0",
"tzlocal==5.3.1",
"uuid6==2024.7.10",
"py-iam-expand==0.1.0",
"h2==4.3.0",
"oci==2.160.3",

View File

@@ -2,6 +2,7 @@ import json
from datetime import datetime, timezone
from io import StringIO
from typing import Optional
from uuid import UUID
import requests
from freezegun import freeze_time
@@ -101,7 +102,10 @@ class TestOCSF:
output_data.type_name
== f"Detection Finding: {DetectionFindingTypeID.Create.name}"
)
assert output_data.unmapped == {
unmapped = output_data.unmapped
scan_id = unmapped.pop("scan_id")
assert UUID(scan_id) # Valid UUID
assert unmapped == {
"related_url": findings[0].metadata.RelatedUrl,
"categories": findings[0].metadata.Categories,
"depends_on": findings[0].metadata.DependsOn,
@@ -260,7 +264,11 @@ class TestOCSF:
mock_file.seek(0)
content = mock_file.read()
assert json.loads(content) == expected_json_output
actual_output = json.loads(content)
# scan_id is non-deterministic (UUID7), validate and remove before comparison
actual_scan_id = actual_output[0]["unmapped"].pop("scan_id")
assert UUID(actual_scan_id)
assert actual_output == expected_json_output
def test_batch_write_data_to_file_without_findings(self):
assert not OCSF([])._file_descriptor
@@ -318,7 +326,10 @@ class TestOCSF:
assert finding_ocsf.risk_details == finding_output.metadata.Risk
# Unmapped Data
assert finding_ocsf.unmapped == {
unmapped = finding_ocsf.unmapped
scan_id = unmapped.pop("scan_id")
assert UUID(scan_id) # Valid UUID
assert unmapped == {
"related_url": finding_output.metadata.RelatedUrl,
"categories": finding_output.metadata.Categories,
"depends_on": finding_output.metadata.DependsOn,