chore(revision): resolve comments

This commit is contained in:
pedrooot
2026-04-13 15:23:18 +02:00
parent 5503431c7f
commit 38d9fb85ed
3 changed files with 66 additions and 11 deletions

View File

@@ -698,8 +698,8 @@ python assets/query_checks.py aws --id kms_cmk_rotation_enabled # full metadata
**Step 3 — Dump a framework section with current mappings** — see [assets/dump_section.py](assets/dump_section.py):
```bash
python assets/dump_section.py "CCC.Core." # all Core ARs across 3 providers
python assets/dump_section.py "CCC.AuditLog." # all AuditLog ARs
python assets/dump_section.py ccc "CCC.Core." # all Core ARs across 3 providers
python assets/dump_section.py ccc "CCC.AuditLog." # all AuditLog ARs
```
**Step 4 — Encode explicit REPLACE decisions** — see [assets/audit_framework_template.py](assets/audit_framework_template.py). Structure:

View File

@@ -26,9 +26,8 @@ Usage:
python skills/prowler-compliance/assets/build_inventory.py aws
Output:
/tmp/checks_aws.json (~586 checks)
/tmp/checks_azure.json (~167 checks)
/tmp/checks_gcp.json (~102 checks)
/tmp/checks_{provider}.json for every provider discovered under
prowler/providers/ with a services/ directory.
"""
from __future__ import annotations
@@ -36,9 +35,22 @@ import json
import sys
from pathlib import Path
DEFAULT_PROVIDERS = ["aws", "azure", "gcp", "kubernetes", "m365", "github",
"oraclecloud", "alibabacloud", "mongodbatlas", "nhn",
"iac", "llm", "googleworkspace", "cloudflare"]
# Default location of the provider packages inside the Prowler repo.
PROVIDERS_ROOT = Path("prowler/providers")


def discover_providers(root: Path = PROVIDERS_ROOT) -> list[str]:
    """Return every provider under *root* that has a services/ directory.

    Derived from the filesystem so new providers are picked up automatically
    and stale hard-coded lists cannot drift from the repo.

    Parameters
    ----------
    root : Path, optional
        Directory to scan for provider packages. Defaults to
        ``PROVIDERS_ROOT`` (the in-repo tree), so existing callers are
        unaffected; passing an explicit path makes the function testable
        and reusable against checkouts rooted elsewhere.

    Returns
    -------
    list[str]
        Sorted provider directory names; empty when *root* does not exist
        or contains no provider with a ``services/`` subdirectory.
    """
    if not root.exists():
        return []
    return sorted(
        entry.name
        for entry in root.iterdir()
        # Only directories that actually ship services count as providers;
        # stray files and doc folders are excluded.
        if entry.is_dir() and (entry / "services").is_dir()
    )
def build_for_provider(provider: str) -> dict:
@@ -68,7 +80,13 @@ def build_for_provider(provider: str) -> dict:
def main() -> int:
providers = sys.argv[1:] or DEFAULT_PROVIDERS
providers = sys.argv[1:] or discover_providers()
if not providers:
print(
f"error: no providers found under {PROVIDERS_ROOT}/",
file=sys.stderr,
)
return 1
for provider in providers:
inv = build_for_provider(provider)
out_path = Path(f"/tmp/checks_{provider}.json")

View File

@@ -206,6 +206,13 @@ def load_legacy_check_maps(
"""Read the existing Prowler JSON and build lookup tables for check
preservation.
Fails fast on ambiguous preservation keys. If two distinct legacy
requirements share the same primary value or the same fallback tuple,
merging their ``Checks`` silently would corrupt the preserved mapping
for unrelated requirements. Raises ``ValueError`` listing every
conflict so the user can either dedupe the legacy data or strengthen
``check_preservation`` in the sync config.
Returns
-------
by_primary : dict
@@ -223,17 +230,23 @@ def load_legacy_check_maps(
with open(legacy_path) as f:
data = json.load(f)
# Track which legacy requirement Ids contributed to each bucket so we
# can surface ambiguity after the scan completes.
primary_sources: dict[str, list[str]] = {}
fallback_sources: list[dict[tuple, list[str]]] = [{} for _ in fallback_keys]
for req in data.get("Requirements") or []:
legacy_id = req.get("Id") or "<missing-Id>"
checks = req.get("Checks") or []
# Primary index
pv = req.get(primary_key)
if pv:
primary_sources.setdefault(pv, []).append(legacy_id)
bucket = by_primary.setdefault(pv, [])
for c in checks:
if c not in bucket:
bucket.append(c)
# Fallback indexes — read from Attributes[0]
attributes = req.get("Attributes") or []
if not attributes:
continue
@@ -242,11 +255,35 @@ def load_legacy_check_maps(
key = _build_fallback_key(attrs, field_names)
if key is None:
continue
fallback_sources[i].setdefault(key, []).append(legacy_id)
bucket = by_fallback[i].setdefault(key, [])
for c in checks:
if c not in bucket:
bucket.append(c)
conflicts: list[str] = []
for pv, ids in primary_sources.items():
if len(ids) > 1:
conflicts.append(
f"primary_key={primary_key!r} value={pv!r} shared by {ids}"
)
for i, field_names in enumerate(fallback_keys):
for key, ids in fallback_sources[i].items():
if len(ids) > 1:
conflicts.append(
f"fallback_key={field_names} value={key!r} shared by {ids}"
)
if conflicts:
details = "\n - ".join(conflicts)
raise ValueError(
f"ambiguous preservation keys in {legacy_path} — cannot "
f"faithfully preserve Checks across distinct requirements:\n"
f" - {details}\n"
f"Fix: dedupe the legacy JSON, or strengthen "
f"'post_processing.check_preservation' in the sync config "
f"(e.g. add a more discriminating field to fallback_keys)."
)
return by_primary, by_fallback