Compare commits


4 Commits

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| Alejandro Bailo | 5e2962f753 | Merge branch 'master' into feat/finding-groups-mcp | 2026-05-12 17:52:03 +02:00 |
| alejandrobailo | ee71d26c8b | docs(ui): add finding groups lighthouse changelog | 2026-05-12 17:51:09 +02:00 |
| alejandrobailo | 0591ea0c8d | docs(mcp): add finding groups changelog entry | 2026-05-12 17:33:11 +02:00 |
| alejandrobailo | 481b44e606 | feat(mcp): add finding groups tools | 2026-05-12 17:31:29 +02:00 |
59 changed files with 1128 additions and 4225 deletions
+3 -11
@@ -15,7 +15,7 @@ Use these skills for detailed patterns on-demand:
|-------|-------------|-----|
| `typescript` | Const types, flat interfaces, utility types | [SKILL.md](skills/typescript/SKILL.md) |
| `react-19` | No useMemo/useCallback, React Compiler | [SKILL.md](skills/react-19/SKILL.md) |
| `nextjs-16` | App Router, Server Actions, proxy.ts, streaming | [SKILL.md](skills/nextjs-16/SKILL.md) |
| `nextjs-15` | App Router, Server Actions, streaming | [SKILL.md](skills/nextjs-15/SKILL.md) |
| `tailwind-4` | cn() utility, no var() in className | [SKILL.md](skills/tailwind-4/SKILL.md) |
| `playwright` | Page Object Model, MCP workflow, selectors | [SKILL.md](skills/playwright/SKILL.md) |
| `pytest` | Fixtures, mocking, markers, parametrize | [SKILL.md](skills/pytest/SKILL.md) |
@@ -60,14 +60,11 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Adding DRF pagination or permissions | `django-drf` |
| Adding a compliance output formatter (per-provider class + table dispatcher) | `prowler-compliance` |
| Adding indexes or constraints to database tables | `django-migration-psql` |
| Adding new providers | `prowler-provider` |
| Adding privilege escalation detection queries | `prowler-attack-paths-query` |
| Adding services to existing providers | `prowler-provider` |
| After creating/modifying a skill | `skill-sync` |
| App Router / Server Actions | `nextjs-16` |
| Auditing check-to-requirement mappings as a cloud auditor | `prowler-compliance` |
| App Router / Server Actions | `nextjs-15` |
| Building AI chat features | `ai-sdk-5` |
| Committing changes | `prowler-commit` |
| Configuring MCP servers in agentic workflows | `gh-aw` |
@@ -81,7 +78,6 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Creating a git commit | `prowler-commit` |
| Creating new checks | `prowler-sdk-check` |
| Creating new skills | `skill-creator` |
| Creating or reviewing Django migrations | `django-migration-psql` |
| Creating/modifying Prowler UI components | `prowler-ui` |
| Creating/modifying models, views, serializers | `prowler-api` |
| Creating/updating compliance frameworks | `prowler-compliance` |
@@ -89,7 +85,6 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Debugging gh-aw compilation errors | `gh-aw` |
| Fill .github/pull_request_template.md (Context/Description/Steps to review/Checklist) | `prowler-pr` |
| Fixing bug | `tdd` |
| Fixing compliance JSON bugs (duplicate IDs, empty Section, stale refs) | `prowler-compliance` |
| General Prowler development questions | `prowler` |
| Implementing JSON:API endpoints | `django-drf` |
| Implementing feature | `tdd` |
@@ -107,8 +102,6 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Review changelog format and conventions | `prowler-changelog` |
| Reviewing JSON:API compliance | `jsonapi` |
| Reviewing compliance framework PRs | `prowler-compliance-review` |
| Running makemigrations or pgmakemigrations | `django-migration-psql` |
| Syncing compliance framework with upstream catalog | `prowler-compliance` |
| Testing RLS tenant isolation | `prowler-test-api` |
| Testing hooks or utilities | `vitest` |
| Troubleshoot why a skill is missing from AGENTS.md auto-invoke | `skill-sync` |
@@ -136,7 +129,6 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Writing React components | `react-19` |
| Writing TypeScript types/interfaces | `typescript` |
| Writing Vitest tests | `vitest` |
| Writing data backfill or data migration | `django-migration-psql` |
| Writing documentation | `prowler-docs` |
| Writing unit tests for UI | `vitest` |
@@ -150,7 +142,7 @@ Prowler is an open-source cloud security assessment tool supporting AWS, Azure,
|-----------|----------|------------|
| SDK | `prowler/` | Python 3.10+, Poetry 2.3+ |
| API | `api/` | Django 5.1, DRF, Celery |
| UI | `ui/` | Next.js 16, React 19, Tailwind 4 |
| UI | `ui/` | Next.js 15, React 19, Tailwind 4 |
| MCP Server | `mcp_server/` | FastMCP, Python 3.12+ |
| Dashboard | `dashboard/` | Dash, Plotly |
-9
@@ -11,15 +11,6 @@ All notable changes to the **Prowler API** are documented in this file.
### 🔄 Changed
- Remove orphaned `gin_resources_search_idx` declaration from `Resource.Meta.indexes` (DB index dropped in `0072_drop_unused_indexes`) [(#11001)](https://github.com/prowler-cloud/prowler/pull/11001)
- PDF compliance reports cap detail tables at 100 failed findings per check (configurable via `DJANGO_PDF_MAX_FINDINGS_PER_CHECK`) to bound worker memory on large scans [(#11160)](https://github.com/prowler-cloud/prowler/pull/11160)
---
## [1.27.2] (Prowler UNRELEASED)
### 🐞 Fixed
- Attack Paths: BEDROCK-001 and BEDROCK-002 now target roles trusting `bedrock-agentcore.amazonaws.com` instead of `bedrock.amazonaws.com`, eliminating false positives against regular Bedrock service roles (Agents, Knowledge Bases, model invocation) [(#11141)](https://github.com/prowler-cloud/prowler/pull/11141)
---
@@ -484,8 +484,8 @@ AWS_BEDROCK_PRIVESC_PASSROLE_CODE_INTERPRETER = AttackPathsQueryDefinition(
OR action = '*'
)
// Find roles that trust the Bedrock AgentCore service (can be passed to a code interpreter)
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {{arn: 'bedrock-agentcore.amazonaws.com'}})
// Find roles that trust Bedrock service (can be passed to Bedrock)
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {{arn: 'bedrock.amazonaws.com'}})
WHERE any(resource IN stmt_passrole.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
@@ -536,8 +536,8 @@ AWS_BEDROCK_PRIVESC_INVOKE_CODE_INTERPRETER = AttackPathsQueryDefinition(
OR action = '*'
)
// Find roles that trust the Bedrock AgentCore service (already attached to existing code interpreters)
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {{arn: 'bedrock-agentcore.amazonaws.com'}})
// Find roles that trust Bedrock service (already attached to existing code interpreters)
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {{arn: 'bedrock.amazonaws.com'}})
WITH collect(path_principal) + collect(path_target) AS paths
UNWIND paths AS p
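The query change above turns on which service principal the role's IAM trust policy names. A minimal sketch of the distinguishing document (standard IAM trust-policy shape expressed as a Python dict; values illustrative):

agentcore_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            # A role trusting bedrock-agentcore.amazonaws.com can be passed to
            # an AgentCore code interpreter. A role trusting plain
            # bedrock.amazonaws.com is a regular Bedrock service role (Agents,
            # Knowledge Bases, model invocation) and should not be flagged.
            "Effect": "Allow",
            "Principal": {"Service": "bedrock-agentcore.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}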
+10 -145
@@ -20,15 +20,11 @@ from tasks.jobs.reports import (
ThreatScoreReportGenerator,
)
from tasks.jobs.threatscore import compute_threatscore_metrics
from tasks.jobs.threatscore_utils import (
_aggregate_requirement_statistics_from_database,
_get_compliance_check_ids,
)
from tasks.jobs.threatscore_utils import _aggregate_requirement_statistics_from_database
from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import rls_transaction
from api.models import Provider, Scan, ScanSummary, StateChoices, ThreatScoreSnapshot
from api.utils import initialize_prowler_provider
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.outputs.finding import Finding as FindingOutput
@@ -431,7 +427,6 @@ def generate_threatscore_report(
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider=None,
) -> None:
"""
Generate a PDF compliance report based on Prowler ThreatScore framework.
@@ -460,7 +455,6 @@ def generate_threatscore_report(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
only_failed=only_failed,
)
@@ -475,7 +469,6 @@ def generate_ens_report(
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider=None,
) -> None:
"""
Generate a PDF compliance report for ENS RD2022 framework.
@@ -502,7 +495,6 @@ def generate_ens_report(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
include_manual=include_manual,
)
@@ -518,7 +510,6 @@ def generate_nis2_report(
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider=None,
) -> None:
"""
Generate a PDF compliance report for NIS2 Directive (EU) 2022/2555.
@@ -546,7 +537,6 @@ def generate_nis2_report(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
only_failed=only_failed,
include_manual=include_manual,
)
@@ -563,7 +553,6 @@ def generate_csa_report(
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider=None,
) -> None:
"""
Generate a PDF compliance report for CSA Cloud Controls Matrix (CCM) v4.0.
@@ -591,7 +580,6 @@ def generate_csa_report(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
only_failed=only_failed,
include_manual=include_manual,
)
@@ -608,7 +596,6 @@ def generate_cis_report(
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider=None,
) -> None:
"""
Generate a PDF compliance report for a specific CIS Benchmark variant.
@@ -640,7 +627,6 @@ def generate_cis_report(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
only_failed=only_failed,
include_manual=include_manual,
)
@@ -785,17 +771,6 @@ def generate_compliance_reports(
results["csa"] = {"upload": False, "path": ""}
generate_csa = False
# Load the framework definitions for this provider once. We use this map
# both to pick the latest CIS variant and to precompute the set of
# check_ids each framework consumes (for findings_cache eviction).
frameworks_bulk: dict = {}
try:
frameworks_bulk = Compliance.get_bulk(provider_type)
except Exception as e:
logger.error("Error loading compliance frameworks for %s: %s", provider_type, e)
# Fall through; individual frameworks will still try and fail
# gracefully if their compliance_id is missing.
# For CIS we do NOT pre-check the provider against a hard-coded whitelist
# (that list drifts the moment a new CIS JSON ships). Instead, we inspect
# the dynamically loaded framework map and pick the latest available CIS
@@ -803,6 +778,7 @@ def generate_compliance_reports(
latest_cis: str | None = None
if generate_cis:
try:
frameworks_bulk = Compliance.get_bulk(provider_type)
latest_cis = _pick_latest_cis_variant(
name for name in frameworks_bulk.keys() if name.startswith("cis_")
)
@@ -839,84 +815,10 @@ def generate_compliance_reports(
tenant_id, scan_id
)
# Initialize the Prowler provider once for the whole report batch. Each
# generator used to re-init this in _load_compliance_data, paying the
# boto3/Azure-SDK construction cost 5 times per scan. The instance is
# only used by FindingOutput.transform_api_finding to enrich findings,
# so a single shared instance is correct.
logger.info("Initializing prowler_provider once for all reports (scan %s)", scan_id)
try:
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
prowler_provider = initialize_prowler_provider(provider_obj)
except Exception as init_error:
# If init fails the generators will fall back to lazy init in
# _load_compliance_data; we just log and continue.
logger.warning(
"Could not pre-initialize prowler_provider for scan %s: %s",
scan_id,
init_error,
)
prowler_provider = None
# Create shared findings cache up front so the eviction closure below
# can reference it. Defined BEFORE the closure to avoid the UnboundLocalError
# trap if an early-return is later inserted between the closure and its
# first use.
findings_cache: dict[str, list[FindingOutput]] = {}
# Create shared findings cache
findings_cache = {}
logger.info("Created shared findings cache for all reports")
# Precompute the set of check_ids each framework consumes. After a
# framework finishes, every check_id that no remaining framework still
# needs is evicted from findings_cache so the dict does not keep
# growing through the batch (PROWLER-1733).
pending_checks_by_framework: dict[str, set[str]] = {}
if generate_threatscore:
pending_checks_by_framework["threatscore"] = _get_compliance_check_ids(
frameworks_bulk.get(f"prowler_threatscore_{provider_type}")
)
if generate_ens:
pending_checks_by_framework["ens"] = _get_compliance_check_ids(
frameworks_bulk.get(f"ens_rd2022_{provider_type}")
)
if generate_nis2:
pending_checks_by_framework["nis2"] = _get_compliance_check_ids(
frameworks_bulk.get(f"nis2_{provider_type}")
)
if generate_csa:
pending_checks_by_framework["csa"] = _get_compliance_check_ids(
frameworks_bulk.get(f"csa_ccm_4.0_{provider_type}")
)
if generate_cis and latest_cis:
pending_checks_by_framework["cis"] = _get_compliance_check_ids(
frameworks_bulk.get(latest_cis)
)
def _evict_after_framework(done_key: str) -> int:
"""Drop from findings_cache every check_id no pending framework still needs."""
done = pending_checks_by_framework.pop(done_key, set())
still_needed: set[str] = (
set().union(*pending_checks_by_framework.values())
if pending_checks_by_framework
else set()
)
exclusive = done - still_needed
evicted = 0
for cid in exclusive:
if findings_cache.pop(cid, None) is not None:
evicted += 1
if evicted:
logger.info(
"Evicted %d exclusive check entries from findings_cache after %s "
"(remaining cache size: %d)",
evicted,
done_key,
len(findings_cache),
)
# Release the lists' memory now instead of waiting for the next
# gc cycle; FindingOutput instances retain quite a bit of state.
gc.collect()
return evicted
generated_report_keys: list[str] = []
output_paths: dict[str, str] = {}
out_dir: str | None = None
@@ -1005,7 +907,6 @@ def generate_compliance_reports(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
# Compute and store ThreatScore metrics snapshot
@@ -1083,15 +984,9 @@ def generate_compliance_reports(
logger.warning("ThreatScore report saved locally at %s", out_dir)
except Exception as e:
logger.exception(
"compliance_report_failed framework=threatscore scan_id=%s tenant_id=%s",
scan_id,
tenant_id,
)
logger.error("Error generating ThreatScore report: %s", e)
results["threatscore"] = {"upload": False, "path": "", "error": str(e)}
_evict_after_framework("threatscore")
# Generate ENS report
if generate_ens:
generated_report_keys.append("ens")
@@ -1111,7 +1006,6 @@ def generate_compliance_reports(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
upload_uri_ens = _upload_to_s3(
@@ -1126,15 +1020,9 @@ def generate_compliance_reports(
logger.warning("ENS report saved locally at %s", out_dir)
except Exception as e:
logger.exception(
"compliance_report_failed framework=ens scan_id=%s tenant_id=%s",
scan_id,
tenant_id,
)
logger.error("Error generating ENS report: %s", e)
results["ens"] = {"upload": False, "path": "", "error": str(e)}
_evict_after_framework("ens")
# Generate NIS2 report
if generate_nis2:
generated_report_keys.append("nis2")
@@ -1155,7 +1043,6 @@ def generate_compliance_reports(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
upload_uri_nis2 = _upload_to_s3(
@@ -1170,15 +1057,9 @@ def generate_compliance_reports(
logger.warning("NIS2 report saved locally at %s", out_dir)
except Exception as e:
logger.exception(
"compliance_report_failed framework=nis2 scan_id=%s tenant_id=%s",
scan_id,
tenant_id,
)
logger.error("Error generating NIS2 report: %s", e)
results["nis2"] = {"upload": False, "path": "", "error": str(e)}
_evict_after_framework("nis2")
# Generate CSA CCM report
if generate_csa:
generated_report_keys.append("csa")
@@ -1199,7 +1080,6 @@ def generate_compliance_reports(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
upload_uri_csa = _upload_to_s3(
@@ -1214,15 +1094,9 @@ def generate_compliance_reports(
logger.warning("CSA CCM report saved locally at %s", out_dir)
except Exception as e:
logger.exception(
"compliance_report_failed framework=csa scan_id=%s tenant_id=%s",
scan_id,
tenant_id,
)
logger.error("Error generating CSA CCM report: %s", e)
results["csa"] = {"upload": False, "path": "", "error": str(e)}
_evict_after_framework("csa")
# Generate CIS Benchmark report for the latest available version only.
# CIS ships multiple versions per provider (e.g. cis_1.4_aws, cis_5.0_aws,
# cis_6.0_aws); we dynamically pick the highest semantic version at run
@@ -1245,7 +1119,6 @@ def generate_compliance_reports(
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
upload_uri_cis = _upload_to_s3(
@@ -1274,22 +1147,14 @@ def generate_compliance_reports(
)
except Exception as e:
logger.exception(
"compliance_report_failed framework=cis variant=%s scan_id=%s tenant_id=%s",
latest_cis,
scan_id,
tenant_id,
)
logger.error("Error generating CIS report %s: %s", latest_cis, e)
results["cis"] = {
"upload": False,
"path": "",
"error": str(e),
}
finally:
# Free ReportLab/matplotlib memory before moving on. CIS is
# always the last framework, so evicting its entries clears the
# cache entirely (subject to its check_ids set).
_evict_after_framework("cis")
# Free ReportLab/matplotlib memory before moving on.
gc.collect()
# Clean up temporary files only if all generated reports were
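The eviction closure above is plain set algebra over the pending-frameworks map; a self-contained sketch with toy check_ids (hypothetical data, not the real cache contents):

def evict_after_framework(done_key, pending, cache):
    # Drop from the cache every check_id no remaining framework still needs.
    done = pending.pop(done_key, set())
    still_needed = set().union(*pending.values()) if pending else set()
    for check_id in done - still_needed:
        cache.pop(check_id, None)

pending = {"threatscore": {"tsc_only", "shared"}, "ens": {"ens_only", "shared"}}
cache = {"tsc_only": ["f1"], "shared": ["f2"], "ens_only": ["f3"]}
evict_after_framework("threatscore", pending, cache)
assert set(cache) == {"shared", "ens_only"}  # tsc_only evicted; shared kept for ENS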
+75 -288
@@ -1,9 +1,6 @@
import gc
import os
import resource as _resource_module
import time
from abc import ABC, abstractmethod
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Any
@@ -44,7 +41,6 @@ from .config import (
COLOR_LIGHT_BLUE,
COLOR_LIGHTER_BLUE,
COLOR_PROWLER_DARK_GREEN,
FINDINGS_TABLE_CHUNK_SIZE,
PADDING_LARGE,
PADDING_SMALL,
FrameworkConfig,
@@ -52,53 +48,6 @@ from .config import (
logger = get_task_logger(__name__)
@contextmanager
def _log_phase(phase: str, *, scan_id: str, framework: str):
"""Log start/end timing and RSS deltas around a report-building section.
Emits structured key=value logs so Grafana/Datadog/CloudWatch queries
can pivot by ``phase``, ``framework`` and ``scan_id`` to find the
slow/heavy section on any given scan. ``getrusage`` returns KB on
Linux and bytes on macOS; the values are still useful in relative
terms even though units differ across platforms.
"""
start = time.perf_counter()
rss_before = _resource_module.getrusage(_resource_module.RUSAGE_SELF).ru_maxrss
logger.info(
"phase_start phase=%s scan_id=%s framework=%s rss_kb=%d",
phase,
scan_id,
framework,
rss_before,
)
try:
yield
except Exception:
elapsed = time.perf_counter() - start
logger.exception(
"phase_failed phase=%s scan_id=%s framework=%s elapsed_s=%.2f",
phase,
scan_id,
framework,
elapsed,
)
raise
else:
elapsed = time.perf_counter() - start
rss_after = _resource_module.getrusage(_resource_module.RUSAGE_SELF).ru_maxrss
logger.info(
"phase_end phase=%s scan_id=%s framework=%s elapsed_s=%.2f "
"rss_kb=%d delta_rss_kb=%d",
phase,
scan_id,
framework,
elapsed,
rss_after,
rss_after - rss_before,
)
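A hypothetical call site for the context manager above (identifiers illustrative):

with _log_phase("doc_build", scan_id="scan-123", framework="CIS 5.0"):
    doc.build(elements)  # logs phase_start on entry, phase_end or phase_failed on exit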
# Register fonts (done once at module load)
_fonts_registered: bool = False
@@ -386,7 +335,6 @@ class BaseComplianceReportGenerator(ABC):
provider_obj: Provider | None = None,
requirement_statistics: dict[str, dict[str, int]] | None = None,
findings_cache: dict[str, list[FindingOutput]] | None = None,
prowler_provider: Any | None = None,
**kwargs,
) -> None:
"""Generate the PDF compliance report.
@@ -403,35 +351,23 @@ class BaseComplianceReportGenerator(ABC):
provider_obj: Optional pre-fetched Provider object
requirement_statistics: Optional pre-aggregated statistics
findings_cache: Optional pre-loaded findings cache
prowler_provider: Optional pre-initialized Prowler provider. When
generating multiple reports for the same scan the master
function initializes this once and passes it in to avoid
re-running boto3/Azure-SDK setup per framework.
**kwargs: Additional framework-specific arguments
"""
framework = self.config.display_name
logger.info(
"report_generation_start framework=%s scan_id=%s compliance_id=%s",
framework,
scan_id,
compliance_id,
"Generating %s report for scan %s", self.config.display_name, scan_id
)
try:
# 1. Load compliance data
with _log_phase(
"load_compliance_data", scan_id=scan_id, framework=framework
):
data = self._load_compliance_data(
tenant_id=tenant_id,
scan_id=scan_id,
compliance_id=compliance_id,
provider_id=provider_id,
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
prowler_provider=prowler_provider,
)
data = self._load_compliance_data(
tenant_id=tenant_id,
scan_id=scan_id,
compliance_id=compliance_id,
provider_id=provider_id,
provider_obj=provider_obj,
requirement_statistics=requirement_statistics,
findings_cache=findings_cache,
)
# 2. Create PDF document
doc = self._create_document(output_path, data)
@@ -441,54 +377,37 @@ class BaseComplianceReportGenerator(ABC):
elements = []
# Cover page (lightweight)
with _log_phase("cover_page", scan_id=scan_id, framework=framework):
elements.extend(self.create_cover_page(data))
elements.append(PageBreak())
elements.extend(self.create_cover_page(data))
elements.append(PageBreak())
# Executive summary (framework-specific)
with _log_phase("executive_summary", scan_id=scan_id, framework=framework):
elements.extend(self.create_executive_summary(data))
elements.extend(self.create_executive_summary(data))
# Body sections (charts + requirements index)
# Override _build_body_sections() in subclasses to change section order
with _log_phase("body_sections", scan_id=scan_id, framework=framework):
elements.extend(self._build_body_sections(data))
elements.extend(self._build_body_sections(data))
# Detailed findings - heaviest section, loads findings on-demand
with _log_phase("detailed_findings", scan_id=scan_id, framework=framework):
elements.extend(self.create_detailed_findings(data, **kwargs))
gc.collect() # Free findings data after processing
logger.info("Building detailed findings section...")
elements.extend(self.create_detailed_findings(data, **kwargs))
gc.collect() # Free findings data after processing
# 4. Build the PDF
logger.info(
"doc_build_about_to_run framework=%s scan_id=%s elements=%d",
framework,
scan_id,
len(elements),
)
with _log_phase("doc_build", scan_id=scan_id, framework=framework):
self._build_pdf(doc, elements, data)
logger.info("Building PDF document with %d elements...", len(elements))
self._build_pdf(doc, elements, data)
# Final cleanup
del elements
gc.collect()
logger.info(
"report_generation_end framework=%s scan_id=%s output_path=%s",
framework,
scan_id,
output_path,
)
logger.info("Successfully generated report at %s", output_path)
except Exception:
# logger.exception captures the full traceback; the contextual
# keys keep production search-by-scan-id viable.
logger.exception(
"report_generation_failed framework=%s scan_id=%s compliance_id=%s",
framework,
scan_id,
compliance_id,
)
except Exception as e:
import traceback
tb_lineno = e.__traceback__.tb_lineno if e.__traceback__ else "unknown"
logger.error("Error generating report, line %s -- %s", tb_lineno, e)
logger.error("Full traceback:\n%s", traceback.format_exc())
raise
def _build_body_sections(self, data: ComplianceData) -> list:
@@ -719,25 +638,15 @@ class BaseComplianceReportGenerator(ABC):
for req in requirements:
check_ids_to_load.extend(req.checks)
# Load findings on-demand only for the checks that will be displayed.
# When ``only_failed`` is active at requirement level, also push the
# FAIL filter down to the finding level: a requirement marked FAIL
# because 1/1000 findings failed must not render a table dominated by
# 999 PASS rows. That hides the actual failure under noise and
# makes the per-check cap truncate the wrong rows.
# ``total_counts`` is populated with the pre-cap total per check_id
# (FAIL-only when only_failed is active) so the "Showing first N of
# M" banner uses the same denominator the reader cares about.
# Load findings on-demand only for the checks that will be displayed
# Uses the shared findings cache to avoid duplicate queries across reports
logger.info("Loading findings on-demand for %d requirements", len(requirements))
total_counts: dict[str, int] = {}
findings_by_check_id = _load_findings_for_requirement_checks(
data.tenant_id,
data.scan_id,
check_ids_to_load,
data.prowler_provider,
data.findings_by_check_id, # Pass the cache to update it
total_counts_out=total_counts,
only_failed_findings=only_failed,
)
for req in requirements:
@@ -769,31 +678,9 @@ class BaseComplianceReportGenerator(ABC):
)
)
else:
# Surface truncation BEFORE the tables so readers see it
# at the same scroll position as the data itself, not
# after thousands of rendered rows.
loaded = len(findings)
total = total_counts.get(check_id, loaded)
if total > loaded:
kind = "failed findings" if only_failed else "findings"
elements.append(
Paragraph(
f"<b>&#9888; Showing first {loaded:,} of "
f"{total:,} {kind} for this check.</b> "
f"Use the CSV or JSON export for the full "
f"list. The PDF caps detail rows to keep "
f"the report readable and bounded in size.",
self.styles["normal"],
)
)
elements.append(Spacer(1, 0.05 * inch))
# Create chunked findings tables to prevent OOM when a
# single check has thousands of findings (ReportLab
# resolves layout per Flowable, so many small tables
# render contiguously with a bounded memory peak).
findings_tables = self._create_findings_tables(findings)
elements.extend(findings_tables)
# Create findings table
findings_table = self._create_findings_table(findings)
elements.append(findings_table)
elements.append(Spacer(1, 0.1 * inch))
@@ -848,7 +735,6 @@ class BaseComplianceReportGenerator(ABC):
provider_obj: Provider | None,
requirement_statistics: dict | None,
findings_cache: dict | None,
prowler_provider: Any | None = None,
) -> ComplianceData:
"""Load and aggregate compliance data from the database.
@@ -860,9 +746,6 @@ class BaseComplianceReportGenerator(ABC):
provider_obj: Optional pre-fetched Provider
requirement_statistics: Optional pre-aggregated statistics
findings_cache: Optional pre-loaded findings
prowler_provider: Optional pre-initialized Prowler provider. When
the master function initializes it once and passes it in,
we skip the per-report ``initialize_prowler_provider`` call.
Returns:
Aggregated ComplianceData object
@@ -872,8 +755,7 @@ class BaseComplianceReportGenerator(ABC):
if provider_obj is None:
provider_obj = Provider.objects.get(id=provider_id)
if prowler_provider is None:
prowler_provider = initialize_prowler_provider(provider_obj)
prowler_provider = initialize_prowler_provider(provider_obj)
provider_type = provider_obj.provider
# Load compliance framework
@@ -941,32 +823,13 @@ class BaseComplianceReportGenerator(ABC):
) -> SimpleDocTemplate:
"""Create the PDF document template.
Validates that ``output_path`` is a filesystem path string with an
existing parent directory. SimpleDocTemplate technically accepts a
BytesIO too, but we want every report to land on disk so the
Celery worker doesn't hold the full PDF in memory while uploading
to S3.
Args:
output_path: Path for the output PDF
data: Compliance data for metadata
Returns:
Configured SimpleDocTemplate
Raises:
TypeError: ``output_path`` is not a string.
FileNotFoundError: The parent directory does not exist.
"""
if not isinstance(output_path, str):
raise TypeError(
"output_path must be a filesystem path string; "
f"got {type(output_path).__name__}"
)
parent_dir = os.path.dirname(output_path)
if parent_dir and not os.path.isdir(parent_dir):
raise FileNotFoundError(f"Output directory does not exist: {parent_dir}")
return SimpleDocTemplate(
output_path,
pagesize=letter,
@@ -1013,10 +876,47 @@ class BaseComplianceReportGenerator(ABC):
onLaterPages=add_footer,
)
# Column layout shared by all findings sub-tables. Defined as a method so
# subclasses can override it without re-implementing the chunking logic.
def _findings_table_columns(self) -> list[ColumnConfig]:
return [
def _create_findings_table(self, findings: list[FindingOutput]) -> Any:
"""Create a findings table.
Args:
findings: List of finding objects
Returns:
ReportLab Table element
"""
def get_finding_title(f):
metadata = getattr(f, "metadata", None)
if metadata:
return getattr(metadata, "CheckTitle", getattr(f, "check_id", ""))
return getattr(f, "check_id", "")
def get_resource_name(f):
name = getattr(f, "resource_name", "")
if not name:
name = getattr(f, "resource_uid", "")
return name
def get_severity(f):
metadata = getattr(f, "metadata", None)
if metadata:
return getattr(metadata, "Severity", "").capitalize()
return ""
# Convert findings to dicts for the table
data = []
for f in findings:
item = {
"title": get_finding_title(f),
"resource_name": get_resource_name(f),
"severity": get_severity(f),
"status": getattr(f, "status", "").upper(),
"region": getattr(f, "region", "global"),
}
data.append(item)
columns = [
ColumnConfig("Finding", 2.5 * inch, "title"),
ColumnConfig("Resource", 3 * inch, "resource_name"),
ColumnConfig("Severity", 0.9 * inch, "severity"),
@@ -1024,122 +924,9 @@ class BaseComplianceReportGenerator(ABC):
ColumnConfig("Region", 0.9 * inch, "region"),
]
@staticmethod
def _finding_to_row(f: FindingOutput) -> dict[str, str]:
"""Project a FindingOutput onto the row dict the table expects.
Kept defensive: missing metadata or attributes return empty strings
rather than raising, so a single malformed finding never breaks the
whole report.
"""
metadata = getattr(f, "metadata", None)
title = (
getattr(metadata, "CheckTitle", getattr(f, "check_id", ""))
if metadata
else getattr(f, "check_id", "")
)
resource_name = getattr(f, "resource_name", "") or getattr(
f, "resource_uid", ""
)
severity = getattr(metadata, "Severity", "").capitalize() if metadata else ""
return {
"title": title,
"resource_name": resource_name,
"severity": severity,
"status": getattr(f, "status", "").upper(),
"region": getattr(f, "region", "global"),
}
def _create_findings_tables(
self,
findings: list[FindingOutput],
chunk_size: int | None = None,
) -> list[Any]:
"""Build a list of small findings tables to keep ``doc.build()`` memory bounded.
ReportLab resolves layout (column widths, row heights, page-breaks)
per Flowable. A single ``LongTable`` of 15k rows forces all of that
to be computed at once and reliably OOMs the worker on large scans.
Splitting into chunks of ``chunk_size`` rows produces an equivalent-
looking PDF (LongTable repeats headers; chunks render contiguously)
with a bounded memory peak per chunk.
Args:
findings: List of finding objects for a single check.
chunk_size: Rows per sub-table. ``None`` uses
``FINDINGS_TABLE_CHUNK_SIZE`` from config.
Returns:
List of ReportLab flowables (interleaved ``Table``/``LongTable``
and small ``Spacer`` between chunks). Empty list when there are
no findings.
"""
if not findings:
return []
chunk_size = chunk_size or FINDINGS_TABLE_CHUNK_SIZE
# Build all rows first so we can chunk without re-walking the
# FindingOutput list. Malformed findings are skipped with a logged
# exception; a single bad finding never aborts the entire report.
rows: list[dict[str, str]] = []
for f in findings:
try:
rows.append(self._finding_to_row(f))
except Exception:
logger.exception(
"Skipping malformed finding while building table for check %s",
getattr(f, "check_id", "unknown"),
)
if not rows:
return []
columns = self._findings_table_columns()
flowables: list = []
total = len(rows)
for start in range(0, total, chunk_size):
chunk = rows[start : start + chunk_size]
flowables.append(
create_data_table(
data=chunk,
columns=columns,
header_color=self.config.primary_color,
normal_style=self.styles["normal_center"],
)
)
# A tiny spacer between chunks keeps them visually contiguous
# without forcing a page-break (KeepTogether would negate the
# memory benefit of chunking).
if start + chunk_size < total:
flowables.append(Spacer(1, 0.05 * inch))
if total > chunk_size:
logger.debug(
"Built %d findings sub-tables (chunk_size=%d, total_findings=%d)",
(total + chunk_size - 1) // chunk_size,
chunk_size,
total,
)
return flowables
def _create_findings_table(self, findings: list[FindingOutput]) -> Any:
"""Deprecated alias kept for backwards compatibility.
Returns the first chunk produced by ``_create_findings_tables``.
New callers MUST use ``_create_findings_tables``, which returns a
list of flowables and is what ``create_detailed_findings`` invokes.
"""
flowables = self._create_findings_tables(findings)
if flowables:
return flowables[0]
# Empty input → return an empty (header-only) table so callers that
# used to receive a Table never get None.
return create_data_table(
data=[],
columns=self._findings_table_columns(),
data=data,
columns=columns,
header_color=self.config.primary_color,
normal_style=self.styles["normal_center"],
)
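The chunking idea in isolation: ReportLab resolves layout per Flowable, so many small Tables bound the peak memory that a single huge LongTable would not. A minimal standalone sketch (synthetic rows; assumes reportlab is installed):

from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Spacer, Table

CHUNK = 300  # rows per sub-table, mirroring FINDINGS_TABLE_CHUNK_SIZE

def chunked_tables(header, rows, chunk=CHUNK):
    # One small Table per chunk: doc.build() lays out each Flowable
    # independently, so the peak is bounded by chunk size, not total rows.
    flowables = []
    for start in range(0, len(rows), chunk):
        flowables.append(Table([header] + rows[start:start + chunk]))
        if start + chunk < len(rows):
            flowables.append(Spacer(1, 4))  # keeps chunks visually contiguous
    return flowables

rows = [[f"finding-{i}", "FAIL"] for i in range(1_000)]
doc = SimpleDocTemplate("findings.pdf", pagesize=letter)
doc.build(chunked_tables(["Finding", "Status"], rows))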
@@ -1,11 +1,9 @@
import gc
import io
import math
import time
from typing import Callable
import matplotlib
from celery.utils.log import get_task_logger
# Use non-interactive Agg backend for memory efficiency in server environments
# This MUST be set before importing pyplot
@@ -22,26 +20,6 @@ from .config import ( # noqa: E402
CHART_DPI_DEFAULT,
)
logger = get_task_logger(__name__)
def _log_chart_built(name: str, dpi: int, buffer: io.BytesIO, started: float) -> None:
"""Emit a structured DEBUG line summarising a chart render.
Centralised so the formatting stays consistent across all chart helpers
and so we never accidentally pay for buffer.getbuffer().nbytes when
debug logging is disabled.
"""
if logger.isEnabledFor(10): # logging.DEBUG
logger.debug(
"chart_built name=%s dpi=%d bytes=%d elapsed_s=%.2f",
name,
dpi,
buffer.getbuffer().nbytes,
time.perf_counter() - started,
)
# Use centralized DPI setting from config
DEFAULT_CHART_DPI = CHART_DPI_DEFAULT
@@ -99,7 +77,6 @@ def create_vertical_bar_chart(
Returns:
BytesIO buffer containing the PNG image
"""
_started = time.perf_counter()
if color_func is None:
color_func = get_chart_color_for_percentage
@@ -145,7 +122,6 @@ def create_vertical_bar_chart(
plt.close(fig)
gc.collect() # Force garbage collection after heavy matplotlib operation
_log_chart_built("vertical_bar", dpi, buffer, _started)
return buffer
@@ -180,7 +156,6 @@ def create_horizontal_bar_chart(
Returns:
BytesIO buffer containing the PNG image
"""
_started = time.perf_counter()
if color_func is None:
color_func = get_chart_color_for_percentage
@@ -232,7 +207,6 @@ def create_horizontal_bar_chart(
plt.close(fig)
gc.collect() # Force garbage collection after heavy matplotlib operation
_log_chart_built("horizontal_bar", dpi, buffer, _started)
return buffer
@@ -265,7 +239,6 @@ def create_radar_chart(
Returns:
BytesIO buffer containing the PNG image
"""
_started = time.perf_counter()
num_vars = len(labels)
angles = [n / float(num_vars) * 2 * math.pi for n in range(num_vars)]
@@ -302,7 +275,6 @@ def create_radar_chart(
plt.close(fig)
gc.collect() # Force garbage collection after heavy matplotlib operation
_log_chart_built("radar", dpi, buffer, _started)
return buffer
@@ -331,7 +303,6 @@ def create_pie_chart(
Returns:
BytesIO buffer containing the PNG image
"""
_started = time.perf_counter()
fig, ax = plt.subplots(figsize=figsize)
_, _, autotexts = ax.pie(
@@ -359,7 +330,6 @@ def create_pie_chart(
plt.close(fig)
gc.collect() # Force garbage collection after heavy matplotlib operation
_log_chart_built("pie", dpi, buffer, _started)
return buffer
@@ -392,7 +362,6 @@ def create_stacked_bar_chart(
Returns:
BytesIO buffer containing the PNG image
"""
_started = time.perf_counter()
fig, ax = plt.subplots(figsize=figsize)
# Default colors if not provided
@@ -432,5 +401,4 @@ def create_stacked_bar_chart(
plt.close(fig)
gc.collect() # Force garbage collection after heavy matplotlib operation
_log_chart_built("stacked_bar", dpi, buffer, _started)
return buffer
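All the chart helpers above share one render-to-buffer pattern; reduced to a standalone sketch (synthetic data; assumes matplotlib is installed):

import gc
import io

import matplotlib

matplotlib.use("Agg")  # must be set before importing pyplot
import matplotlib.pyplot as plt

def render_bar_chart_png(values, dpi=150) -> io.BytesIO:
    # Render to an in-memory PNG and release the figure immediately;
    # pyplot keeps figures alive until plt.close(), which is what the
    # explicit close + gc.collect() after each helper above guards against.
    fig, ax = plt.subplots(figsize=(4, 3))
    ax.bar(range(len(values)), values)
    buffer = io.BytesIO()
    fig.savefig(buffer, format="png", dpi=dpi)
    buffer.seek(0)
    plt.close(fig)
    gc.collect()
    return buffer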
@@ -475,15 +475,8 @@ def create_data_table(
else:
value = item.get(col.field, "")
# Wrap every string cell in Paragraph so the data rows keep the
# caller-supplied font/colour/alignment. Skipping Paragraph for
# short cells (a tempting micro-optimisation) breaks visual
# consistency: ReportLab Table falls back to Helvetica/black for
# raw strings, mixing fonts within the same table.
# ``escape_html`` keeps ``<``/``>``/``&`` in resource names from
# breaking Paragraph's mini-HTML parser.
if normal_style and isinstance(value, str):
value = Paragraph(escape_html(value), normal_style)
value = Paragraph(value, normal_style)
row.append(value)
table_data.append(row)
@@ -515,26 +508,17 @@ def create_data_table(
for idx, col in enumerate(columns):
styles.append(("ALIGN", (idx, 0), (idx, -1), col.align))
# Alternate row backgrounds: single O(1) ROWBACKGROUNDS style entry.
# The previous implementation appended N per-row BACKGROUND commands,
# which scaled the TableStyle list linearly with row count. ReportLab
# cycles through the colour list row-by-row so the visual is identical.
# The ALTERNATE_ROWS_MAX_SIZE cap is preserved to mirror legacy
# behaviour (very large tables stay plain), but the memory cost of the
# styles list is now constant regardless of row count.
# Alternate row backgrounds - skip for very large tables as it adds memory overhead
if (
alternate_rows
and len(table_data) > 1
and len(table_data) <= ALTERNATE_ROWS_MAX_SIZE
):
styles.append(
(
"ROWBACKGROUNDS",
(0, 1),
(-1, -1),
[colors.white, colors.Color(0.98, 0.98, 0.98)],
)
)
for i in range(1, len(table_data)):
if i % 2 == 0:
styles.append(
("BACKGROUND", (0, i), (-1, i), colors.Color(0.98, 0.98, 0.98))
)
table.setStyle(TableStyle(styles))
return table
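Of the two styling approaches shown above, ROWBACKGROUNDS is the constant-size one; a minimal sketch of the command on a synthetic table:

from reportlab.lib import colors
from reportlab.platypus import Table, TableStyle

table = Table([["Header"], ["row 1"], ["row 2"], ["row 3"]])
# One O(1) style command covers every data row: ReportLab cycles the colour
# list row-by-row, visually identical to N per-row BACKGROUND commands.
table.setStyle(TableStyle([
    ("ROWBACKGROUNDS", (0, 1), (-1, -1),
     [colors.white, colors.Color(0.98, 0.98, 0.98)]),
]))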
@@ -1,4 +1,3 @@
import os
from dataclasses import dataclass, field
from reportlab.lib import colors
@@ -24,47 +23,6 @@ ALTERNATE_ROWS_MAX_SIZE = 200
# Larger = fewer queries but more memory per batch
FINDINGS_BATCH_SIZE = 2000
# Maximum rows per findings sub-table. ReportLab resolves layout per Flowable;
# splitting a huge findings list into multiple smaller tables keeps the peak
# memory of doc.build() bounded. A single 15k-row LongTable would force
# ReportLab to compute all column widths/row heights/page-breaks at once and
# OOM the worker; 300-row chunks are rendered contiguously with negligible
# visual impact.
FINDINGS_TABLE_CHUNK_SIZE = 300
# Maximum findings rendered per check in the detailed-findings section.
#
# Product behaviour: compliance PDFs render at most ``MAX_FINDINGS_PER_CHECK``
# **failed** findings per check (PASS rows are excluded at SQL level by the
# ``only_failed`` flag that all four list-rendering frameworks default to:
# ThreatScore, NIS2, CSA, CIS; ENS does not render finding tables). Above
# this cap each affected check renders an in-PDF banner
# ("Showing first 100 of N failed findings for this check. Use the CSV
# or JSON export for the full list") so the reader knows the table is
# truncated and where to find the full data.
#
# Why a cap exists at all:
# * ``FindingOutput.transform_api_finding`` is O(N) per finding (Pydantic
# v1 validation + nested model construction).
# * ReportLab resolves layout per Flowable; thousands of sub-tables make
# ``doc.build()`` very slow and grow the PDF unboundedly.
# * A human-readable executive/auditor PDF does not need 12,000 rows for
# one check; that is forensic data and lives in the CSV/JSON exports.
#
# Why 100 specifically:
# * Covers ~99% of real scans without truncation (most checks emit far
# fewer than 100 findings even in enterprise estates).
# * Worst-case rendered rows = 100 × ~500 checks = 50k rows across all
# frameworks, which keeps RSS bounded and a 5-framework run completes
# in minutes instead of hours.
#
# Override at runtime via ``DJANGO_PDF_MAX_FINDINGS_PER_CHECK``:
# * Set to ``0`` to disable the cap entirely (load every finding; only
# advisable for small scans).
# * Set to a larger value (e.g. ``500``) for forensic detail in big runs;
# watch RSS in the Celery worker.
MAX_FINDINGS_PER_CHECK = int(os.environ.get("DJANGO_PDF_MAX_FINDINGS_PER_CHECK", "100"))
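A hypothetical override for a one-off forensic run; the value is read once at module import, so it must be in the worker's environment before Django loads this config:

import os

# Illustrative only: disable the per-check cap entirely (load every finding).
# In practice this is exported in the worker's shell/unit environment rather
# than set from Python, because the module reads it at import time.
os.environ["DJANGO_PDF_MAX_FINDINGS_PER_CHECK"] = "0"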
# =============================================================================
# Base colors
+12 -127
@@ -1,8 +1,6 @@
from celery.utils.log import get_task_logger
from config.django.base import DJANGO_FINDINGS_BATCH_SIZE
from django.db.models import Count, F, Q, Window
from django.db.models.functions import RowNumber
from tasks.jobs.reports.config import MAX_FINDINGS_PER_CHECK
from django.db.models import Count, Q
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
@@ -156,8 +154,6 @@ def _load_findings_for_requirement_checks(
check_ids: list[str],
prowler_provider,
findings_cache: dict[str, list[FindingOutput]] | None = None,
total_counts_out: dict[str, int] | None = None,
only_failed_findings: bool = False,
) -> dict[str, list[FindingOutput]]:
"""
Load findings for specific check IDs on-demand with optional caching.
@@ -182,23 +178,6 @@ def _load_findings_for_requirement_checks(
prowler_provider: The initialized Prowler provider instance.
findings_cache (dict, optional): Cache of already loaded findings.
If provided, checks are first looked up in cache before querying database.
total_counts_out (dict, optional): If provided, populated with
``{check_id: total_findings_in_db}`` BEFORE any per-check cap is
applied. Lets callers render a "Showing first N of M" banner for
truncated checks. Only populated for ``check_ids`` actually
queried (cache hits keep whatever value the caller already had).
When ``only_failed_findings=True`` the total is FAIL-only.
only_failed_findings (bool): When True, push the ``status=FAIL``
filter down into the SQL query so PASS rows are never loaded
from the DB nor pydantic-transformed. This matches the
``only_failed`` requirement-level filter applied at PDF render
time: a requirement marked FAIL because 1/1000 findings failed
shouldn't render a table of 999 PASS rows. That hides the
actual failure under noise and wastes the per-check cap on
irrelevant data. NOTE: the findings cache stores whatever the
first caller asked for, so all callers in a single
``generate_compliance_reports`` run MUST pass the same flag
(which they do: it threads from ``only_failed`` defaults).
Returns:
dict[str, list[FindingOutput]]: Dictionary mapping check_id to list of FindingOutput objects.
@@ -243,70 +222,17 @@ def _load_findings_for_requirement_checks(
)
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
base_qs = Finding.all_objects.filter(
tenant_id=tenant_id,
scan_id=scan_id,
check_id__in=check_ids_to_load,
# Use iterator with chunk_size for memory-efficient streaming
# chunk_size controls how many rows Django fetches from DB at once
findings_queryset = (
Finding.all_objects.filter(
tenant_id=tenant_id,
scan_id=scan_id,
check_id__in=check_ids_to_load,
)
.order_by("check_id", "uid")
.iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
)
if only_failed_findings:
# Push the FAIL filter down into SQL: DB returns ~N×FAIL
# rows instead of N×ALL, and we never spend pydantic CPU on
# PASS findings the PDF would never render.
base_qs = base_qs.filter(status=StatusChoices.FAIL)
# Aggregate totals once so we (a) know which checks need capping
# and (b) can surface "Showing first N of M" in the PDF banner.
# Cheap: a single COUNT grouped by check_id.
totals: dict[str, int] = {
row["check_id"]: row["total"]
for row in base_qs.values("check_id").annotate(total=Count("id"))
}
if total_counts_out is not None:
total_counts_out.update(totals)
cap = MAX_FINDINGS_PER_CHECK
checks_over_cap = (
{cid for cid, n in totals.items() if n > cap} if cap > 0 else set()
)
# Use iterator with chunk_size for memory-efficient streaming.
# FindingOutput.transform_api_finding (prowler/lib/outputs/finding.py)
# reads finding.resources.first() and resource.tags.all() per
# finding, which without prefetch generates 2N queries per chunk.
# prefetch_related runs once per iterator chunk (Django >=4.1) and
# collapses that into a constant 2 extra queries per chunk.
if checks_over_cap:
# Top-N per check via a window function: PostgreSQL only
# materialises ``cap * |checks_over_cap| + sum(uncapped)``
# rows, vs the full table scan the previous path did.
ranked = base_qs.annotate(
rn=Window(
expression=RowNumber(),
partition_by=[F("check_id")],
order_by=F("uid").asc(),
)
)
findings_queryset = (
Finding.all_objects.filter(
id__in=ranked.filter(rn__lte=cap).values("id")
)
.prefetch_related("resources", "resources__tags")
.order_by("check_id", "uid")
.iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
)
logger.info(
"Per-check cap=%d active for %d checks (max %d each); "
"skipping transform for surplus rows",
cap,
len(checks_over_cap),
cap,
)
else:
findings_queryset = (
base_qs.prefetch_related("resources", "resources__tags")
.order_by("check_id", "uid")
.iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
)
# Pre-initialize empty lists for all check_ids to load
# This avoids repeated dict lookups and 'if not in' checks
@@ -322,11 +248,7 @@ def _load_findings_for_requirement_checks(
findings_count += 1
logger.info(
"Loaded %d findings for %d checks (truncated %d checks total=%d)",
findings_count,
len(check_ids_to_load),
len(checks_over_cap),
sum(totals.values()),
f"Loaded {findings_count} findings for {len(check_ids_to_load)} checks"
)
# Build result dict using cache references (no data duplication)
@@ -336,40 +258,3 @@ def _load_findings_for_requirement_checks(
}
return result
def _get_compliance_check_ids(compliance_obj) -> set[str]:
"""Return the union of all check_ids referenced by a compliance framework.
Used by the master report orchestrator to know which checks each
framework consumes from the shared ``findings_cache``, so that once a
framework finishes the entries no other pending framework needs can be
evicted from the cache (PROWLER-1733).
Args:
compliance_obj: A loaded Compliance framework object exposing a
``Requirements`` iterable, each requirement carrying ``Checks``.
``None`` is treated as "no checks" rather than raising, so the
caller can pass ``frameworks_bulk.get(...)`` directly without
an extra existence check.
Returns:
Set of check_id strings (empty if ``compliance_obj`` is ``None``).
"""
if compliance_obj is None:
return set()
checks: set[str] = set()
requirements = getattr(compliance_obj, "Requirements", None) or []
try:
# Defensive: Mock objects (used in unit tests) return another Mock
# for any attribute access, which is truthy but not iterable. Treat
# any non-iterable Requirements value as "no checks".
for req in requirements:
req_checks = getattr(req, "Checks", None) or []
try:
checks.update(req_checks)
except TypeError:
continue
except TypeError:
return set()
return checks
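For readers tracing the capped window-function branch above: the ranked-subquery pattern compiles to roughly this PostgreSQL (a sketch with illustrative table/column names, not the exact SQL Django emits):

CAPPED_FINDINGS_SQL = """
SELECT f.*
FROM findings f
WHERE f.id IN (
    SELECT id FROM (
        SELECT id,
               ROW_NUMBER() OVER (PARTITION BY check_id ORDER BY uid ASC) AS rn
        FROM findings
        WHERE tenant_id = %(tenant_id)s
          AND scan_id = %(scan_id)s
          AND check_id = ANY(%(check_ids)s)
    ) ranked
    WHERE rn <= %(cap)s
)
ORDER BY f.check_id, f.uid
"""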
-488
@@ -44,8 +44,6 @@ from api.models import (
Finding,
Resource,
ResourceFindingMapping,
ResourceTag,
ResourceTagMapping,
StateChoices,
StatusChoices,
)
@@ -369,317 +367,6 @@ class TestLoadFindingsForChecks:
assert result == {}
def test_prefetch_avoids_n_plus_one(self, tenants_fixture, scans_fixture):
"""Loading N findings must NOT execute O(N) extra queries for resources/tags.
Regression test for PROWLER-1733. ``FindingOutput.transform_api_finding``
reads ``finding.resources.first()`` and ``resource.tags.all()`` per
finding. Without ``prefetch_related`` that's 2N additional queries;
with prefetch it collapses to a small constant per iterator chunk.
"""
from django.test.utils import CaptureQueriesContext
from django.db import connections
tenant = tenants_fixture[0]
scan = scans_fixture[0]
# Build N findings, each linked to one resource that owns 2 tags.
N = 20
for i in range(N):
finding = Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid=f"f-prefetch-{i}",
check_id="aws_check_prefetch",
status=StatusChoices.FAIL,
severity=Severity.high,
impact=Severity.high,
check_metadata={
"provider": "aws",
"checkid": "aws_check_prefetch",
"checktitle": "t",
"checktype": [],
"servicename": "s",
"subservicename": "",
"severity": "high",
"resourcetype": "r",
"description": "",
"risk": "",
"relatedurl": "",
"remediation": {
"recommendation": {"text": "", "url": ""},
"code": {
"nativeiac": "",
"terraform": "",
"cli": "",
"other": "",
},
},
"resourceidtemplate": "",
"categories": [],
"dependson": [],
"relatedto": [],
"notes": "",
},
raw_result={},
)
resource = Resource.objects.create(
tenant_id=tenant.id,
provider=scan.provider,
uid=f"r-prefetch-{i}",
name=f"r-prefetch-{i}",
metadata="{}",
details="",
region="us-east-1",
service="s",
type="t::r",
)
ResourceFindingMapping.objects.create(
tenant_id=tenant.id, finding=finding, resource=resource
)
for k in ("env", "owner"):
tag, _ = ResourceTag.objects.get_or_create(
tenant_id=tenant.id, key=k, value=f"v-{i}-{k}"
)
ResourceTagMapping.objects.create(
tenant_id=tenant.id, resource=resource, tag=tag
)
mock_provider = Mock()
mock_provider.type = "aws"
mock_provider.identity.account = "test"
# Patch transform_api_finding to a no-op so the test isolates queries
# to the queryset/prefetch path (transform itself is exercised by
# the integration tests above and not by this regression check).
with patch(
"tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
side_effect=lambda model, provider: Mock(check_id=model.check_id),
):
with CaptureQueriesContext(
connections["default_read_replica"]
if "default_read_replica" in connections.databases
else connections["default"]
) as ctx:
_load_findings_for_requirement_checks(
str(tenant.id),
str(scan.id),
["aws_check_prefetch"],
mock_provider,
)
# Expected: a small constant number of queries irrespective of N.
# Pre-fix this would be ~1 + 2*N. We give some slack for RLS SET
# LOCAL statements that the rls_transaction emits.
assert len(ctx.captured_queries) < N, (
f"Expected O(1) queries with prefetch_related; got "
f"{len(ctx.captured_queries)} for N={N} (N+1 regression?)"
)
def test_max_findings_per_check_cap(self, tenants_fixture, scans_fixture):
"""When a check exceeds ``MAX_FINDINGS_PER_CHECK``, only ``cap`` rows
are loaded AND ``total_counts_out`` reports the pre-cap total.
Guards the PROWLER-1733 truncation knob: prevents both runaway memory
and silent data loss in the PDF (the banner relies on knowing the
real total).
"""
from unittest.mock import patch as _patch
tenant = tenants_fixture[0]
scan = scans_fixture[0]
# Create 12 findings for a single check; cap to 5.
check_id = "aws_check_cap_test"
for i in range(12):
finding = Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid=f"f-cap-{i:02d}",
check_id=check_id,
status=StatusChoices.FAIL,
severity=Severity.high,
impact=Severity.high,
check_metadata={},
raw_result={},
)
resource = Resource.objects.create(
tenant_id=tenant.id,
provider=scan.provider,
uid=f"r-cap-{i:02d}",
name=f"r-cap-{i:02d}",
metadata="{}",
details="",
region="us-east-1",
service="s",
type="t::r",
)
ResourceFindingMapping.objects.create(
tenant_id=tenant.id, finding=finding, resource=resource
)
mock_provider = Mock(type="aws")
mock_provider.identity.account = "test"
totals: dict = {}
# Patch the cap to a small value AND skip the heavy transform so we
# only assert on row counts and totals.
with (
_patch("tasks.jobs.threatscore_utils.MAX_FINDINGS_PER_CHECK", 5),
_patch(
"tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
side_effect=lambda model, provider: Mock(check_id=model.check_id),
),
):
result = _load_findings_for_requirement_checks(
str(tenant.id),
str(scan.id),
[check_id],
mock_provider,
total_counts_out=totals,
)
assert len(result[check_id]) == 5, (
f"cap=5 should yield exactly 5 loaded findings, got {len(result[check_id])}"
)
assert totals[check_id] == 12, (
f"total_counts_out should report the pre-cap total (12), got {totals[check_id]}"
)
def test_only_failed_findings_pushes_down_to_sql(
self, tenants_fixture, scans_fixture
):
"""When ``only_failed_findings=True``, PASS rows are excluded by the
DB filter, not just visually hidden afterwards.
Regression for the consistency fix: previously the requirement-level
``only_failed`` flag filtered which requirements appeared, but inside
each rendered requirement the table still showed PASS rows mixed
with FAIL, which combined with ``MAX_FINDINGS_PER_CHECK`` could
truncate to 1000 PASS findings and hide the actual failure.
"""
from unittest.mock import patch as _patch
tenant = tenants_fixture[0]
scan = scans_fixture[0]
check_id = "aws_check_only_failed_test"
# Mix PASS and FAIL so the filter has something to drop.
for i in range(6):
status = StatusChoices.FAIL if i % 2 == 0 else StatusChoices.PASS
finding = Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid=f"f-of-{i:02d}",
check_id=check_id,
status=status,
severity=Severity.high,
impact=Severity.high,
check_metadata={},
raw_result={},
)
resource = Resource.objects.create(
tenant_id=tenant.id,
provider=scan.provider,
uid=f"r-of-{i:02d}",
name=f"r-of-{i:02d}",
metadata="{}",
details="",
region="us-east-1",
service="s",
type="t::r",
)
ResourceFindingMapping.objects.create(
tenant_id=tenant.id, finding=finding, resource=resource
)
mock_provider = Mock(type="aws")
mock_provider.identity.account = "test"
totals: dict = {}
with _patch(
"tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
side_effect=lambda model, provider: Mock(
check_id=model.check_id, status=model.status
),
):
result = _load_findings_for_requirement_checks(
str(tenant.id),
str(scan.id),
[check_id],
mock_provider,
total_counts_out=totals,
only_failed_findings=True,
)
# 3 FAIL + 3 PASS in DB; FAIL-only filter should load just 3.
loaded = result[check_id]
assert len(loaded) == 3, f"expected 3 FAIL findings, got {len(loaded)}"
statuses = {getattr(f, "status", None) for f in loaded}
assert statuses == {StatusChoices.FAIL}, (
f"expected all loaded findings to be FAIL; got statuses {statuses}"
)
# total_counts must reflect the FAIL-only total, not the global total.
assert totals[check_id] == 3, (
f"total_counts should be FAIL-only (3), got {totals[check_id]}"
)
def test_max_findings_per_check_disabled(self, tenants_fixture, scans_fixture):
"""``MAX_FINDINGS_PER_CHECK=0`` disables the cap; load all rows."""
from unittest.mock import patch as _patch
tenant = tenants_fixture[0]
scan = scans_fixture[0]
check_id = "aws_check_uncapped"
for i in range(8):
f = Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid=f"f-unc-{i:02d}",
check_id=check_id,
status=StatusChoices.FAIL,
severity=Severity.high,
impact=Severity.high,
check_metadata={},
raw_result={},
)
r = Resource.objects.create(
tenant_id=tenant.id,
provider=scan.provider,
uid=f"r-unc-{i:02d}",
name=f"r-unc-{i:02d}",
metadata="{}",
details="",
region="us-east-1",
service="s",
type="t::r",
)
ResourceFindingMapping.objects.create(
tenant_id=tenant.id, finding=f, resource=r
)
mock_provider = Mock(type="aws")
mock_provider.identity.account = "test"
totals: dict = {}
with (
_patch("tasks.jobs.threatscore_utils.MAX_FINDINGS_PER_CHECK", 0),
_patch(
"tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
side_effect=lambda model, provider: Mock(check_id=model.check_id),
),
):
result = _load_findings_for_requirement_checks(
str(tenant.id),
str(scan.id),
[check_id],
mock_provider,
total_counts_out=totals,
)
assert len(result[check_id]) == 8
assert totals[check_id] == 8
class TestCleanupStaleTmpOutputDirectories:
"""Unit tests for opportunistic stale cleanup under tmp output root."""
@@ -1168,181 +855,6 @@ class TestGenerateComplianceReportsOptimized:
assert result["cis"] == {"upload": False, "path": ""}
mock_cis.assert_not_called()
@patch("api.utils.initialize_prowler_provider")
@patch("tasks.jobs.report.rmtree")
@patch("tasks.jobs.report._upload_to_s3")
@patch("tasks.jobs.report.generate_cis_report")
@patch("tasks.jobs.report.generate_csa_report")
@patch("tasks.jobs.report.generate_nis2_report")
@patch("tasks.jobs.report.generate_ens_report")
@patch("tasks.jobs.report.generate_threatscore_report")
@patch("tasks.jobs.report._generate_compliance_output_directory")
@patch("tasks.jobs.report._aggregate_requirement_statistics_from_database")
@patch("tasks.jobs.report.Compliance.get_bulk")
@patch("tasks.jobs.report.Provider.objects.get")
@patch("tasks.jobs.report.ScanSummary.objects.filter")
def test_findings_cache_eviction_after_framework(
self,
mock_scan_summary_filter,
mock_provider_get,
mock_get_bulk,
mock_aggregate_stats,
mock_generate_output_dir,
mock_threatscore,
mock_ens,
mock_nis2,
mock_csa,
mock_cis,
mock_upload_to_s3,
mock_rmtree,
mock_init_provider,
):
"""After each framework finishes, exclusive entries are evicted.
Threat scenario for PROWLER-1733: the shared ``findings_cache`` used
to grow monotonically through all 5 frameworks. With the new
eviction logic, check_ids only used by ThreatScore are dropped when
ThreatScore finishes, before ENS runs.
"""
from types import SimpleNamespace
from tasks.jobs import report as report_mod
mock_scan_summary_filter.return_value.exists.return_value = True
mock_provider_get.return_value = Mock(uid="provider-uid", provider="aws")
# ThreatScore consumes {tsc_only, shared}; ENS consumes {ens_only,
# shared}. After ThreatScore evicts, tsc_only must be gone but
# shared and ens_only must remain.
mock_get_bulk.return_value = {
"prowler_threatscore_aws": SimpleNamespace(
Requirements=[SimpleNamespace(Checks=["tsc_only", "shared"])]
),
"ens_rd2022_aws": SimpleNamespace(
Requirements=[SimpleNamespace(Checks=["ens_only", "shared"])]
),
}
mock_aggregate_stats.return_value = {}
mock_generate_output_dir.return_value = "/tmp/tenant/scan/x/prowler-out"
mock_upload_to_s3.return_value = "s3://bucket/tenant/scan/x/report.pdf"
mock_init_provider.return_value = Mock(name="prowler_provider")
        # Seed the cache as if both frameworks had already loaded their
        # findings. We mutate it indirectly: each generator wrapper is a
        # Mock, so we make ThreatScore populate the cache and have ENS
        # observe the state at call time to introspect post-eviction.
observed_state: dict = {}
def _threatscore_side_effect(**kwargs):
cache = kwargs["findings_cache"]
cache["tsc_only"] = ["tsc-finding"]
cache["shared"] = ["shared-finding"]
def _ens_side_effect(**kwargs):
# ENS runs AFTER threatscore's _evict_after_framework("threatscore").
observed_state["cache_keys_when_ens_runs"] = set(
kwargs["findings_cache"].keys()
)
kwargs["findings_cache"]["ens_only"] = ["ens-finding"]
mock_threatscore.side_effect = _threatscore_side_effect
mock_ens.side_effect = _ens_side_effect
report_mod.generate_compliance_reports(
tenant_id=str(uuid.uuid4()),
scan_id=str(uuid.uuid4()),
provider_id=str(uuid.uuid4()),
generate_threatscore=True,
generate_ens=True,
generate_nis2=False,
generate_csa=False,
generate_cis=False,
)
# ``tsc_only`` was exclusive to ThreatScore → evicted before ENS ran.
# ``shared`` is still pending for ENS → must remain.
assert "tsc_only" not in observed_state["cache_keys_when_ens_runs"], (
"tsc_only should have been evicted before ENS ran"
)
assert "shared" in observed_state["cache_keys_when_ens_runs"], (
"shared must remain in cache because ENS still needs it"
)
@patch("api.utils.initialize_prowler_provider")
@patch("tasks.jobs.report.rmtree")
@patch("tasks.jobs.report._upload_to_s3")
@patch("tasks.jobs.report.generate_cis_report")
@patch("tasks.jobs.report.generate_csa_report")
@patch("tasks.jobs.report.generate_nis2_report")
@patch("tasks.jobs.report.generate_ens_report")
@patch("tasks.jobs.report.generate_threatscore_report")
@patch("tasks.jobs.report._generate_compliance_output_directory")
@patch("tasks.jobs.report._aggregate_requirement_statistics_from_database")
@patch("tasks.jobs.report.Compliance.get_bulk")
@patch("tasks.jobs.report.Provider.objects.get")
@patch("tasks.jobs.report.ScanSummary.objects.filter")
def test_prowler_provider_initialized_once(
self,
mock_scan_summary_filter,
mock_provider_get,
mock_get_bulk,
mock_aggregate_stats,
mock_generate_output_dir,
mock_threatscore,
mock_ens,
mock_nis2,
mock_csa,
mock_cis,
mock_upload_to_s3,
mock_rmtree,
mock_init_provider,
):
"""``initialize_prowler_provider`` must be called exactly once for
the whole batch (PROWLER-1733). Previously each generator re-init'd
the SDK provider in ``_load_compliance_data`` → 5 inits per scan.
"""
mock_scan_summary_filter.return_value.exists.return_value = True
mock_provider_get.return_value = Mock(uid="provider-uid", provider="aws")
# CIS variant discovery needs at least one cis_* key.
mock_get_bulk.return_value = {"cis_6.0_aws": Mock()}
mock_aggregate_stats.return_value = {}
mock_generate_output_dir.return_value = "/tmp/tenant/scan/x/prowler-out"
mock_upload_to_s3.return_value = "s3://bucket/tenant/scan/x/report.pdf"
mock_init_provider.return_value = Mock(name="prowler_provider")
generate_compliance_reports(
tenant_id=str(uuid.uuid4()),
scan_id=str(uuid.uuid4()),
provider_id=str(uuid.uuid4()),
generate_threatscore=True,
generate_ens=True,
generate_nis2=True,
generate_csa=True,
generate_cis=True,
)
# All 5 wrappers were invoked once each…
mock_threatscore.assert_called_once()
mock_ens.assert_called_once()
mock_nis2.assert_called_once()
mock_csa.assert_called_once()
mock_cis.assert_called_once()
# …but the SDK provider was initialized only once.
assert mock_init_provider.call_count == 1, (
f"expected 1 init, got {mock_init_provider.call_count} "
f"(prowler_provider must be shared across reports)"
)
# The shared instance must reach every wrapper as kwargs.
shared = mock_init_provider.return_value
for mock_wrapper in (
mock_threatscore,
mock_ens,
mock_nis2,
mock_csa,
mock_cis,
):
_, call_kwargs = mock_wrapper.call_args
assert call_kwargs.get("prowler_provider") is shared
@patch("tasks.jobs.report.rmtree")
@patch("tasks.jobs.report._upload_to_s3")
@patch("tasks.jobs.report.generate_threatscore_report")
@@ -1269,48 +1269,6 @@ class TestComponentEdgeCases:
# Should be a LongTable for large datasets
assert isinstance(table, LongTable)
def test_zebra_uses_rowbackgrounds_not_per_row_background(self, monkeypatch):
"""The styles list must contain exactly one ROWBACKGROUNDS entry
regardless of row count, never N per-row BACKGROUND entries.
"""
captured: dict = {}
# Capture the list passed to TableStyle. create_data_table builds a
# list of style tuples and wraps it in a TableStyle exactly once;
# by patching TableStyle we intercept that list.
import tasks.jobs.reports.components as comp_mod
original_table_style = comp_mod.TableStyle
def _capture_table_style(style_list):
captured["styles"] = list(style_list)
return original_table_style(style_list)
monkeypatch.setattr(comp_mod, "TableStyle", _capture_table_style)
data = [{"name": f"Item {i}"} for i in range(60)]
columns = [ColumnConfig("Name", 2 * inch, "name")]
comp_mod.create_data_table(data, columns, alternate_rows=True)
styles = captured["styles"]
# Count by command name.
names = [s[0] for s in styles if isinstance(s, tuple) and s]
# Exactly one ROWBACKGROUNDS entry.
assert names.count("ROWBACKGROUNDS") == 1
# Zero per-row BACKGROUND entries on data rows. (The header row
# BACKGROUND command is intentional and lives at coords (0,0)/(-1,0).)
data_row_bg = [
s
for s in styles
if isinstance(s, tuple)
and s[0] == "BACKGROUND"
and not (s[1] == (0, 0) and s[2] == (-1, 0))
]
assert data_row_bg == [], (
f"expected no per-row BACKGROUND entries on data rows; "
f"got {len(data_row_bg)}"
)
def test_create_risk_component_zero_values(self):
"""Test risk component with zero values."""
component = create_risk_component(risk_level=0, weight=0, score=0)
@@ -1386,194 +1344,3 @@ class TestFrameworkConfigEdgeCases:
assert get_framework_config("my_custom_threatscore_compliance") is not None
assert get_framework_config("ens_something_else") is not None
assert get_framework_config("nis2_gcp") is not None
# =============================================================================
# Findings Table Chunking Tests (PROWLER-1733)
# =============================================================================
#
# These tests guard the OOM-prevention behaviour added in PROWLER-1733:
# ``_create_findings_tables`` must split a list of findings into multiple
# small sub-tables instead of producing one giant Table, which would force
# ReportLab to resolve layout for all rows at once and OOM the worker on
# scans with thousands of findings per check.
class _DummyMetadata:
"""Lightweight stand-in for FindingOutput.metadata used in chunking tests."""
def __init__(self, check_title: str = "Title", severity: str = "high"):
self.CheckTitle = check_title
self.Severity = severity
class _DummyFinding:
"""Lightweight stand-in for FindingOutput used in chunking tests.
The chunking code only reads a small set of attributes via ``getattr``,
so a duck-typed object is enough and lets the tests run without touching
the DB or pydantic deserialisation.
"""
def __init__(
self,
check_id: str = "aws_check",
resource_name: str = "res-1",
resource_uid: str = "",
status: str = "FAIL",
region: str = "us-east-1",
with_metadata: bool = True,
):
self.check_id = check_id
self.resource_name = resource_name
self.resource_uid = resource_uid
self.status = status
self.region = region
if with_metadata:
self.metadata = _DummyMetadata()
else:
self.metadata = None
def _make_concrete_generator():
"""Return a minimal concrete subclass of BaseComplianceReportGenerator."""
class _Concrete(BaseComplianceReportGenerator):
def create_executive_summary(self, data):
return []
def create_charts_section(self, data):
return []
def create_requirements_index(self, data):
return []
return _Concrete(FrameworkConfig(name="test", display_name="Test"))
class TestFindingsTableChunking:
"""Tests for ``_create_findings_tables`` (PROWLER-1733)."""
def test_chunking_produces_expected_number_of_subtables(self):
"""5000 findings @ chunk_size=300 → 17 sub-tables + 16 spacers."""
generator = _make_concrete_generator()
findings = [_DummyFinding(check_id="c1") for _ in range(5000)]
flowables = generator._create_findings_tables(findings, chunk_size=300)
tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
spacers = [f for f in flowables if isinstance(f, Spacer)]
# ceil(5000 / 300) == 17
assert len(tables) == 17
# Spacer between every pair of contiguous tables, not after the last
assert len(spacers) == 16
def test_chunk_size_param_overrides_default(self):
"""250 findings @ chunk_size=100 → 3 sub-tables."""
generator = _make_concrete_generator()
findings = [_DummyFinding(check_id="c2") for _ in range(250)]
flowables = generator._create_findings_tables(findings, chunk_size=100)
tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
assert len(tables) == 3
def test_empty_findings_returns_empty_list(self):
"""No findings → no flowables. Callers can extend(...) safely."""
generator = _make_concrete_generator()
assert generator._create_findings_tables([]) == []
def test_single_chunk_has_no_spacer(self):
"""A single sub-table must not emit a trailing spacer."""
generator = _make_concrete_generator()
findings = [_DummyFinding(check_id="c3") for _ in range(10)]
flowables = generator._create_findings_tables(findings, chunk_size=300)
assert len(flowables) == 1
assert isinstance(flowables[0], (Table, LongTable))
def test_malformed_finding_is_skipped(self):
"""A broken finding must not abort the report; it is logged and skipped."""
generator = _make_concrete_generator()
class _Broken:
# No attributes at all; getattr() defaults will mostly cope, but
# we force an explicit error by making the metadata attribute
# itself raise on access.
@property
def metadata(self):
raise RuntimeError("boom")
check_id = "broken"
findings = [
_DummyFinding(check_id="c4"),
_Broken(),
_DummyFinding(check_id="c4"),
]
flowables = generator._create_findings_tables(findings, chunk_size=300)
# Two good rows → one sub-table containing them; the broken one is
# logged and dropped, not propagated.
tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
assert len(tables) == 1
def test_create_findings_table_alias_returns_first_chunk(self):
"""The deprecated alias must keep returning a single Table flowable."""
generator = _make_concrete_generator()
findings = [_DummyFinding(check_id="c5") for _ in range(700)]
first = generator._create_findings_table(findings)
assert isinstance(first, (Table, LongTable))
def test_create_findings_table_alias_empty(self):
"""Alias on empty input returns an empty (header-only) Table, not None."""
generator = _make_concrete_generator()
result = generator._create_findings_table([])
# The legacy alias never returned None; an empty header-only table
# is a strict superset of that contract.
assert isinstance(result, (Table, LongTable))
# =============================================================================
# Logging Context Manager Tests (PROWLER-1733)
# =============================================================================
class TestLogPhaseContextManager:
"""Tests for ``_log_phase`` (PROWLER-1733).
The context manager emits structured ``phase_start`` / ``phase_end``
logs with ``scan_id``, ``framework`` and ``elapsed_s``, so Datadog/
CloudWatch queries can pivot by scan and find the slow section.
"""
def test_emits_start_and_end_with_elapsed_and_rss(self, caplog):
from tasks.jobs.reports.base import _log_phase
caplog.set_level("INFO", logger="tasks.jobs.reports.base")
with _log_phase("unit_test_phase", scan_id="s-1", framework="Test FW"):
pass
messages = [r.getMessage() for r in caplog.records]
starts = [m for m in messages if "phase_start" in m]
ends = [m for m in messages if "phase_end" in m]
assert len(starts) == 1 and len(ends) == 1
assert "phase=unit_test_phase" in starts[0]
assert "scan_id=s-1" in starts[0]
assert "framework=Test FW" in starts[0]
assert "elapsed_s=" in ends[0]
assert "rss_kb=" in ends[0]
assert "delta_rss_kb=" in ends[0]
def test_failure_logs_phase_failed_and_reraises(self, caplog):
from tasks.jobs.reports.base import _log_phase
caplog.set_level("INFO", logger="tasks.jobs.reports.base")
with pytest.raises(RuntimeError, match="boom"):
with _log_phase("failing_phase", scan_id="s-2", framework="FW"):
raise RuntimeError("boom")
messages = [r.getMessage() for r in caplog.records]
assert any("phase_failed" in m and "failing_phase" in m for m in messages)
# No phase_end on the failure path.
assert not any("phase_end" in m for m in messages)
-335
View File
@@ -1,335 +0,0 @@
# AWS Inventory Connectivity Graph
A community-contributed tool that generates interactive connectivity graphs from Prowler AWS scans, visualizing relationships between AWS resources with zero additional API calls.
## Overview
This tool extends Prowler by producing two artifacts after a scan completes:
- **`<output>.inventory.json`**: Machine-readable graph (nodes + edges)
- **`<output>.inventory.html`**: Interactive D3.js force-directed visualization
### Why?
Prowler's existing outputs (CSV, ASFF, OCSF, HTML) report individual check findings but provide no cross-service topology view. Security engineers need to understand **how** resources are connected—which Lambda functions sit inside which VPC, which IAM roles can be assumed by which services, which event sources trigger which functions—before they can reason about attack paths, blast radius, or lateral-movement risk.
This tool fills that gap by building a connectivity graph from the service clients that are already loaded during a Prowler scan.
## Features
### Supported AWS Services
The tool currently extracts connectivity information from:
- **Lambda**: Functions, VPC/subnet/SG edges, event source mappings, layers, DLQ, KMS
- **EC2**: Instances, security groups, subnet/VPC edges
- **VPC**: VPCs, subnets, peering connections
- **RDS**: DB instances, VPC/SG/cluster/KMS edges
- **ELBv2**: ALB/NLB load balancers, SG and VPC edges
- **S3**: Buckets, replication targets, logging buckets, KMS keys
- **IAM**: Roles, trust-relationship edges (who can assume what)
### Edge Semantic Types
Edges are typed for downstream filtering and attack-path analysis (see the filtering sketch after this list):
- `network`: Resources share a network path (VPC/subnet/SG)
- `iam`: IAM trust or permission relationship
- `triggers`: One resource can invoke another (event source → Lambda)
- `data_flow`: Data is written/read (Lambda → SQS dead-letter queue)
- `depends_on`: Soft dependency (Lambda layer, subnet belongs to VPC)
- `routes_to`: Traffic routing (LB → target)
- `replicates_to`: S3 replication
- `encrypts`: KMS key encrypts the resource
- `logs_to`: Logging relationship
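As a quick illustration, the JSON artifact can be pre-filtered by these semantic types before it is fed into an attack-path tool. A minimal sketch, not part of the tool itself; the input path is illustrative:

```python
import json

# Load a previously generated graph (the path is illustrative).
with open("output/my-aws-inventory.inventory.json", encoding="utf-8") as fh:
    graph = json.load(fh)

# Keep only the relationship classes relevant to attack-path analysis.
ATTACK_PATH_TYPES = {"iam", "triggers", "network"}
attack_edges = [e for e in graph["edges"] if e["edge_type"] in ATTACK_PATH_TYPES]

print(f"{len(attack_edges)} of {graph['stats']['edge_count']} edges kept")
```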
### Interactive HTML Graph Features
- Force-directed layout with drag-and-drop node pinning
- Zoom / pan (mouse wheel + click-drag on background)
- Per-service color-coded nodes with a legend
- Hover tooltips showing ARN + all metadata properties
- Service filter dropdown (show only Lambda, EC2, RDS, etc.)
- Adjustable link-distance and charge-strength physics sliders
- Edge labels on every arrow
## Installation
### Prerequisites
- Python 3.9.1 or higher
- Prowler installed and configured (see [Prowler documentation](https://docs.prowler.com/))
### Setup
1. Clone or download this directory to your local machine
2. Ensure Prowler is installed and working
3. No additional dependencies required beyond Prowler's existing requirements
## Usage
### Basic Usage
Run Prowler with your desired checks, then use the inventory graph script:
```bash
# Run Prowler scan (example)
prowler aws --output-formats csv
# Generate inventory graph from the scan
python contrib/inventory-graph/inventory_graph.py --output-directory ./output
```
### Command-Line Options
```bash
python contrib/inventory-graph/inventory_graph.py [OPTIONS]
Options:
--output-directory DIR Directory to save output files (default: ./output)
--output-filename NAME Base filename without extension (default: prowler-inventory-<timestamp>)
--help Show this help message and exit
```
### Example Workflow
```bash
# 1. Run a Prowler scan on your AWS account
prowler aws --profile my-aws-profile --output-formats csv html
# 2. Generate the inventory graph
python contrib/inventory-graph/inventory_graph.py \
--output-directory ./output \
--output-filename my-aws-inventory
# 3. Open the HTML file in your browser
open output/my-aws-inventory.inventory.html
```
### Integration with Prowler Scans
The tool reads from already-loaded AWS service clients in memory (via `sys.modules`). This means:
- **Zero extra AWS API calls**: Uses data already collected during the Prowler scan
- **Graceful degradation**: Services not scanned are silently skipped
- **Flexible**: Works with any subset of Prowler checks
## Output Files
### JSON Output (`*.inventory.json`)
Machine-readable graph structure:
```json
{
"generated_at": "2026-03-19T12:34:56Z",
"nodes": [
{
"id": "arn:aws:lambda:us-east-1:123456789012:function:my-function",
"type": "lambda_function",
"name": "my-function",
"service": "lambda",
"region": "us-east-1",
"account_id": "123456789012",
"properties": {
"runtime": "python3.9",
"vpc_id": "vpc-abc123"
}
}
],
"edges": [
{
"source_id": "arn:aws:lambda:...",
"target_id": "arn:aws:ec2:...:vpc/vpc-abc123",
"edge_type": "network",
"label": "in-vpc"
}
],
"stats": {
"node_count": 42,
"edge_count": 87
}
}
```
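The `source_id`/`target_id` fields join back to node `id`s (ARNs). A short consumption sketch, assuming the file written in the Example Workflow above:

```python
import json

# Path assumed from the Example Workflow above.
with open("output/my-aws-inventory.inventory.json", encoding="utf-8") as fh:
    graph = json.load(fh)

# Index nodes by ARN so each edge can be printed with readable endpoints.
nodes_by_id = {n["id"]: n for n in graph["nodes"]}

for edge in graph["edges"]:
    source = nodes_by_id.get(edge["source_id"], {}).get("name", edge["source_id"])
    target = nodes_by_id.get(edge["target_id"], {}).get("name", edge["target_id"])
    print(f"{source} --[{edge['edge_type']}]--> {target}")
```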
### HTML Output (`*.inventory.html`)
Self-contained interactive visualization that opens in any modern browser. No server or build step required.
## Architecture
### Design Decisions
| Decision | Rationale |
|----------|-----------|
| **Read from sys.modules** | Zero extra AWS API calls; services not scanned are silently skipped |
| **Self-contained HTML** | D3.js v7 via CDN; no server, no build step; opens in any browser |
| **One extractor per service** | Each extractor is independently testable; adding a new service = one new file + one line in the registry |
| **Typed edges** | Semantic types allow downstream consumers (attack-path tools, Neo4j import) to filter by relationship class |
### Project Structure
```
contrib/inventory-graph/
├── README.md # This file
├── inventory_graph.py # Main entry point script
├── lib/
│ ├── __init__.py
│ ├── models.py # ResourceNode, ResourceEdge, ConnectivityGraph dataclasses
│ ├── graph_builder.py # Reads loaded service clients from sys.modules
│ ├── inventory_output.py # write_json(), write_html()
│ └── extractors/
│ ├── __init__.py
│ ├── lambda_extractor.py # Lambda functions → VPC/subnet/SG/event-sources/layers/DLQ/KMS
│ ├── ec2_extractor.py # EC2 instances + security groups → subnet/VPC
│ ├── vpc_extractor.py # VPCs, subnets, peering connections
│ ├── rds_extractor.py # RDS instances → VPC/SG/cluster/KMS
│ ├── elbv2_extractor.py # ALB/NLB load balancers → SG/VPC
│ ├── s3_extractor.py # S3 buckets → replication targets/logging buckets/KMS keys
│ └── iam_extractor.py # IAM roles + trust-relationship edges
└── examples/
└── sample_output.html # Example output (optional)
```
## Testing
### Smoke Test (No AWS Credentials Needed)
```python
import sys
from unittest.mock import MagicMock
# Wire a fake Lambda client
mock_module = MagicMock()
mock_fn = MagicMock()
mock_fn.arn = "arn:aws:lambda:us-east-1:123:function:test"
mock_fn.name = "test"
mock_fn.region = "us-east-1"
mock_fn.vpc_id = "vpc-abc"
mock_fn.security_groups = ["sg-111"]
mock_fn.subnet_ids = {"subnet-aaa"}
mock_fn.environment = None
mock_fn.kms_key_arn = None
mock_fn.layers = []
mock_fn.dead_letter_config = None
mock_fn.event_source_mappings = []
mock_module.awslambda_client.functions = {mock_fn.arn: mock_fn}
mock_module.awslambda_client.audited_account = "123"
sys.modules["prowler.providers.aws.services.awslambda.awslambda_client"] = mock_module
# Run this from inside contrib/inventory-graph/ (the hyphenated directory is
# not importable as a dotted package path, so import `lib` directly).
from lib.graph_builder import build_graph
from lib.inventory_output import write_json, write_html
graph = build_graph()
write_json(graph, "/tmp/test.inventory.json")
write_html(graph, "/tmp/test.inventory.html")
# Open /tmp/test.inventory.html in a browser
```
## Extending
### Adding a New Service
1. Create a new extractor file in `lib/extractors/` (e.g., `dynamodb_extractor.py`)
2. Implement the `extract(client)` function that returns `(nodes, edges)`
3. Register it in `lib/graph_builder.py` in the `_SERVICE_REGISTRY` tuple
Example extractor template:
```python
from typing import List, Tuple
from prowler.lib.outputs.inventory.models import ResourceNode, ResourceEdge
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""Extract DynamoDB tables and their relationships."""
nodes = []
edges = []
for table in client.tables:
nodes.append(
ResourceNode(
id=table.arn,
type="dynamodb_table",
name=table.name,
service="dynamodb",
region=table.region,
account_id=client.audited_account,
properties={"billing_mode": table.billing_mode}
)
)
# Add edges for KMS encryption, streams, etc.
if table.kms_key_arn:
edges.append(
ResourceEdge(
source_id=table.kms_key_arn,
target_id=table.arn,
edge_type="encrypts",
label="encrypts"
)
)
return nodes, edges
```
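For step 3, registration is one new entry in the `_SERVICE_REGISTRY` tuple in `lib/graph_builder.py`. A sketch of that entry; the DynamoDB client module path is an assumption following Prowler's service-client naming convention:

```python
# lib/graph_builder.py — one new entry inside the _SERVICE_REGISTRY tuple
(
    "prowler.providers.aws.services.dynamodb.dynamodb_client",  # sys.modules key (assumed path)
    "dynamodb_client",  # attribute holding the client inside that module
    "lib.extractors.dynamodb_extractor",  # the new extractor module
),
```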
## Troubleshooting
### No nodes discovered
**Problem:** The tool reports "no nodes discovered" after running.
**Solution:** Ensure you've run a Prowler scan first. The tool reads from in-memory service clients loaded during the scan. If no services were scanned, no nodes will be discovered.
### Missing services in the graph
**Problem:** Some AWS services are not appearing in the graph.
**Solution:** The tool only includes services that have been scanned by Prowler. Run Prowler with the services you want to include, or run without service filters to scan all available services.
### HTML file doesn't display properly
**Problem:** The HTML visualization doesn't load or shows errors.
**Solution:**
- Ensure you're opening the file in a modern browser (Chrome, Firefox, Safari, Edge)
- Check your browser's console for JavaScript errors
- Verify the file was generated completely (check file size > 0)
- The HTML requires internet access to load D3.js from a CDN
## Roadmap
Potential future enhancements:
- [ ] Support for additional AWS services (DynamoDB, SQS, SNS, etc.)
- [ ] Export to Neo4j / Cartography format
- [ ] Attack path analysis integration
- [ ] Multi-account/multi-region aggregation
- [ ] Custom edge type filtering in HTML UI
- [ ] Graph diff between two scans
## Contributing
This is a community contribution. If you'd like to enhance it:
1. Fork the Prowler repository
2. Make your changes in `contrib/inventory-graph/`
3. Test thoroughly
4. Submit a pull request with a clear description
## License
This tool is part of the Prowler project and is licensed under the Apache License 2.0.
## Credits
- **Author:** [@sandiyochristan](https://github.com/sandiyochristan)
- **Related PR:** [#10382](https://github.com/prowler-cloud/prowler/pull/10382)
- **Prowler Project:** [prowler-cloud/prowler](https://github.com/prowler-cloud/prowler)
## Support
For issues or questions:
- Open an issue in the [Prowler repository](https://github.com/prowler-cloud/prowler/issues)
- Join the [Prowler Community Slack](https://goto.prowler.com/slack)
- Tag your issue with `contrib:inventory-graph`
@@ -1,181 +0,0 @@
#!/usr/bin/env python3
"""
Example: Generate AWS Inventory Graph with Mock Data
This example demonstrates how to use the inventory graph tool with mock AWS data.
No AWS credentials required.
"""
import sys
from pathlib import Path
from unittest.mock import MagicMock
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from lib.graph_builder import build_graph
from lib.inventory_output import write_json, write_html
def create_mock_lambda_client():
"""Create a mock Lambda client with sample data."""
mock_module = MagicMock()
# Create a mock Lambda function
mock_fn = MagicMock()
mock_fn.arn = "arn:aws:lambda:us-east-1:123456789012:function:my-test-function"
mock_fn.name = "my-test-function"
mock_fn.region = "us-east-1"
mock_fn.vpc_id = "vpc-abc123"
mock_fn.security_groups = ["sg-111222"]
mock_fn.subnet_ids = {"subnet-aaa111", "subnet-bbb222"}
mock_fn.environment = {"Variables": {"ENV": "production"}}
mock_fn.kms_key_arn = (
"arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012"
)
mock_fn.layers = []
mock_fn.dead_letter_config = None
mock_fn.event_source_mappings = []
mock_module.awslambda_client.functions = {mock_fn.arn: mock_fn}
mock_module.awslambda_client.audited_account = "123456789012"
return mock_module
def create_mock_ec2_client():
"""Create a mock EC2 client with sample data."""
mock_module = MagicMock()
# Create a mock EC2 instance
mock_instance = MagicMock()
mock_instance.arn = (
"arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0"
)
mock_instance.id = "i-1234567890abcdef0"
mock_instance.region = "us-east-1"
mock_instance.vpc_id = "vpc-abc123"
mock_instance.subnet_id = "subnet-aaa111"
    mock_instance.security_groups = ["sg-111222"]  # the extractor expects SG IDs as strings
mock_instance.state = "running"
mock_instance.type = "t3.micro"
mock_instance.tags = [{"Key": "Name", "Value": "test-instance"}]
# Create a mock security group
mock_sg = MagicMock()
mock_sg.arn = "arn:aws:ec2:us-east-1:123456789012:security-group/sg-111222"
mock_sg.id = "sg-111222"
mock_sg.name = "test-security-group"
mock_sg.region = "us-east-1"
mock_sg.vpc_id = "vpc-abc123"
    mock_module.ec2_client.instances = [mock_instance]
    # The EC2 extractor iterates security_groups.values(), so this
    # attribute must be a dict, not a list.
    mock_module.ec2_client.security_groups = {mock_sg.id: mock_sg}
    mock_module.ec2_client.audited_account = "123456789012"
return mock_module
def create_mock_vpc_client():
"""Create a mock VPC client with sample data."""
mock_module = MagicMock()
# Create a mock VPC
mock_vpc = MagicMock()
mock_vpc.arn = "arn:aws:ec2:us-east-1:123456789012:vpc/vpc-abc123"
mock_vpc.id = "vpc-abc123"
mock_vpc.region = "us-east-1"
mock_vpc.cidr_block = "10.0.0.0/16"
mock_vpc.tags = [{"Key": "Name", "Value": "test-vpc"}]
# Create mock subnets
mock_subnet1 = MagicMock()
mock_subnet1.arn = "arn:aws:ec2:us-east-1:123456789012:subnet/subnet-aaa111"
mock_subnet1.id = "subnet-aaa111"
mock_subnet1.region = "us-east-1"
mock_subnet1.vpc_id = "vpc-abc123"
mock_subnet1.cidr_block = "10.0.1.0/24"
mock_subnet1.availability_zone = "us-east-1a"
mock_subnet2 = MagicMock()
mock_subnet2.arn = "arn:aws:ec2:us-east-1:123456789012:subnet/subnet-bbb222"
mock_subnet2.id = "subnet-bbb222"
mock_subnet2.region = "us-east-1"
mock_subnet2.vpc_id = "vpc-abc123"
mock_subnet2.cidr_block = "10.0.2.0/24"
mock_subnet2.availability_zone = "us-east-1b"
    # The VPC extractor reads dict-valued attributes (vpcs.values(),
    # vpc_subnets.values(), vpc_peering_connections.values()) and expects
    # the subnets under `vpc_subnets`, not `subnets`.
    mock_module.vpc_client.vpcs = {mock_vpc.id: mock_vpc}
    mock_module.vpc_client.vpc_subnets = {
        mock_subnet1.id: mock_subnet1,
        mock_subnet2.id: mock_subnet2,
    }
    mock_module.vpc_client.vpc_peering_connections = {}
    mock_module.vpc_client.audited_account = "123456789012"
return mock_module
def main():
"""Main function to demonstrate the inventory graph generation."""
print("=" * 70)
print("AWS Inventory Graph - Mock Data Example")
print("=" * 70)
print()
# Create mock clients and inject them into sys.modules
print("Creating mock AWS service clients...")
sys.modules["prowler.providers.aws.services.awslambda.awslambda_client"] = (
create_mock_lambda_client()
)
sys.modules["prowler.providers.aws.services.ec2.ec2_client"] = (
create_mock_ec2_client()
)
sys.modules["prowler.providers.aws.services.vpc.vpc_client"] = (
create_mock_vpc_client()
)
print("✓ Mock clients created")
print()
# Build the graph
print("Building connectivity graph...")
graph = build_graph()
print(f"✓ Graph built: {len(graph.nodes)} nodes, {len(graph.edges)} edges")
print()
# Display discovered nodes
print("Discovered nodes:")
for node in graph.nodes:
print(f" - {node.type}: {node.name} ({node.region})")
print()
# Display discovered edges
print("Discovered edges:")
for edge in graph.edges:
source_node = next((n for n in graph.nodes if n.id == edge.source_id), None)
target_node = next((n for n in graph.nodes if n.id == edge.target_id), None)
source_name = source_node.name if source_node else edge.source_id
target_name = target_node.name if target_node else edge.target_id
print(f" - {source_name} --[{edge.edge_type}]--> {target_name}")
print()
# Write outputs
output_dir = Path(__file__).parent
json_path = output_dir / "example_output.inventory.json"
html_path = output_dir / "example_output.inventory.html"
print("Writing output files...")
write_json(graph, str(json_path))
write_html(graph, str(html_path))
print(f"✓ JSON written to: {json_path}")
print(f"✓ HTML written to: {html_path}")
print()
print("=" * 70)
print("✓ Example complete!")
print("=" * 70)
print()
print(f"Open the HTML file to view the interactive graph:")
print(f" open {html_path}")
print()
if __name__ == "__main__":
main()
-158
View File
@@ -1,158 +0,0 @@
#!/usr/bin/env python3
"""
AWS Inventory Connectivity Graph Generator
A standalone tool that generates interactive connectivity graphs from Prowler AWS scans.
This tool reads from already-loaded AWS service clients in memory and produces:
- JSON graph (nodes + edges)
- Interactive HTML visualization
Usage:
python inventory_graph.py --output-directory ./output --output-filename my-inventory
For more information, see README.md
"""
import argparse
import sys
from datetime import datetime
from pathlib import Path
# Add the contrib directory to the path so we can import the lib modules
CONTRIB_DIR = Path(__file__).parent
sys.path.insert(0, str(CONTRIB_DIR))
from lib.graph_builder import build_graph
from lib.inventory_output import write_json, write_html
def parse_arguments():
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(
description="Generate AWS inventory connectivity graph from Prowler scan data",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Generate graph with default settings
python inventory_graph.py
# Specify custom output directory and filename
python inventory_graph.py --output-directory ./my-output --output-filename aws-inventory
# After running a Prowler scan
prowler aws --profile my-profile
python inventory_graph.py --output-directory ./output
For more information, see README.md
""",
)
parser.add_argument(
"--output-directory",
"-o",
default="./output",
help="Directory to save output files (default: ./output)",
)
parser.add_argument(
"--output-filename",
"-f",
default=None,
help="Base filename without extension (default: prowler-inventory-<timestamp>)",
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="Enable verbose output",
)
return parser.parse_args()
def main():
"""Main entry point for the inventory graph generator."""
args = parse_arguments()
# Set up output paths
output_dir = Path(args.output_directory)
output_dir.mkdir(parents=True, exist_ok=True)
# Generate filename with timestamp if not provided
if args.output_filename:
base_filename = args.output_filename
else:
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
base_filename = f"prowler-inventory-{timestamp}"
json_path = output_dir / f"{base_filename}.inventory.json"
html_path = output_dir / f"{base_filename}.inventory.html"
print("=" * 70)
print("AWS Inventory Connectivity Graph Generator")
print("=" * 70)
print()
# Build the graph from loaded service clients
if args.verbose:
print("Building connectivity graph from loaded AWS service clients...")
graph = build_graph()
# Check if any nodes were discovered
if not graph.nodes:
print("⚠️ WARNING: No nodes discovered!")
print()
print("This usually means:")
print(" 1. No Prowler scan has been run yet in this Python session")
print(" 2. No AWS service clients are loaded in memory")
print()
print("To fix this:")
print(" 1. Run a Prowler scan first: prowler aws --output-formats csv")
print(" 2. Then run this script in the same session")
print()
print(
"Alternatively, integrate this tool directly into Prowler's output pipeline."
)
sys.exit(1)
print(f"✓ Discovered {len(graph.nodes)} nodes and {len(graph.edges)} edges")
print()
# Write outputs
if args.verbose:
print(f"Writing JSON output to: {json_path}")
write_json(graph, str(json_path))
if args.verbose:
print(f"Writing HTML output to: {html_path}")
write_html(graph, str(html_path))
print()
print("=" * 70)
print("✓ Graph generation complete!")
print("=" * 70)
print()
print(f"📄 JSON: {json_path}")
print(f"🌐 HTML: {html_path}")
print()
print(f"Open the HTML file in your browser to explore the interactive graph:")
print(f" open {html_path}")
print()
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print("\n\nInterrupted by user. Exiting...")
sys.exit(130)
except Exception as e:
print(f"\n❌ Error: {e}", file=sys.stderr)
if "--verbose" in sys.argv or "-v" in sys.argv:
import traceback
traceback.print_exc()
sys.exit(1)
@@ -1,94 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract EC2 instance and security-group nodes with their edges.
Edges produced:
    - instance → security-group [network]
    - instance → subnet [network]
    - security-group → VPC [network]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
# EC2 Instances
for instance in client.instances:
name = instance.id
for tag in instance.tags or []:
if tag.get("Key") == "Name":
name = tag["Value"]
break
props = {
"instance_type": getattr(instance, "type", None),
"state": getattr(instance, "state", None),
"vpc_id": getattr(instance, "vpc_id", None),
"subnet_id": getattr(instance, "subnet_id", None),
"public_ip": getattr(instance, "public_ip_address", None),
"private_ip": getattr(instance, "private_ip_address", None),
}
nodes.append(
ResourceNode(
id=instance.arn,
type="ec2_instance",
name=name,
service="ec2",
region=instance.region,
account_id=client.audited_account,
properties={k: v for k, v in props.items() if v is not None},
)
)
for sg_id in instance.security_groups or []:
edges.append(
ResourceEdge(
source_id=instance.arn,
target_id=sg_id,
edge_type="network",
label="sg",
)
)
if instance.subnet_id:
edges.append(
ResourceEdge(
source_id=instance.arn,
target_id=instance.subnet_id,
edge_type="network",
label="subnet",
)
)
# Security Groups
for sg in client.security_groups.values():
name = (
sg.name if hasattr(sg, "name") else sg.id if hasattr(sg, "id") else sg.arn
)
nodes.append(
ResourceNode(
id=sg.arn,
type="security_group",
name=name,
service="ec2",
region=sg.region,
account_id=client.audited_account,
properties={"vpc_id": sg.vpc_id},
)
)
if sg.vpc_id:
edges.append(
ResourceEdge(
source_id=sg.arn,
target_id=sg.vpc_id,
edge_type="network",
label="in-vpc",
)
)
return nodes, edges
@@ -1,60 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract ELBv2 (ALB/NLB) load balancer nodes and their edges.
Edges produced:
    - load_balancer → security-group [network]
    - load_balancer → VPC [network]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
for lb in client.loadbalancersv2.values():
props = {
"type": getattr(lb, "type", None),
"scheme": getattr(lb, "scheme", None),
"dns_name": getattr(lb, "dns", None),
"vpc_id": getattr(lb, "vpc_id", None),
}
name = getattr(lb, "name", lb.arn.split("/")[-2] if "/" in lb.arn else lb.arn)
nodes.append(
ResourceNode(
id=lb.arn,
type="load_balancer",
name=name,
service="elbv2",
region=lb.region,
account_id=client.audited_account,
properties={k: v for k, v in props.items() if v is not None},
)
)
for sg_id in lb.security_groups or []:
edges.append(
ResourceEdge(
source_id=lb.arn,
target_id=sg_id,
edge_type="network",
label="sg",
)
)
vpc_id = getattr(lb, "vpc_id", None)
if vpc_id:
edges.append(
ResourceEdge(
source_id=lb.arn,
target_id=vpc_id,
edge_type="network",
label="in-vpc",
)
)
return nodes, edges
@@ -1,84 +0,0 @@
import json
from typing import Any, Dict, List, Tuple
from prowler.lib.logger import logger
from lib.models import ResourceEdge, ResourceNode
def _parse_trust_principals(assume_role_policy: Any) -> List[str]:
"""
Return a flat list of principal strings from an IAM assume-role policy document.
The policy may be a dict already or a JSON string.
"""
if not assume_role_policy:
return []
if isinstance(assume_role_policy, str):
try:
assume_role_policy = json.loads(assume_role_policy)
except (json.JSONDecodeError, ValueError):
return []
principals = []
for statement in assume_role_policy.get("Statement", []):
principal = statement.get("Principal", {})
if isinstance(principal, str):
principals.append(principal)
elif isinstance(principal, dict):
for v in principal.values():
if isinstance(v, list):
principals.extend(v)
else:
principals.append(v)
elif isinstance(principal, list):
principals.extend(principal)
return principals
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract IAM role nodes and their trust-relationship edges.
Edges produced:
    - trusted-principal → role [iam] (who can assume this role)
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
for role in client.roles:
props: Dict[str, Any] = {
"path": getattr(role, "path", None),
"create_date": str(getattr(role, "create_date", "") or ""),
}
nodes.append(
ResourceNode(
id=role.arn,
type="iam_role",
name=role.name,
service="iam",
region="global",
account_id=client.audited_account,
properties={k: v for k, v in props.items() if v},
)
)
# Trust-relationship edges: principal → role (principal CAN assume role)
try:
for principal in _parse_trust_principals(role.assume_role_policy):
if principal and principal != "*":
edges.append(
ResourceEdge(
source_id=principal,
target_id=role.arn,
edge_type="iam",
label="can-assume",
)
)
except Exception as e:
logger.debug(
f"inventory iam_extractor: could not parse trust policy for {role.arn}: {e}"
)
return nodes, edges
@@ -1,118 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract Lambda function nodes and their edges from an awslambda_client.
Edges produced:
    - lambda → VPC [network]
    - lambda → subnet [network]
    - lambda → sg [network]
    - event-source → lambda [triggers] (from EventSourceMapping)
    - lambda → layer ARN [depends_on]
    - lambda → DLQ target [data_flow]
    - KMS key → lambda [encrypts]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
for fn in client.functions.values():
props = {
"runtime": fn.runtime,
"vpc_id": fn.vpc_id,
}
if fn.environment:
props["has_env_vars"] = True
if fn.kms_key_arn:
props["kms_key_arn"] = fn.kms_key_arn
nodes.append(
ResourceNode(
id=fn.arn,
type="lambda_function",
name=fn.name,
service="lambda",
region=fn.region,
account_id=client.audited_account,
properties=props,
)
)
# Network edges → VPC, subnets, security groups
if fn.vpc_id:
edges.append(
ResourceEdge(
source_id=fn.arn,
target_id=fn.vpc_id,
edge_type="network",
label="in-vpc",
)
)
for sg_id in fn.security_groups or []:
edges.append(
ResourceEdge(
source_id=fn.arn,
target_id=sg_id,
edge_type="network",
label="sg",
)
)
for subnet_id in fn.subnet_ids or set():
edges.append(
ResourceEdge(
source_id=fn.arn,
target_id=subnet_id,
edge_type="network",
label="subnet",
)
)
# Trigger edges from event source mappings
for esm in getattr(fn, "event_source_mappings", []):
edges.append(
ResourceEdge(
source_id=esm.event_source_arn,
target_id=fn.arn,
edge_type="triggers",
label=f"esm:{esm.state}",
)
)
# Layer dependency edges
for layer in getattr(fn, "layers", []):
edges.append(
ResourceEdge(
source_id=fn.arn,
target_id=layer.arn,
edge_type="depends_on",
label="layer",
)
)
# Dead-letter queue data-flow edge
dlq = getattr(fn, "dead_letter_config", None)
if dlq and dlq.target_arn:
edges.append(
ResourceEdge(
source_id=fn.arn,
target_id=dlq.target_arn,
edge_type="data_flow",
label="dlq",
)
)
# KMS encryption edge
if fn.kms_key_arn:
edges.append(
ResourceEdge(
source_id=fn.kms_key_arn,
target_id=fn.arn,
edge_type="encrypts",
label="kms",
)
)
return nodes, edges
@@ -1,86 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract RDS DB instance nodes and their edges.
Edges produced:
    - db_instance → security-group [network]
    - db_instance → VPC [network]
    - db_instance → cluster [depends_on]
    - KMS key → db_instance [encrypts]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
for db in client.db_instances.values():
props = {
"engine": getattr(db, "engine", None),
"engine_version": getattr(db, "engine_version", None),
"instance_class": getattr(db, "db_instance_class", None),
"vpc_id": getattr(db, "vpc_id", None),
"multi_az": getattr(db, "multi_az", None),
"publicly_accessible": getattr(db, "publicly_accessible", None),
"storage_encrypted": getattr(db, "storage_encrypted", None),
}
nodes.append(
ResourceNode(
id=db.arn,
type="rds_instance",
name=db.id,
service="rds",
region=db.region,
account_id=client.audited_account,
properties={k: v for k, v in props.items() if v is not None},
)
)
for sg in getattr(db, "security_groups", []):
sg_id = sg if isinstance(sg, str) else getattr(sg, "id", str(sg))
edges.append(
ResourceEdge(
source_id=db.arn,
target_id=sg_id,
edge_type="network",
label="sg",
)
)
vpc_id = getattr(db, "vpc_id", None)
if vpc_id:
edges.append(
ResourceEdge(
source_id=db.arn,
target_id=vpc_id,
edge_type="network",
label="in-vpc",
)
)
cluster_arn = getattr(db, "cluster_arn", None)
if cluster_arn:
edges.append(
ResourceEdge(
source_id=db.arn,
target_id=cluster_arn,
edge_type="depends_on",
label="cluster-member",
)
)
kms_key_id = getattr(db, "kms_key_id", None)
if kms_key_id:
edges.append(
ResourceEdge(
source_id=kms_key_id,
target_id=db.arn,
edge_type="encrypts",
label="kms",
)
)
return nodes, edges
@@ -1,92 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract S3 bucket nodes and their edges.
Edges produced:
    - bucket → replication-target bucket [replicates_to]
    - KMS key → bucket [encrypts]
    - bucket → logging bucket [logs_to]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
for bucket in client.buckets.values():
encryption = getattr(bucket, "encryption", None)
versioning = getattr(bucket, "versioning_enabled", None)
logging = getattr(bucket, "logging", None)
props = {}
if versioning is not None:
props["versioning"] = versioning
if encryption:
enc_type = getattr(encryption, "type", str(encryption))
props["encryption"] = enc_type
nodes.append(
ResourceNode(
id=bucket.arn,
type="s3_bucket",
name=bucket.name,
service="s3",
region=bucket.region,
account_id=client.audited_account,
properties=props,
)
)
# Replication edges
for rule in getattr(bucket, "replication_rules", None) or []:
dest_bucket = getattr(rule, "destination_bucket", None)
if dest_bucket:
dest_arn = (
dest_bucket
if dest_bucket.startswith("arn:")
else f"arn:aws:s3:::{dest_bucket}"
)
edges.append(
ResourceEdge(
source_id=bucket.arn,
target_id=dest_arn,
edge_type="replicates_to",
label="s3-replication",
)
)
# Logging edges
if logging:
target_bucket = getattr(logging, "target_bucket", None)
if target_bucket:
target_arn = (
target_bucket
if target_bucket.startswith("arn:")
else f"arn:aws:s3:::{target_bucket}"
)
edges.append(
ResourceEdge(
source_id=bucket.arn,
target_id=target_arn,
edge_type="logs_to",
label="access-logs",
)
)
# KMS encryption edges
if encryption:
kms_arn = getattr(encryption, "kms_master_key_id", None)
if kms_arn:
edges.append(
ResourceEdge(
source_id=kms_arn,
target_id=bucket.arn,
edge_type="encrypts",
label="kms",
)
)
return nodes, edges
@@ -1,92 +0,0 @@
from typing import List, Tuple
from lib.models import ResourceEdge, ResourceNode
def extract(client) -> Tuple[List[ResourceNode], List[ResourceEdge]]:
"""
Extract VPC and subnet nodes with their edges.
Edges produced:
    - subnet → VPC [depends_on]
- peering connection between VPCs [network]
"""
nodes: List[ResourceNode] = []
edges: List[ResourceEdge] = []
# VPCs
for vpc in client.vpcs.values():
name = vpc.id if hasattr(vpc, "id") else vpc.arn
for tag in vpc.tags or []:
if isinstance(tag, dict) and tag.get("Key") == "Name":
name = tag["Value"]
break
nodes.append(
ResourceNode(
id=vpc.arn,
type="vpc",
name=name,
service="vpc",
region=vpc.region,
account_id=client.audited_account,
properties={
"cidr_block": getattr(vpc, "cidr_block", None),
"is_default": getattr(vpc, "is_default", None),
},
)
)
# VPC Subnets
for subnet in client.vpc_subnets.values():
name = subnet.id if hasattr(subnet, "id") else subnet.arn
for tag in getattr(subnet, "tags", None) or []:
if isinstance(tag, dict) and tag.get("Key") == "Name":
name = tag["Value"]
break
nodes.append(
ResourceNode(
id=subnet.arn,
type="subnet",
name=name,
service="vpc",
region=subnet.region,
account_id=client.audited_account,
properties={
"vpc_id": getattr(subnet, "vpc_id", None),
"cidr_block": getattr(subnet, "cidr_block", None),
"availability_zone": getattr(subnet, "availability_zone", None),
"public": getattr(subnet, "public", None),
},
)
)
vpc_id = getattr(subnet, "vpc_id", None)
if vpc_id:
# Find the VPC ARN for this vpc_id
vpc_arn = next(
(v.arn for v in client.vpcs.values() if v.id == vpc_id),
vpc_id,
)
edges.append(
ResourceEdge(
source_id=subnet.arn,
target_id=vpc_arn,
edge_type="depends_on",
label="subnet-of",
)
)
# VPC Peering Connections
for peering in getattr(client, "vpc_peering_connections", {}).values():
edges.append(
ResourceEdge(
source_id=peering.arn,
target_id=getattr(peering, "accepter_vpc_id", peering.arn),
edge_type="network",
label="vpc-peer",
)
)
return nodes, edges
@@ -1,106 +0,0 @@
"""
graph_builder.py
----------------
Builds a ConnectivityGraph by reading already-loaded AWS service clients from
sys.modules. Only services that were actually scanned (i.e. whose client
module is already imported) contribute nodes and edges. Unknown / unloaded
services are silently skipped, so the output degrades gracefully when only a
subset of checks has been run.
"""
import sys
from typing import Tuple
from prowler.lib.logger import logger
from lib.models import ConnectivityGraph
# Registry: (sys.modules key, attribute name inside that module, extractor module path)
_SERVICE_REGISTRY: Tuple[Tuple[str, str, str], ...] = (
(
"prowler.providers.aws.services.awslambda.awslambda_client",
"awslambda_client",
"lib.extractors.lambda_extractor",
),
(
"prowler.providers.aws.services.ec2.ec2_client",
"ec2_client",
"lib.extractors.ec2_extractor",
),
(
"prowler.providers.aws.services.vpc.vpc_client",
"vpc_client",
"lib.extractors.vpc_extractor",
),
(
"prowler.providers.aws.services.rds.rds_client",
"rds_client",
"lib.extractors.rds_extractor",
),
(
"prowler.providers.aws.services.elbv2.elbv2_client",
"elbv2_client",
"lib.extractors.elbv2_extractor",
),
(
"prowler.providers.aws.services.s3.s3_client",
"s3_client",
"lib.extractors.s3_extractor",
),
(
"prowler.providers.aws.services.iam.iam_client",
"iam_client",
"lib.extractors.iam_extractor",
),
)
def build_graph() -> ConnectivityGraph:
"""
Iterate over every registered service, check whether its client module is
already loaded, and call the corresponding extractor.
Returns a ConnectivityGraph with all discovered nodes and edges.
Duplicate node IDs are silently deduplicated (first occurrence wins).
"""
graph = ConnectivityGraph()
seen_node_ids: set = set()
for client_module_key, client_attr, extractor_module_key in _SERVICE_REGISTRY:
client_module = sys.modules.get(client_module_key)
if client_module is None:
continue
service_client = getattr(client_module, client_attr, None)
if service_client is None:
continue
extractor_module = sys.modules.get(extractor_module_key)
if extractor_module is None:
try:
import importlib
extractor_module = importlib.import_module(extractor_module_key)
except ImportError as e:
logger.debug(
f"inventory graph_builder: cannot import extractor {extractor_module_key}: {e}"
)
continue
try:
nodes, edges = extractor_module.extract(service_client)
except Exception as e:
logger.error(
f"inventory graph_builder: extractor {extractor_module_key} failed: "
f"{e.__class__.__name__}[{e.__traceback__.tb_lineno}]: {e}"
)
continue
for node in nodes:
if node.id not in seen_node_ids:
graph.add_node(node)
seen_node_ids.add(node.id)
for edge in edges:
graph.add_edge(edge)
return graph
@@ -1,502 +0,0 @@
"""
inventory_output.py
-------------------
Writes the ConnectivityGraph produced by graph_builder to two files:
<output_path>.inventory.json machine-readable graph (nodes + edges)
<output_path>.inventory.html interactive D3.js force-directed graph
"""
import json
import os
from dataclasses import asdict
from datetime import datetime
from typing import Optional
from prowler.lib.logger import logger
from lib.models import ConnectivityGraph
# ---------------------------------------------------------------------------
# JSON output
# ---------------------------------------------------------------------------
def write_json(graph: ConnectivityGraph, file_path: str) -> None:
"""Serialise the graph to a JSON file."""
try:
        os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)
data = {
"generated_at": datetime.utcnow().isoformat() + "Z",
"nodes": [asdict(n) for n in graph.nodes],
"edges": [asdict(e) for e in graph.edges],
"stats": {
"node_count": len(graph.nodes),
"edge_count": len(graph.edges),
},
}
with open(file_path, "w", encoding="utf-8") as fh:
json.dump(data, fh, indent=2, default=str)
logger.info(f"Inventory graph JSON written to {file_path}")
except Exception as e:
logger.error(
f"inventory_output.write_json: {e.__class__.__name__}[{e.__traceback__.tb_lineno}]: {e}"
)
# ---------------------------------------------------------------------------
# HTML output (self-contained, D3.js CDN)
# ---------------------------------------------------------------------------
# Colour palette per node type
_NODE_COLOURS = {
"lambda_function": "#f59e0b",
"ec2_instance": "#3b82f6",
"security_group": "#6366f1",
"vpc": "#10b981",
"subnet": "#34d399",
"rds_instance": "#ef4444",
"load_balancer": "#8b5cf6",
"s3_bucket": "#06b6d4",
"iam_role": "#f97316",
"default": "#94a3b8",
}
# Edge stroke colours per edge type
_EDGE_COLOURS = {
"network": "#64748b",
"iam": "#f97316",
"triggers": "#a855f7",
"data_flow": "#0ea5e9",
"depends_on": "#94a3b8",
"routes_to": "#22c55e",
"replicates_to": "#ec4899",
"encrypts": "#eab308",
"logs_to": "#78716c",
}
_HTML_TEMPLATE = """\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>Prowler AWS Connectivity Graph</title>
<script src="https://d3js.org/d3.v7.min.js"></script>
<style>
*, *::before, *::after {{ box-sizing: border-box; }}
body {{
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: #0f172a;
color: #e2e8f0;
}}
#header {{
padding: 12px 20px;
background: #1e293b;
border-bottom: 1px solid #334155;
display: flex;
align-items: center;
gap: 16px;
}}
#header h1 {{ margin: 0; font-size: 18px; font-weight: 700; }}
#header .stats {{ font-size: 13px; color: #94a3b8; }}
#controls {{
padding: 8px 20px;
background: #1e293b;
border-bottom: 1px solid #334155;
display: flex;
gap: 12px;
align-items: center;
flex-wrap: wrap;
}}
#controls label {{ font-size: 12px; color: #94a3b8; }}
#controls select, #controls input[type=range] {{
background: #0f172a;
color: #e2e8f0;
border: 1px solid #334155;
border-radius: 4px;
padding: 3px 6px;
font-size: 12px;
}}
#graph-container {{ width: 100%; height: calc(100vh - 100px); position: relative; }}
svg {{ width: 100%; height: 100%; }}
.node circle {{
stroke: #1e293b;
stroke-width: 1.5px;
cursor: pointer;
transition: r 0.15s;
}}
.node circle:hover {{ stroke-width: 3px; }}
.node text {{
font-size: 10px;
fill: #e2e8f0;
pointer-events: none;
text-shadow: 0 0 4px #0f172a;
}}
.link {{
stroke-opacity: 0.6;
stroke-width: 1.5px;
}}
.link-label {{
font-size: 8px;
fill: #94a3b8;
pointer-events: none;
}}
#tooltip {{
position: fixed;
background: #1e293b;
border: 1px solid #334155;
border-radius: 6px;
padding: 10px 14px;
font-size: 12px;
pointer-events: none;
max-width: 320px;
word-break: break-all;
z-index: 9999;
display: none;
}}
#tooltip strong {{ color: #f8fafc; }}
#tooltip .prop {{ color: #94a3b8; margin-top: 4px; }}
#legend {{
position: absolute;
top: 10px;
right: 10px;
background: rgba(30,41,59,0.9);
border: 1px solid #334155;
border-radius: 6px;
padding: 10px 14px;
font-size: 11px;
}}
#legend h3 {{ margin: 0 0 6px; font-size: 12px; }}
.legend-row {{ display: flex; align-items: center; gap: 6px; margin: 3px 0; }}
.legend-dot {{ width: 12px; height: 12px; border-radius: 50%; flex-shrink: 0; }}
.legend-line {{ width: 20px; height: 2px; flex-shrink: 0; }}
</style>
</head>
<body>
<div id="header">
<h1>🔗 AWS Connectivity Graph</h1>
<span class="stats" id="stat-label">Generated: {generated_at}</span>
</div>
<div id="controls">
<label>Filter service:
<select id="filter-service">
<option value="">All services</option>
</select>
</label>
<label>Link distance:
<input type="range" id="link-distance" min="40" max="300" value="120"/>
</label>
<label>Charge strength:
<input type="range" id="charge-strength" min="-800" max="-20" value="-250"/>
</label>
<span class="stats" id="visible-count"></span>
</div>
<div id="graph-container">
<svg id="graph-svg"></svg>
<div id="tooltip"></div>
<div id="legend">
<h3>Node types</h3>
{legend_nodes_html}
<h3 style="margin-top:8px">Edge types</h3>
{legend_edges_html}
</div>
</div>
<script>
const RAW_NODES = {nodes_json};
const RAW_EDGES = {edges_json};
const NODE_COLOURS = {node_colours_json};
const EDGE_COLOURS = {edge_colours_json};
// helpers
function nodeColour(d) {{
return NODE_COLOURS[d.type] || NODE_COLOURS["default"];
}}
function edgeColour(d) {{
return EDGE_COLOURS[d.edge_type] || "#94a3b8";
}}
function nodeRadius(d) {{
const base = {{
lambda_function: 9, ec2_instance: 10, vpc: 14, subnet: 8,
security_group: 7, rds_instance: 11, load_balancer: 12,
s3_bucket: 9, iam_role: 9
}};
return base[d.type] || 8;
}}
// filter controls
const services = [...new Set(RAW_NODES.map(n => n.service))].sort();
const sel = document.getElementById("filter-service");
services.forEach(s => {{
const o = document.createElement("option");
o.value = s; o.textContent = s;
sel.appendChild(o);
}});
// D3 setup
const svg = d3.select("#graph-svg");
const container = svg.append("g");
// zoom
svg.call(
d3.zoom().scaleExtent([0.05, 8])
.on("zoom", e => container.attr("transform", e.transform))
);
// arrowhead marker
const defs = svg.append("defs");
defs.append("marker")
.attr("id", "arrow")
.attr("viewBox", "0 -5 10 10")
.attr("refX", 20).attr("refY", 0)
.attr("markerWidth", 6).attr("markerHeight", 6)
.attr("orient", "auto")
.append("path")
.attr("d", "M0,-5L10,0L0,5")
.attr("fill", "#94a3b8");
// tooltip
const tooltip = document.getElementById("tooltip");
// simulation
let simulation, linkSel, nodeSel, labelSel;
function buildGraph(nodeFilter) {{
// Determine which nodes to show
const visibleNodes = nodeFilter
? RAW_NODES.filter(n => n.service === nodeFilter)
: RAW_NODES;
const visibleIds = new Set(visibleNodes.map(n => n.id));
// Only show edges where BOTH endpoints are visible
const visibleEdges = RAW_EDGES.filter(
e => visibleIds.has(e.source_id) && visibleIds.has(e.target_id)
);
document.getElementById("visible-count").textContent =
`Showing ${{visibleNodes.length}} nodes · ${{visibleEdges.length}} edges`;
container.selectAll("*").remove();
if (simulation) simulation.stop();
const nodes = visibleNodes.map(n => ({{ ...n }}));
const nodeIndex = Object.fromEntries(nodes.map(n => [n.id, n]));
const links = visibleEdges.map(e => ({{
...e,
source: nodeIndex[e.source_id] || e.source_id,
target: nodeIndex[e.target_id] || e.target_id,
}}));
const dist = +document.getElementById("link-distance").value;
const charge = +document.getElementById("charge-strength").value;
simulation = d3.forceSimulation(nodes)
.force("link", d3.forceLink(links).id(d => d.id).distance(dist))
.force("charge", d3.forceManyBody().strength(charge))
.force("center", d3.forceCenter(
document.getElementById("graph-container").clientWidth / 2,
document.getElementById("graph-container").clientHeight / 2
))
.force("collision", d3.forceCollide().radius(d => nodeRadius(d) + 6));
// Edges
linkSel = container.append("g").attr("class", "links")
.selectAll("line")
.data(links)
.join("line")
.attr("class", "link")
.attr("stroke", edgeColour)
.attr("marker-end", "url(#arrow)");
// Edge labels
labelSel = container.append("g").attr("class", "link-labels")
.selectAll("text")
.data(links)
.join("text")
.attr("class", "link-label")
.text(d => d.label || "");
// Nodes
nodeSel = container.append("g").attr("class", "nodes")
.selectAll("g")
.data(nodes)
.join("g")
.attr("class", "node")
.call(
d3.drag()
.on("start", (event, d) => {{
if (!event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x; d.fy = d.y;
}})
.on("drag", (event, d) => {{ d.fx = event.x; d.fy = event.y; }})
.on("end", (event, d) => {{
if (!event.active) simulation.alphaTarget(0);
d.fx = null; d.fy = null;
}})
)
.on("mouseover", (event, d) => {{
const props = Object.entries(d.properties || {{}})
.map(([k, v]) => `<div class="prop"><b>${{k}}</b>: ${{v}}</div>`)
.join("");
tooltip.innerHTML = `
<strong>${{d.name}}</strong>
<div class="prop"><b>type</b>: ${{d.type}}</div>
<div class="prop"><b>service</b>: ${{d.service}}</div>
<div class="prop"><b>region</b>: ${{d.region}}</div>
<div class="prop"><b>account</b>: ${{d.account_id}}</div>
<div class="prop" style="word-break:break-all"><b>arn</b>: ${{d.id}}</div>
${{props}}
`;
tooltip.style.display = "block";
tooltip.style.left = (event.clientX + 12) + "px";
tooltip.style.top = (event.clientY - 10) + "px";
}})
.on("mousemove", event => {{
tooltip.style.left = (event.clientX + 12) + "px";
tooltip.style.top = (event.clientY - 10) + "px";
}})
.on("mouseout", () => {{ tooltip.style.display = "none"; }});
nodeSel.append("circle")
.attr("r", nodeRadius)
.attr("fill", nodeColour);
nodeSel.append("text")
.attr("dx", d => nodeRadius(d) + 3)
.attr("dy", "0.35em")
.text(d => d.name.length > 24 ? d.name.slice(0, 22) + "…" : d.name);
simulation.on("tick", () => {{
linkSel
.attr("x1", d => d.source.x)
.attr("y1", d => d.source.y)
.attr("x2", d => d.target.x)
.attr("y2", d => d.target.y);
labelSel
.attr("x", d => (d.source.x + d.target.x) / 2)
.attr("y", d => (d.source.y + d.target.y) / 2);
nodeSel.attr("transform", d => `translate(${{d.x}},${{d.y}})`);
}});
}}
// Initial render
buildGraph(null);
// Filter change
sel.addEventListener("change", () => buildGraph(sel.value || null));
// Simulation control sliders restart on change
document.getElementById("link-distance").addEventListener("input", () => buildGraph(sel.value || null));
document.getElementById("charge-strength").addEventListener("input", () => buildGraph(sel.value || null));
</script>
</body>
</html>
"""
def _build_legend_html(colours: dict, shape: str) -> str:
rows = []
for key, colour in sorted(colours.items()):
if shape == "dot":
rows.append(
f'<div class="legend-row">'
f'<div class="legend-dot" style="background:{colour}"></div>'
f"<span>{key}</span></div>"
)
else:
rows.append(
f'<div class="legend-row">'
f'<div class="legend-line" style="background:{colour}"></div>'
f"<span>{key}</span></div>"
)
return "\n".join(rows)
def write_html(graph: ConnectivityGraph, file_path: str) -> None:
"""Render the graph as a self-contained interactive HTML page."""
try:
os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)  # dirname is "" for bare filenames
nodes_json = json.dumps(
[
{
"id": n.id,
"type": n.type,
"name": n.name,
"service": n.service,
"region": n.region,
"account_id": n.account_id,
"properties": n.properties,
}
for n in graph.nodes
],
indent=None,
default=str,
)
edges_json = json.dumps(
[
{
"source_id": e.source_id,
"target_id": e.target_id,
"edge_type": e.edge_type,
"label": e.label or "",
}
for e in graph.edges
],
indent=None,
default=str,
)
html = _HTML_TEMPLATE.format(
generated_at=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"),
nodes_json=nodes_json,
edges_json=edges_json,
node_colours_json=json.dumps(_NODE_COLOURS),
edge_colours_json=json.dumps(_EDGE_COLOURS),
legend_nodes_html=_build_legend_html(_NODE_COLOURS, "dot"),
legend_edges_html=_build_legend_html(_EDGE_COLOURS, "line"),
)
with open(file_path, "w", encoding="utf-8") as fh:
fh.write(html)
logger.info(f"Inventory graph HTML written to {file_path}")
except Exception as e:
logger.error(
f"inventory_output.write_html: {e.__class__.__name__}[{e.__traceback__.tb_lineno}]: {e}"
)
# ---------------------------------------------------------------------------
# Convenience entry-point called from __main__.py
# ---------------------------------------------------------------------------
def generate_inventory_outputs(output_path: str) -> None:
"""
Build the connectivity graph from the currently loaded service clients and write
both JSON and HTML outputs.
Args:
output_path: base file path WITHOUT extension, e.g.
"output/prowler-output-20240101120000".
The function appends .inventory.json and .inventory.html.
"""
from lib.graph_builder import build_graph
graph = build_graph()
if not graph.nodes:
logger.warning(
"Inventory graph: no nodes discovered. "
"Make sure at least one AWS service was scanned before generating the inventory."
)
write_json(graph, f"{output_path}.inventory.json")
write_html(graph, f"{output_path}.inventory.html")
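A minimal usage sketch, assuming the provider has already been authenticated and at least one AWS service scanned; the `lib.` import prefix and the base path below are assumptions, not taken from this module:

```python
# Illustrative only: the import path and base path are assumptions.
from lib.inventory_output import generate_inventory_outputs

# Base path WITHOUT extension; the function appends the suffixes itself.
generate_inventory_outputs("output/prowler-output-20240101120000")
# -> output/prowler-output-20240101120000.inventory.json
# -> output/prowler-output-20240101120000.inventory.html
```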
-71
View File
@@ -1,71 +0,0 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ResourceNode:
"""
Represents a single AWS resource as a node in the connectivity graph.
id : globally unique identifier, always the resource ARN
type : coarse resource type used for grouping/colour, e.g. "lambda_function"
name : human-readable label shown on the graph
service : AWS service name, e.g. "lambda", "ec2", "rds"
region : AWS region the resource lives in
account_id: AWS account ID
properties: additional resource-specific metadata (runtime, vpc_id, etc.)
"""
id: str
type: str
name: str
service: str
region: str
account_id: str
properties: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ResourceEdge:
"""
Represents a directional relationship between two resource nodes.
source_id : ARN of the source node
target_id : ARN of the target node
edge_type : semantic type of the relationship, e.g.:
"network" resources share a network path (VPC/subnet/SG)
"iam" IAM trust or permission relationship
"triggers" one resource can invoke another (event source Lambda)
"data_flow" data is written/read (Lambda SQS dead-letter queue)
"depends_on" soft dependency (Lambda layer, subnet belongs to VPC)
"routes_to" traffic routing (LB target)
"encrypts" KMS key encrypts the resource
label : optional short label rendered on the edge in the HTML graph
"""
source_id: str
target_id: str
edge_type: str
label: Optional[str] = None
@dataclass
class ConnectivityGraph:
"""
Container for the full inventory connectivity graph.
nodes: all discovered resource nodes
edges: all discovered edges between nodes
"""
nodes: List[ResourceNode] = field(default_factory=list)
edges: List[ResourceEdge] = field(default_factory=list)
def add_node(self, node: ResourceNode) -> None:
self.nodes.append(node)
def add_edge(self, edge: ResourceEdge) -> None:
self.edges.append(edge)
def node_ids(self) -> set:
return {n.id for n in self.nodes}
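A minimal sketch of how these dataclasses compose into a graph; both ARNs and the edge below are hypothetical:

```python
# Hypothetical resources: the ARNs below are made up for illustration.
vpc = ResourceNode(
    id="arn:aws:ec2:eu-west-1:123456789012:vpc/vpc-0abc",
    type="vpc", name="main-vpc", service="ec2",
    region="eu-west-1", account_id="123456789012",
)
fn = ResourceNode(
    id="arn:aws:lambda:eu-west-1:123456789012:function:api",
    type="lambda_function", name="api", service="lambda",
    region="eu-west-1", account_id="123456789012",
    properties={"runtime": "python3.12"},
)

graph = ConnectivityGraph()
graph.add_node(vpc)
graph.add_node(fn)
# "depends_on" is the soft-dependency edge type described above.
graph.add_edge(ResourceEdge(fn.id, vpc.id, "depends_on", label="in VPC"))
assert fn.id in graph.node_ids()
```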
@@ -10,7 +10,7 @@ Complete reference guide for all tools available in the Prowler MCP Server. Tool
|----------|------------|------------------------|
| Prowler Hub | 10 tools | No |
| Prowler Documentation | 2 tools | No |
| Prowler Cloud/App | 29 tools | Yes |
| Prowler Cloud/App | 32 tools | Yes |
## Tool Naming Convention
@@ -36,6 +36,14 @@ Tools for searching, viewing, and analyzing security findings across all cloud p
- **`prowler_app_get_finding_details`** - Get comprehensive details about a specific finding including remediation guidance, check metadata, and resource relationships
- **`prowler_app_get_findings_overview`** - Get aggregate statistics and trends about security findings as a markdown report
### Finding Groups Management
Tools for listing finding groups aggregated by check ID, viewing complete group counters, and drilling down into affected resources.
- **`prowler_app_list_finding_groups`** - List latest or historical finding groups with filters for provider, region, service, resource, category, check, severity, status, muted state, delta, date range, and sorting
- **`prowler_app_get_finding_group_details`** - Get complete details for a specific finding group including counters, description, timestamps, and impacted providers
- **`prowler_app_list_finding_group_resources`** - List the resources affected by a finding group. By default only actionable, unmuted resources are returned, including nested resource and provider data plus the `finding_id` for remediation details; set `include_muted` to also include suppressed resources
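A typical triage sequence chains the three tools with `prowler_app_get_finding_details`; the snippet below writes the MCP tool calls as plain Python functions for brevity, and the arguments are illustrative:

```python
# 1. Surface the noisiest critical failing groups.
groups = prowler_app_list_finding_groups(
    severity=["critical"], status=["FAIL"], sort="-fail_count"
)
check_id = groups["groups"][0]["check_id"]

# 2. Pull the complete counters for the top group.
details = prowler_app_get_finding_group_details(check_id=check_id)

# 3. Drill into the affected resources and grab a finding_id.
resources = prowler_app_list_finding_group_resources(check_id=check_id)
finding_id = resources["resources"][0]["finding_id"]

# 4. Fetch full remediation guidance for one concrete finding.
finding = prowler_app_get_finding_details(finding_id=finding_id)
```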
### Provider Management
Tools for managing cloud provider connections in Prowler.
@@ -149,14 +149,6 @@ Prowler Cloud and App expose two formats:
* **CSV report:** Every requirement, every check, and every finding for the selected scan and filters. Available for all supported frameworks.
* **PDF report:** Curated executive-style report. Currently supported for Prowler ThreatScore, ENS RD2022, NIS2, and CSA CCM. Additional PDF reports are added in subsequent Prowler releases.
<Note>
**PDF detail section is capped at the first 100 failed findings per check.** The PDF is intended as an executive/auditor document, not a raw data dump: when a check produces more than 100 failed findings, the report renders the first 100 and shows a banner pointing the reader to the CSV or JSON export for the complete list. The CSV and the ZIP scan output are never truncated.
The cap is configurable per deployment via the `DJANGO_PDF_MAX_FINDINGS_PER_CHECK` environment variable on the Prowler API workers; set it to `0` to disable truncation entirely. The default value of `100` keeps the PDF readable and bounded in size on enterprise-scale scans (hundreds of thousands of findings) without affecting smaller scans, where the cap is rarely reached.
Only **failed** findings are rendered in the detail section. PASS findings for the same check are excluded at query time. The PDF surfaces what needs attention, and the CSV/JSON exports surface everything for forensic review.
</Note>
#### Downloading From the Detail Page
Inside any framework detail page, the **CSV** and **PDF** buttons in the header trigger the same downloads as the overview dropdown. The PDF button only appears for frameworks that support it.
+4
View File
@@ -4,6 +4,10 @@ All notable changes to the **Prowler MCP Server** are documented in this file.
## [0.7.0] (Prowler UNRELEASED)
### 🚀 Added
- MCP Server tools for Prowler Finding Groups Management [(#11140)](https://github.com/prowler-cloud/prowler/pull/11140)
### 🔐 Security
- `cryptography` from 46.0.1 to 47.0.0 (transitive) for CVE-2026-39892 and CVE-2026-26007 / CVE-2026-34073 [(#10978)](https://github.com/prowler-cloud/prowler/pull/10978)
+1
View File
@@ -10,6 +10,7 @@
Full access to Prowler Cloud platform and self-managed Prowler App for:
- **Findings Analysis**: Query, filter, and analyze security findings across all your cloud environments
- **Finding Groups Analysis**: Triage findings grouped by check ID and drill down into affected resources
- **Provider Management**: Create, configure, and manage your configured Prowler providers (AWS, Azure, GCP, etc.)
- **Scan Orchestration**: Trigger on-demand scans and schedule recurring security assessments
- **Resource Inventory**: Search and view detailed information about your audited resources
@@ -0,0 +1,300 @@
"""Pydantic models for Prowler Finding Groups responses."""
from typing import Literal
from pydantic import Field
from prowler_mcp_server.prowler_app.models.base import MinimalSerializerMixin
FindingStatus = Literal["FAIL", "PASS", "MANUAL"]
FindingSeverity = Literal["critical", "high", "medium", "low", "informational"]
FindingDelta = Literal["new", "changed"]
def _attributes(data: dict) -> dict:
return data.get("attributes", {})
def _counter(attributes: dict, key: str) -> int:
return attributes.get(key) or 0
def _simplified_group_kwargs(data: dict) -> dict:
attributes = _attributes(data)
return {
"check_id": attributes.get("check_id", data.get("id", "")),
"check_title": attributes.get("check_title"),
"severity": attributes.get("severity", "informational"),
"status": attributes.get("status", "MANUAL"),
"muted": attributes.get("muted", False),
"impacted_providers": attributes.get("impacted_providers") or [],
"resources_fail": _counter(attributes, "resources_fail"),
"resources_total": _counter(attributes, "resources_total"),
"pass_count": _counter(attributes, "pass_count"),
"fail_count": _counter(attributes, "fail_count"),
"manual_count": _counter(attributes, "manual_count"),
"muted_count": _counter(attributes, "muted_count"),
"new_count": _counter(attributes, "new_count"),
"changed_count": _counter(attributes, "changed_count"),
"first_seen_at": attributes.get("first_seen_at"),
"last_seen_at": attributes.get("last_seen_at"),
"failing_since": attributes.get("failing_since"),
}
class SimplifiedFindingGroup(MinimalSerializerMixin):
"""Finding group summary optimized for browsing many checks."""
check_id: str = Field(description="Public check ID that identifies this group")
check_title: str | None = Field(
default=None, description="Human-readable check title"
)
severity: FindingSeverity = Field(description="Highest severity in the group")
status: FindingStatus = Field(description="Aggregated finding group status")
muted: bool = Field(
description="Whether all findings in this group are muted or accepted"
)
impacted_providers: list[str] = Field(
default_factory=list,
description="Provider types impacted by this finding group",
)
resources_fail: int = Field(
description="Number of non-muted failing resources in this group", ge=0
)
resources_total: int = Field(
description="Total number of resources in this group", ge=0
)
pass_count: int = Field(
description="Number of non-muted PASS findings in this group", ge=0
)
fail_count: int = Field(
description="Number of non-muted FAIL findings in this group", ge=0
)
manual_count: int = Field(
description="Number of non-muted MANUAL findings in this group", ge=0
)
muted_count: int = Field(description="Total muted findings in this group", ge=0)
new_count: int = Field(description="Number of new non-muted findings", ge=0)
changed_count: int = Field(
description="Number of changed non-muted findings", ge=0
)
first_seen_at: str | None = Field(
default=None, description="First time this group was detected"
)
last_seen_at: str | None = Field(
default=None, description="Last time this group was detected"
)
failing_since: str | None = Field(
default=None, description="First time this group started failing"
)
@classmethod
def from_api_response(cls, data: dict) -> "SimplifiedFindingGroup":
"""Transform JSON:API finding group response to simplified format."""
return cls(**_simplified_group_kwargs(data))
class DetailedFindingGroup(SimplifiedFindingGroup):
"""Finding group with complete counters and descriptive context."""
check_description: str | None = Field(
default=None, description="Description of the check behind this group"
)
pass_muted_count: int = Field(description="Muted PASS findings", ge=0)
fail_muted_count: int = Field(description="Muted FAIL findings", ge=0)
manual_muted_count: int = Field(description="Muted MANUAL findings", ge=0)
new_fail_count: int = Field(description="New non-muted FAIL findings", ge=0)
new_fail_muted_count: int = Field(description="New muted FAIL findings", ge=0)
new_pass_count: int = Field(description="New non-muted PASS findings", ge=0)
new_pass_muted_count: int = Field(description="New muted PASS findings", ge=0)
new_manual_count: int = Field(description="New non-muted MANUAL findings", ge=0)
new_manual_muted_count: int = Field(
description="New muted MANUAL findings", ge=0
)
changed_fail_count: int = Field(
description="Changed non-muted FAIL findings", ge=0
)
changed_fail_muted_count: int = Field(
description="Changed muted FAIL findings", ge=0
)
changed_pass_count: int = Field(
description="Changed non-muted PASS findings", ge=0
)
changed_pass_muted_count: int = Field(
description="Changed muted PASS findings", ge=0
)
changed_manual_count: int = Field(
description="Changed non-muted MANUAL findings", ge=0
)
changed_manual_muted_count: int = Field(
description="Changed muted MANUAL findings", ge=0
)
@classmethod
def from_api_response(cls, data: dict) -> "DetailedFindingGroup":
"""Transform JSON:API finding group response to detailed format."""
attributes = _attributes(data)
return cls(
**_simplified_group_kwargs(data),
check_description=attributes.get("check_description"),
pass_muted_count=_counter(attributes, "pass_muted_count"),
fail_muted_count=_counter(attributes, "fail_muted_count"),
manual_muted_count=_counter(attributes, "manual_muted_count"),
new_fail_count=_counter(attributes, "new_fail_count"),
new_fail_muted_count=_counter(attributes, "new_fail_muted_count"),
new_pass_count=_counter(attributes, "new_pass_count"),
new_pass_muted_count=_counter(attributes, "new_pass_muted_count"),
new_manual_count=_counter(attributes, "new_manual_count"),
new_manual_muted_count=_counter(attributes, "new_manual_muted_count"),
changed_fail_count=_counter(attributes, "changed_fail_count"),
changed_fail_muted_count=_counter(attributes, "changed_fail_muted_count"),
changed_pass_count=_counter(attributes, "changed_pass_count"),
changed_pass_muted_count=_counter(attributes, "changed_pass_muted_count"),
changed_manual_count=_counter(attributes, "changed_manual_count"),
changed_manual_muted_count=_counter(
attributes, "changed_manual_muted_count"
),
)
class FindingGroupsListResponse(MinimalSerializerMixin):
"""Paginated response for finding group list queries."""
groups: list[SimplifiedFindingGroup] = Field(
description="Finding groups matching the query"
)
total_num_groups: int = Field(
description="Total groups matching the query across all pages", ge=0
)
total_num_pages: int = Field(description="Total pages available", ge=0)
current_page: int = Field(description="Current page number", ge=1)
@classmethod
def from_api_response(cls, response: dict) -> "FindingGroupsListResponse":
"""Transform JSON:API list response to simplified format."""
pagination = response.get("meta", {}).get("pagination", {})
groups = [
SimplifiedFindingGroup.from_api_response(item)
for item in response.get("data", [])
]
return cls(
groups=groups,
total_num_groups=pagination.get("count", len(groups)),
total_num_pages=pagination.get("pages", 1),
current_page=pagination.get("page", 1),
)
class FindingGroupResourceInfo(MinimalSerializerMixin):
"""Nested resource information for a finding group row."""
uid: str = Field(description="Provider-native resource UID")
name: str = Field(description="Resource name")
service: str = Field(description="Cloud service")
region: str = Field(description="Cloud region")
type: str = Field(description="Resource type")
resource_group: str | None = Field(
default=None, description="Provider resource group or equivalent"
)
@classmethod
def from_api_response(cls, data: dict) -> "FindingGroupResourceInfo":
"""Transform nested resource data to simplified format."""
return cls(
uid=data.get("uid", ""),
name=data.get("name", ""),
service=data.get("service", ""),
region=data.get("region", ""),
type=data.get("type", ""),
resource_group=data.get("resource_group"),
)
class FindingGroupProviderInfo(MinimalSerializerMixin):
"""Nested provider information for a finding group resource row."""
type: str = Field(description="Provider type")
uid: str = Field(description="Provider-native account or subscription ID")
alias: str | None = Field(default=None, description="Provider alias")
@classmethod
def from_api_response(cls, data: dict) -> "FindingGroupProviderInfo":
"""Transform nested provider data to simplified format."""
return cls(
type=data.get("type", ""),
uid=data.get("uid", ""),
alias=data.get("alias"),
)
class FindingGroupResource(MinimalSerializerMixin):
"""Resource row affected by a finding group."""
id: str = Field(description="Row identifier for this finding group resource")
resource: FindingGroupResourceInfo = Field(description="Affected resource")
provider: FindingGroupProviderInfo = Field(description="Affected provider")
finding_id: str = Field(
description="Finding UUID to use with prowler_app_get_finding_details"
)
status: FindingStatus = Field(description="Finding status for this resource")
severity: FindingSeverity = Field(description="Finding severity")
muted: bool = Field(description="Whether the finding is muted")
delta: FindingDelta | None = Field(default=None, description="Change status")
first_seen_at: str | None = Field(default=None, description="First seen time")
last_seen_at: str | None = Field(default=None, description="Last seen time")
muted_reason: str | None = Field(default=None, description="Mute reason")
@classmethod
def from_api_response(cls, data: dict) -> "FindingGroupResource":
"""Transform JSON:API finding group resource response."""
attributes = _attributes(data)
return cls(
id=data.get("id", ""),
resource=FindingGroupResourceInfo.from_api_response(
attributes.get("resource") or {}
),
provider=FindingGroupProviderInfo.from_api_response(
attributes.get("provider") or {}
),
finding_id=str(attributes.get("finding_id", "")),
status=attributes.get("status", "MANUAL"),
severity=attributes.get("severity", "informational"),
muted=attributes.get("muted", False),
delta=attributes.get("delta"),
first_seen_at=attributes.get("first_seen_at"),
last_seen_at=attributes.get("last_seen_at"),
muted_reason=attributes.get("muted_reason"),
)
class FindingGroupResourcesListResponse(MinimalSerializerMixin):
"""Paginated response for finding group resource queries."""
resources: list[FindingGroupResource] = Field(
description="Resources matching the finding group query"
)
total_num_resources: int = Field(
description="Total resources matching the query across all pages", ge=0
)
total_num_pages: int = Field(description="Total pages available", ge=0)
current_page: int = Field(description="Current page number", ge=1)
@classmethod
def from_api_response(cls, response: dict) -> "FindingGroupResourcesListResponse":
"""Transform JSON:API resource list response to simplified format."""
pagination = response.get("meta", {}).get("pagination", {})
resources = [
FindingGroupResource.from_api_response(item)
for item in response.get("data", [])
]
return cls(
resources=resources,
total_num_resources=pagination.get("count", len(resources)),
total_num_pages=pagination.get("pages", 1),
current_page=pagination.get("page", 1),
)
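As a sanity check, a minimal JSON:API payload round-trips through the simplified model as follows; the payload is hand-written from the keys this module reads, and it assumes `MinimalSerializerMixin` behaves like a standard Pydantic `BaseModel`:

```python
# Hand-written sample payload; only keys read by _simplified_group_kwargs are
# set, so every other field falls back to its declared default.
sample = {
    "id": "s3_bucket_public_access",
    "attributes": {
        "check_id": "s3_bucket_public_access",
        "check_title": "Ensure S3 buckets block public access",
        "severity": "high",
        "status": "FAIL",
        "muted": False,
        "impacted_providers": ["aws"],
        "resources_fail": 3,
        "resources_total": 10,
        "fail_count": 3,
    },
}

group = SimplifiedFindingGroup.from_api_response(sample)
assert group.fail_count == 3
assert group.pass_count == 0  # missing counters default to 0 via _counter()
```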
@@ -0,0 +1,473 @@
"""Finding Groups tools for Prowler App MCP Server.
This module provides read-only tools for finding group triage and drill-downs.
"""
from typing import Any, Literal
from urllib.parse import quote
from pydantic import Field
from prowler_mcp_server.prowler_app.models.finding_groups import (
DetailedFindingGroup,
FindingGroupResourcesListResponse,
FindingGroupsListResponse,
)
from prowler_mcp_server.prowler_app.tools.base import BaseTool
StatusFilter = Literal["FAIL", "PASS", "MANUAL"]
SeverityFilter = Literal["critical", "high", "medium", "low", "informational"]
DeltaFilter = Literal["new", "changed"]
GROUP_DETAIL_FIELDS = (
"check_id,check_title,check_description,severity,status,muted,"
"impacted_providers,resources_fail,resources_total,pass_count,fail_count,"
"manual_count,pass_muted_count,fail_muted_count,manual_muted_count,"
"muted_count,new_count,changed_count,new_fail_count,new_fail_muted_count,"
"new_pass_count,new_pass_muted_count,new_manual_count,new_manual_muted_count,"
"changed_fail_count,changed_fail_muted_count,changed_pass_count,"
"changed_pass_muted_count,changed_manual_count,changed_manual_muted_count,"
"first_seen_at,last_seen_at,failing_since"
)
GROUP_LIST_FIELDS = (
"check_id,check_title,severity,status,muted,impacted_providers,"
"resources_fail,resources_total,pass_count,fail_count,manual_count,"
"muted_count,new_count,changed_count,first_seen_at,last_seen_at,failing_since"
)
RESOURCE_FIELDS = (
"resource,provider,finding_id,status,severity,muted,delta,"
"first_seen_at,last_seen_at,muted_reason"
)
class FindingGroupsTools(BaseTool):
"""Tools for Finding Groups operations."""
@staticmethod
def _bool_value(value: bool | str) -> bool:
"""Normalize bool-like MCP client values."""
if isinstance(value, bool):
return value
return value.lower() == "true"
@staticmethod
def _group_endpoint(date_range: tuple[str, str] | None) -> str:
return "/finding-groups/latest" if date_range is None else "/finding-groups"
@staticmethod
def _resource_endpoint(check_id: str, date_range: tuple[str, str] | None) -> str:
escaped_check_id = quote(check_id, safe="")
if date_range is None:
return f"/finding-groups/latest/{escaped_check_id}/resources"
return f"/finding-groups/{escaped_check_id}/resources"
def _base_date_params(
self, date_from: str | None, date_to: str | None
) -> tuple[tuple[str, str] | None, dict[str, Any]]:
date_range = self.api_client.normalize_date_range(
date_from, date_to, max_days=2
)
if date_range is None:
return None, {}
return date_range, {
"filter[inserted_at__gte]": date_range[0],
"filter[inserted_at__lte]": date_range[1],
}
def _apply_common_filters(
self,
params: dict[str, Any],
provider: list[str],
provider_type: list[str],
provider_uid: list[str],
provider_alias: str | None,
region: list[str],
service: list[str],
resource_type: list[str],
resource_name: str | None,
resource_uid: str | None,
resource_group: list[str],
category: list[str],
check_id: list[str],
check_title: str | None,
severity: list[SeverityFilter],
status: list[StatusFilter],
muted: bool | str | None,
delta: list[DeltaFilter],
) -> None:
if provider:
params["filter[provider__in]"] = provider
if provider_type:
params["filter[provider_type__in]"] = provider_type
if provider_uid:
params["filter[provider_uid__in]"] = provider_uid
if provider_alias:
params["filter[provider_alias__icontains]"] = provider_alias
if region:
params["filter[region__in]"] = region
if service:
params["filter[service__in]"] = service
if resource_type:
params["filter[resource_type__in]"] = resource_type
if resource_name:
params["filter[resource_name__icontains]"] = resource_name
if resource_uid:
params["filter[resource_uid__icontains]"] = resource_uid
if resource_group:
params["filter[resource_groups__in]"] = resource_group
if category:
params["filter[category__in]"] = category
if check_id:
params["filter[check_id__in]"] = check_id
if check_title:
params["filter[check_title__icontains]"] = check_title
if severity:
params["filter[severity__in]"] = severity
if status:
params["filter[status__in]"] = status
if muted is not None:
params["filter[muted]"] = self._bool_value(muted)
if delta:
params["filter[delta__in]"] = delta
async def list_finding_groups(
self,
provider: list[str] = Field(
default=[],
description="Filter by provider UUIDs. Multiple values allowed. If empty, all visible providers are returned.",
),
provider_type: list[str] = Field(
default=[],
description="Filter by provider type. Multiple values allowed, such as aws, azure, gcp, kubernetes, github, or m365.",
),
provider_uid: list[str] = Field(
default=[],
description="Filter by provider-native account, subscription, or project IDs. Multiple values allowed.",
),
provider_alias: str | None = Field(
default=None,
description="Filter by provider alias/name using partial matching.",
),
region: list[str] = Field(
default=[],
description="Filter by cloud regions. Multiple values allowed.",
),
service: list[str] = Field(
default=[],
description="Filter by cloud services. Multiple values allowed.",
),
resource_type: list[str] = Field(
default=[],
description="Filter by resource types. Multiple values allowed.",
),
resource_name: str | None = Field(
default=None,
description="Filter by resource name using partial matching.",
),
resource_uid: str | None = Field(
default=None,
description="Filter by resource UID using partial matching.",
),
resource_group: list[str] = Field(
default=[],
description="Filter by resource group values. Multiple values allowed.",
),
category: list[str] = Field(
default=[],
description="Filter by finding categories. Multiple values allowed.",
),
check_id: list[str] = Field(
default=[],
description="Filter by check IDs. Multiple values allowed.",
),
check_title: str | None = Field(
default=None,
description="Filter by check title using partial matching.",
),
severity: list[SeverityFilter] = Field(
default=[],
description="Filter by aggregated severity. Empty returns all severities.",
),
status: list[StatusFilter] = Field(
default=["FAIL"],
description="Filter by aggregated status. Default returns failing groups. Pass [] to return all statuses.",
),
muted: bool | str | None = Field(
default=None,
description="Filter by fully muted group state. Accepts true/false.",
),
include_muted: bool | str = Field(
default=False,
description="When false, excludes fully muted groups. Set true to include fully muted groups.",
),
delta: list[DeltaFilter] = Field(
default=[],
description="Filter by group delta values: new or changed.",
),
date_from: str | None = Field(
default=None,
description="Start date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
date_to: str | None = Field(
default=None,
description="End date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
sort: str | None = Field(
default=None,
description="Optional sort expression supported by the finding-groups API, such as -fail_count,-severity,check_id.",
),
page_size: int = Field(
default=50, description="Number of groups to return per page"
),
page_number: int = Field(
default=1, description="Page number to retrieve (1-indexed)"
),
) -> dict[str, Any]:
"""List finding groups aggregated by check ID.
Default behavior returns the latest non-muted FAIL groups for fast triage.
Without dates this uses `/finding-groups/latest`. With `date_from` or
`date_to`, this uses `/finding-groups` with a maximum 2-day date window.
Use this tool to find noisy or high-impact checks, then call
prowler_app_get_finding_group_details for complete counters or
prowler_app_list_finding_group_resources to drill into affected resources.
"""
try:
self.api_client.validate_page_size(page_size)
date_range, params = self._base_date_params(date_from, date_to)
endpoint = self._group_endpoint(date_range)
self._apply_common_filters(
params,
provider,
provider_type,
provider_uid,
provider_alias,
region,
service,
resource_type,
resource_name,
resource_uid,
resource_group,
category,
check_id,
check_title,
severity,
status,
muted,
delta,
)
params["filter[include_muted]"] = self._bool_value(include_muted)
params["page[size]"] = page_size
params["page[number]"] = page_number
params["fields[finding-groups]"] = GROUP_LIST_FIELDS
if sort:
params["sort"] = sort
clean_params = self.api_client.build_filter_params(params)
api_response = await self.api_client.get(endpoint, params=clean_params)
response = FindingGroupsListResponse.from_api_response(api_response)
return response.model_dump()
except Exception as e:
self.logger.error(f"Error listing finding groups: {e}")
return {"error": str(e), "status": "failed"}
async def get_finding_group_details(
self,
check_id: str = Field(
description="Public check ID that identifies the finding group. This is not a UUID."
),
date_from: str | None = Field(
default=None,
description="Start date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
date_to: str | None = Field(
default=None,
description="End date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
) -> dict[str, Any]:
"""Get complete details for one finding group by exact check ID.
Uses `filter[check_id]` exact matching against latest data by default,
or historical data when dates are provided. Fully muted groups are
included by default so accepted risk does not look like a missing group.
"""
try:
date_range, params = self._base_date_params(date_from, date_to)
endpoint = self._group_endpoint(date_range)
params.update(
{
"filter[check_id]": check_id,
"filter[include_muted]": True,
"page[size]": 1,
"page[number]": 1,
"fields[finding-groups]": GROUP_DETAIL_FIELDS,
}
)
clean_params = self.api_client.build_filter_params(params)
api_response = await self.api_client.get(endpoint, params=clean_params)
data = api_response.get("data", [])
if not data:
return {
"error": f"Finding group '{check_id}' not found.",
"status": "not_found",
}
group = DetailedFindingGroup.from_api_response(data[0])
return group.model_dump()
except Exception as e:
self.logger.error(f"Error getting finding group details: {e}")
return {"error": str(e), "status": "failed"}
async def list_finding_group_resources(
self,
check_id: str = Field(
description="Public check ID that identifies the finding group. This is not a UUID."
),
provider: list[str] = Field(
default=[],
description="Filter by provider UUIDs. Multiple values allowed.",
),
provider_type: list[str] = Field(
default=[],
description="Filter by provider type. Multiple values allowed.",
),
provider_uid: list[str] = Field(
default=[],
description="Filter by provider-native account, subscription, or project IDs. Multiple values allowed.",
),
provider_alias: str | None = Field(
default=None,
description="Filter by provider alias/name using partial matching.",
),
region: list[str] = Field(
default=[],
description="Filter by cloud regions. Multiple values allowed.",
),
service: list[str] = Field(
default=[],
description="Filter by cloud services. Multiple values allowed.",
),
resource_type: list[str] = Field(
default=[],
description="Filter by resource types. Multiple values allowed.",
),
resource_name: str | None = Field(
default=None,
description="Filter by resource name using partial matching.",
),
resource_uid: str | None = Field(
default=None,
description="Filter by resource UID using partial matching.",
),
resource_group: list[str] = Field(
default=[],
description="Filter by resource group values. Multiple values allowed.",
),
category: list[str] = Field(
default=[],
description="Filter by finding categories. Multiple values allowed.",
),
severity: list[SeverityFilter] = Field(
default=[],
description="Filter by severity. Empty returns all severities.",
),
status: list[StatusFilter] = Field(
default=["FAIL"],
description="Filter by status. Default returns failing resources. Pass [] to return all statuses.",
),
muted: bool | str | None = Field(
default=None,
description="Filter by muted state. Accepts true/false. Overrides include_muted when provided.",
),
include_muted: bool | str = Field(
default=False,
description="When false, returns only actionable unmuted resources by applying muted=false. Set true to include muted and unmuted resources.",
),
delta: list[DeltaFilter] = Field(
default=[], description="Filter by delta values: new or changed."
),
date_from: str | None = Field(
default=None,
description="Start date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
date_to: str | None = Field(
default=None,
description="End date for historical query in YYYY-MM-DD format. Maximum range is 2 days.",
),
sort: str | None = Field(
default=None,
description="Optional sort expression supported by the finding group resources API.",
),
page_size: int = Field(
default=50, description="Number of resources to return per page"
),
page_number: int = Field(
default=1, description="Page number to retrieve (1-indexed)"
),
) -> dict[str, Any]:
"""List resources affected by a finding group.
Without dates this uses `/finding-groups/latest/{check_id}/resources`.
With `date_from` or `date_to`, this uses
`/finding-groups/{check_id}/resources` with a maximum 2-day date window.
Default behavior returns FAIL, unmuted resources so the result is
actionable. Set `include_muted=True` to include accepted/suppressed
resources too. Each row includes nested resource and provider data plus
`finding_id`. Use `prowler_app_get_finding_details(finding_id)` to
retrieve complete remediation guidance for a specific resource finding.
"""
try:
self.api_client.validate_page_size(page_size)
date_range, params = self._base_date_params(date_from, date_to)
endpoint = self._resource_endpoint(check_id, date_range)
if muted is None and not self._bool_value(include_muted):
muted = False
self._apply_common_filters(
params,
provider,
provider_type,
provider_uid,
provider_alias,
region,
service,
resource_type,
resource_name,
resource_uid,
resource_group,
category,
[],
None,
severity,
status,
muted,
delta,
)
params["page[size]"] = page_size
params["page[number]"] = page_number
params["fields[finding-group-resources]"] = RESOURCE_FIELDS
if sort:
params["sort"] = sort
clean_params = self.api_client.build_filter_params(params)
api_response = await self.api_client.get(endpoint, params=clean_params)
response = FindingGroupResourcesListResponse.from_api_response(
api_response
)
return response.model_dump()
except Exception as e:
self.logger.error(f"Error listing finding group resources: {e}")
return {"error": str(e), "status": "failed"}
-4
View File
@@ -14,19 +14,15 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Adding a compliance output formatter (per-provider class + table dispatcher) | `prowler-compliance` |
| Adding new providers | `prowler-provider` |
| Adding services to existing providers | `prowler-provider` |
| Auditing check-to-requirement mappings as a cloud auditor | `prowler-compliance` |
| Create PR that requires changelog entry | `prowler-changelog` |
| Creating new checks | `prowler-sdk-check` |
| Creating/updating compliance frameworks | `prowler-compliance` |
| Fixing compliance JSON bugs (duplicate IDs, empty Section, stale refs) | `prowler-compliance` |
| Mapping checks to compliance controls | `prowler-compliance` |
| Mocking AWS with moto in tests | `prowler-test-sdk` |
| Review changelog format and conventions | `prowler-changelog` |
| Reviewing compliance framework PRs | `prowler-compliance-review` |
| Syncing compliance framework with upstream catalog | `prowler-compliance` |
| Update CHANGELOG.md in any component | `prowler-changelog` |
| Updating existing checks and metadata | `prowler-sdk-check` |
| Writing Prowler SDK tests | `prowler-test-sdk` |
-8
View File
@@ -16,14 +16,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
---
## [5.26.2] (Prowler UNRELEASED)
### 🐞 Fixed
- `entra_users_mfa_capable` and `entra_break_glass_account_fido2_security_key_registered` report a preventive FAIL per affected user (with the missing permission named) when the M365 service principal lacks `AuditLog.Read.All`, instead of mass false positives [(#10907)](https://github.com/prowler-cloud/prowler/pull/10907)
---
## [5.26.1] (Prowler v5.26.1)
### 🐞 Fixed
@@ -85,15 +85,6 @@ class entra_break_glass_account_fido2_security_key_registered(Check):
resource_id=user.id,
)
if entra_client.user_registration_details_error:
report.status = "FAIL"
report.status_extended = (
f"Cannot verify FIDO2 security key registration for break glass account {user.name}: "
f"{entra_client.user_registration_details_error}."
)
findings.append(report)
continue
auth_methods = set(user.authentication_methods)
has_fido2 = "fido2SecurityKey" in auth_methods
has_passkey_device_bound = "passKeyDeviceBound" in auth_methods
@@ -3,7 +3,7 @@ import json
from asyncio import gather
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
from typing import Dict, List, Optional
from uuid import UUID
from kiota_abstractions.base_request_configuration import RequestConfiguration
@@ -76,7 +76,6 @@ class Entra(M365Service):
self.tenant_domain = provider.identity.tenant_domain
self.tenant_id = getattr(provider.identity, "tenant_id", None)
self.user_registration_details_error: Optional[str] = None
attributes = loop.run_until_complete(
gather(
self._get_authorization_policy(),
@@ -855,9 +854,7 @@ class Entra(M365Service):
for member in members:
user_roles_map.setdefault(member.id, []).append(role_template_id)
registration_details, self.user_registration_details_error = (
await self._get_user_registration_details()
)
registration_details = await self._get_user_registration_details()
while users_response:
for user in getattr(users_response, "value", []) or []:
@@ -900,24 +897,18 @@ class Entra(M365Service):
)
return users
async def _get_user_registration_details(
self,
) -> Tuple[Dict[str, Dict[str, Any]], Optional[str]]:
async def _get_user_registration_details(self):
"""Retrieve user authentication method registration details.
Fetches registration details from the Microsoft Graph API, including
MFA capability and the specific authentication methods each user has registered.
Returns:
A tuple containing:
- A dictionary mapping user IDs to their registration details,
where each value is a dict with 'is_mfa_capable' (bool) and
'authentication_methods' (list of str), or an empty dict if
retrieval fails.
- An error message string if there was an access error, None otherwise.
dict: A dictionary mapping user IDs to their registration details,
where each value is a dict with 'is_mfa_capable' (bool) and
'authentication_methods' (list of str).
"""
registration_details = {}
error_message = None
try:
registration_builder = (
self.client.reports.authentication_methods.user_registration_details
@@ -942,25 +933,16 @@ class Entra(M365Service):
next_link
).get()
except ODataError as error:
error_code = getattr(error.error, "code", None) if error.error else None
if error_code == "Authorization_RequestDenied":
error_message = "Insufficient privileges to read user registration details. Required permission: AuditLog.Read.All"
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error_message}"
)
else:
except Exception as error:
if (
error.__class__.__name__ == "ODataError"
and error.__dict__.get("response_status_code", None) == 403
):
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
error_message = str(error)
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
error_message = f"Failed to retrieve user registration details: {error}"
return registration_details, error_message
return registration_details
async def _get_oauth_apps(self) -> Optional[Dict[str, "OAuthApp"]]:
"""
@@ -13,10 +13,6 @@ class entra_users_mfa_capable(Check):
("Ensure all member users are 'MFA capable'").
Guest users and disabled accounts are excluded from the evaluation.
- PASS: The member user is MFA capable.
- FAIL: The member user is not MFA capable, or MFA capability cannot be
verified due to insufficient permissions to read user registration details.
"""
def execute(self) -> List[CheckReportM365]:
@@ -46,13 +42,7 @@ class entra_users_mfa_capable(Check):
resource_id=user.id,
)
if entra_client.user_registration_details_error:
report.status = "FAIL"
report.status_extended = (
f"Cannot verify MFA capability for user {user.name}: "
f"{entra_client.user_registration_details_error}."
)
elif not user.is_mfa_capable:
if not user.is_mfa_capable:
report.status = "FAIL"
report.status_extended = f"User {user.name} is not MFA capable."
else:
+1 -1
View File
@@ -50,7 +50,7 @@ Reusable patterns for common technologies:
|-------|-------------|
| `typescript` | Const types, flat interfaces, utility types |
| `react-19` | React 19 patterns, React Compiler |
| `nextjs-16` | App Router, Server Actions, proxy.ts, streaming |
| `nextjs-15` | App Router, Server Actions, streaming |
| `tailwind-4` | cn() utility, Tailwind 4 patterns |
| `playwright` | Page Object Model, selectors |
| `vitest` | Unit testing, React Testing Library |
+150
View File
@@ -0,0 +1,150 @@
---
name: nextjs-15
description: >
Next.js 15 App Router patterns.
Trigger: When working in Next.js App Router (app/), Server Components vs Client Components, Server Actions, Route Handlers, caching/revalidation, and streaming/Suspense.
license: Apache-2.0
metadata:
author: prowler-cloud
version: "1.0"
scope: [root, ui]
auto_invoke: "App Router / Server Actions"
allowed-tools: Read, Edit, Write, Glob, Grep, Bash, WebFetch, WebSearch, Task
---
## App Router File Conventions
```
app/
├── layout.tsx # Root layout (required)
├── page.tsx # Home page (/)
├── loading.tsx # Loading UI (Suspense)
├── error.tsx # Error boundary
├── not-found.tsx # 404 page
├── (auth)/ # Route group (no URL impact)
│ ├── login/page.tsx # /login
│ └── signup/page.tsx # /signup
├── api/
│ └── route.ts # API handler
└── _components/ # Private folder (not routed)
```
## Server Components (Default)
```typescript
// No directive needed - async by default
export default async function Page() {
const data = await db.query();
return <Component data={data} />;
}
```
## Server Actions
```typescript
// app/actions.ts
"use server";
import { revalidatePath } from "next/cache";
import { redirect } from "next/navigation";
export async function createUser(formData: FormData) {
const name = formData.get("name") as string;
await db.users.create({ data: { name } });
revalidatePath("/users");
redirect("/users");
}
// Usage
<form action={createUser}>
<input name="name" required />
<button type="submit">Create</button>
</form>
```
## Data Fetching
```typescript
// Parallel
async function Page() {
const [users, posts] = await Promise.all([
getUsers(),
getPosts(),
]);
return <Dashboard users={users} posts={posts} />;
}
// Streaming with Suspense
<Suspense fallback={<Loading />}>
<SlowComponent />
</Suspense>
```
## Route Handlers (API)
```typescript
// app/api/users/route.ts
import { NextRequest, NextResponse } from "next/server";
export async function GET(request: NextRequest) {
const users = await db.users.findMany();
return NextResponse.json(users);
}
export async function POST(request: NextRequest) {
const body = await request.json();
const user = await db.users.create({ data: body });
return NextResponse.json(user, { status: 201 });
}
```
## Middleware
```typescript
// middleware.ts (root level)
import { NextResponse } from "next/server";
import type { NextRequest } from "next/server";
export function middleware(request: NextRequest) {
const token = request.cookies.get("token");
if (!token && request.nextUrl.pathname.startsWith("/dashboard")) {
return NextResponse.redirect(new URL("/login", request.url));
}
return NextResponse.next();
}
export const config = {
matcher: ["/dashboard/:path*"],
};
```
## Metadata
```typescript
// Static
export const metadata = {
title: "My App",
description: "Description",
};
// Dynamic
export async function generateMetadata({ params }) {
const product = await getProduct(params.id);
return { title: product.name };
}
```
## server-only Package
```typescript
import "server-only";
// This will error if imported in a client component
export async function getSecretData() {
return db.secrets.findMany();
}
```
-160
View File
@@ -1,160 +0,0 @@
---
name: nextjs-16
description: >
Next.js 16 App Router patterns.
Trigger: When working in Next.js App Router (app/), Server Components vs Client Components, Server Actions, Route Handlers, proxy.ts, caching/revalidation, Cache Components, and streaming/Suspense.
license: Apache-2.0
metadata:
author: prowler-cloud
version: "1.0"
scope: [root, ui]
auto_invoke: "App Router / Server Actions"
allowed-tools: Read, Edit, Write, Glob, Grep, Bash, WebFetch, WebSearch, Task
---
## App Router File Conventions
```
app/
├── layout.tsx # Root layout (required)
├── page.tsx # Home page (/)
├── loading.tsx # Loading UI (Suspense)
├── error.tsx # Error boundary
├── not-found.tsx # 404 page
├── (auth)/ # Route group (no URL impact)
│ ├── login/page.tsx # /login
│ └── signup/page.tsx # /signup
├── api/
│ └── route.ts # API handler
└── _components/ # Private folder (not routed)
```
## Next.js 16 Notes
- Use `proxy.ts` for request-boundary logic. `middleware.ts` is deprecated in Next.js 16.
- `proxy.ts` runs on the Node.js runtime and cannot be configured for Edge.
- Keep `proxy.ts` matchers narrow. Exclude `api`, static files, and image assets unless the route explicitly needs proxy logic.
- Route Handlers in `app/api/**/route.ts` are the right fit for health checks, webhooks, backend-for-frontend endpoints, and server-only proxy calls.
## Server Components (Default)
```typescript
// No directive needed - async by default
export default async function Page() {
const data = await db.query();
return <Component data={data} />;
}
```
## Server Actions
```typescript
"use server";
import { revalidatePath } from "next/cache";
import { redirect } from "next/navigation";
export async function createUser(formData: FormData) {
const name = formData.get("name") as string;
await db.users.create({ data: { name } });
revalidatePath("/users");
redirect("/users");
}
```
## Data Fetching
```typescript
async function Page() {
const [users, posts] = await Promise.all([getUsers(), getPosts()]);
return <Dashboard users={users} posts={posts} />;
}
<Suspense fallback={<Loading />}>
<SlowComponent />
</Suspense>;
```
## Caching and Revalidation
```typescript
import { revalidatePath, revalidateTag } from "next/cache";
export async function refreshDashboard() {
"use server";
revalidatePath("/");
revalidateTag("dashboard");
}
```
- Use `revalidatePath` for route-level invalidation after mutations.
- Use `revalidateTag` when data fetches share a cache tag across routes.
- With Cache Components enabled, put `"use cache"` only in pure server-side cached functions. Do not cache auth, tenant-scoped, or per-user responses unless the cache key explicitly isolates them.
## Route Handlers (API)
```typescript
// app/api/users/route.ts
import { NextResponse } from "next/server";
export async function GET() {
const users = await db.users.findMany();
return NextResponse.json(users);
}
export async function POST(request: Request) {
const body = await request.json();
const user = await db.users.create({ data: body });
return NextResponse.json(user, { status: 201 });
}
```
## Proxy
```typescript
// proxy.ts (root level)
import { NextResponse } from "next/server";
import type { NextRequest } from "next/server";
export function proxy(request: NextRequest) {
const token = request.cookies.get("token");
if (!token && request.nextUrl.pathname.startsWith("/dashboard")) {
return NextResponse.redirect(new URL("/login", request.url));
}
return NextResponse.next();
}
export const config = {
matcher: ["/dashboard/:path*"],
};
```
## Metadata
```typescript
export const metadata = {
title: "My App",
description: "Description",
};
export async function generateMetadata() {
const product = await getProduct();
return { title: product.name };
}
```
## server-only Package
```typescript
import "server-only";
export async function getSecretData() {
return db.secrets.findMany();
}
```
+3 -3
View File
@@ -1,7 +1,7 @@
---
name: prowler-ui
description: >
Prowler UI-specific patterns. For generic patterns, see: typescript, react-19, nextjs-16, tailwind-4.
Prowler UI-specific patterns. For generic patterns, see: typescript, react-19, nextjs-15, tailwind-4.
Trigger: When working inside ui/ on Prowler-specific conventions (shadcn vs HeroUI legacy, folder placement, actions/adapters, shared types/hooks/lib).
license: Apache-2.0
metadata:
@@ -18,7 +18,7 @@ allowed-tools: Read, Edit, Write, Glob, Grep, Bash, WebFetch, WebSearch, Task
- `typescript` - Const types, flat interfaces
- `react-19` - No useMemo/useCallback, compiler
- `nextjs-16` - App Router, Server Actions
- `nextjs-15` - App Router, Server Actions
- `tailwind-4` - cn() utility, styling rules
- `zod-4` - Schema validation
- `zustand-5` - State management
@@ -28,7 +28,7 @@ allowed-tools: Read, Edit, Write, Glob, Grep, Bash, WebFetch, WebSearch, Task
## Tech Stack (Versions)
```
Next.js 16.2.3 | React 19.2.5 | Tailwind 4.1.18 | shadcn/ui
Next.js 15.5.9 | React 19.2.2 | Tailwind 4.1.13 | shadcn/ui
Zod 4.1.11 | React Hook Form 7.62.0 | Zustand 5.0.8
NextAuth 5.0.0-beta.30 | Recharts 2.15.4
HeroUI 2.8.4 (LEGACY - do not add new components)
+1 -1
View File
@@ -18,7 +18,7 @@ allowed-tools: Read, Edit, Write, Glob, Grep, Bash, WebFetch, WebSearch, Task
|-----------|-------|----------|
| SDK | Python 3.10+, Poetry | `prowler/` |
| API | Django 5.1, DRF, Celery | `api/` |
| UI | Next.js 16, React 19, Tailwind 4 | `ui/` |
| UI | Next.js 15, React 19, Tailwind 4 | `ui/` |
| MCP | FastMCP 2.13.1 | `mcp_server/` |
## Quick Commands
+1 -1
View File
@@ -2,7 +2,7 @@
name: react-19
description: >
React 19 patterns with React Compiler.
Trigger: When writing React 19 components/hooks in .tsx (React Compiler rules, hook patterns, refs as props). If using Next.js App Router/Server Actions, also use nextjs-16.
Trigger: When writing React 19 components/hooks in .tsx (React Compiler rules, hook patterns, refs as props). If using Next.js App Router/Server Actions, also use nextjs-15.
license: Apache-2.0
metadata:
author: prowler-cloud
@@ -67,7 +67,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -105,7 +104,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -144,7 +142,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -181,7 +178,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -232,7 +228,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -280,7 +275,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -327,7 +321,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -375,7 +368,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -430,7 +422,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -466,7 +457,6 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -510,117 +500,3 @@ class Test_entra_break_glass_account_fido2_security_key_registered:
assert len(result) == 1
assert result[0].status == "PASS"
assert result[0].resource_name == "BreakGlass1"
def test_user_registration_details_permission_error(self):
"""Test FAIL when there's a permission error reading user registration details."""
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = "Insufficient privileges to read user registration details. Required permission: AuditLog.Read.All"
with (
mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_m365_provider(),
),
mock.patch(
f"{CHECK_MODULE_PATH}.entra_client",
new=entra_client,
),
):
from prowler.providers.m365.services.entra.entra_break_glass_account_fido2_security_key_registered.entra_break_glass_account_fido2_security_key_registered import (
entra_break_glass_account_fido2_security_key_registered,
)
policy_id = str(uuid4())
bg_user_id = str(uuid4())
entra_client.conditional_access_policies = {
policy_id: _make_policy(policy_id, excluded_users=[bg_user_id]),
}
entra_client.users = {
bg_user_id: User(
id=bg_user_id,
name="BreakGlass1",
on_premises_sync_enabled=False,
authentication_methods=[],
),
}
check = entra_break_glass_account_fido2_security_key_registered()
result = check.execute()
assert len(result) == 1
assert result[0].status == "FAIL"
assert (
"Cannot verify FIDO2 security key registration for break glass account BreakGlass1"
in result[0].status_extended
)
assert "AuditLog.Read.All" in result[0].status_extended
assert result[0].resource_name == "BreakGlass1"
assert result[0].resource_id == bg_user_id
def test_user_registration_details_permission_error_with_missing_user(self):
"""Per-user emission and missing-user short-circuit on the error path.
Two break-glass user IDs are excluded from all CAPs, but only one is
present in ``entra_client.users``. With ``user_registration_details_error``
set, the present user must produce one preventive FAIL anchored to the
real user; the missing user must be skipped by the existing
``if not user: continue`` guard rather than crash or yield a synthetic
finding.
"""
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = "Insufficient privileges to read user registration details. Required permission: AuditLog.Read.All"
with (
mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_m365_provider(),
),
mock.patch(
f"{CHECK_MODULE_PATH}.entra_client",
new=entra_client,
),
):
from prowler.providers.m365.services.entra.entra_break_glass_account_fido2_security_key_registered.entra_break_glass_account_fido2_security_key_registered import (
entra_break_glass_account_fido2_security_key_registered,
)
policy_id = str(uuid4())
present_user_id = str(uuid4())
missing_user_id = str(uuid4())
entra_client.conditional_access_policies = {
policy_id: _make_policy(
policy_id,
excluded_users=[present_user_id, missing_user_id],
),
}
entra_client.users = {
present_user_id: User(
id=present_user_id,
name="BreakGlass1",
on_premises_sync_enabled=False,
authentication_methods=[],
),
# missing_user_id intentionally absent — exercises the
# `if not user: continue` short-circuit inside the loop.
}
check = entra_break_glass_account_fido2_security_key_registered()
result = check.execute()
# One finding for the present user; the missing one is skipped.
assert len(result) == 1
assert result[0].status == "FAIL"
assert (
"Cannot verify FIDO2 security key registration for break glass account BreakGlass1"
in result[0].status_extended
)
assert "AuditLog.Read.All" in result[0].status_extended
assert result[0].resource == entra_client.users[present_user_id]
assert result[0].resource_name == "BreakGlass1"
assert result[0].resource_id == present_user_id
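The two tests removed above pin down a specific loop shape on the error path: when `user_registration_details_error` is set, the check emits one preventive FAIL per break-glass user that actually exists in `entra_client.users`, and silently skips excluded user IDs with no matching user via the `if not user: continue` guard. A minimal sketch of that shape (plain dicts stand in for the real finding objects, and the function name is hypothetical; only the skip/emit behavior and message fields are taken from the tests):

```python
def permission_error_findings(entra_client, excluded_user_ids):
    """Sketch of the per-user error branch the removed tests exercise."""
    findings = []
    for user_id in excluded_user_ids:
        user = entra_client.users.get(user_id)
        if not user:
            # Missing-user short-circuit: an ID excluded from every CAP but
            # absent from the user inventory is skipped, not reported.
            continue
        findings.append(
            {
                "status": "FAIL",
                "status_extended": (
                    "Cannot verify FIDO2 security key registration for "
                    f"break glass account {user.name}: "
                    f"{entra_client.user_registration_details_error}"
                ),
                "resource": user,
                "resource_name": user.name,
                "resource_id": user_id,
            }
        )
    return findings
```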
@@ -11,7 +11,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -54,7 +53,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -97,7 +95,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -156,7 +153,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -195,7 +191,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -253,7 +248,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -292,7 +286,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -331,7 +324,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -391,7 +383,6 @@ class Test_entra_users_mfa_capable:
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = None
with (
mock.patch(
@@ -429,125 +420,3 @@ class Test_entra_users_mfa_capable:
assert result[0].resource == entra_client.users[user_id]
assert result[0].resource_name == "Test User"
assert result[0].resource_id == user_id
def test_user_registration_details_permission_error(self):
"""Test FAIL when there's a permission error reading user registration details."""
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = "Insufficient privileges to read user registration details. Required permission: AuditLog.Read.All"
with (
mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_m365_provider(),
),
mock.patch(
"prowler.providers.m365.services.entra.entra_users_mfa_capable.entra_users_mfa_capable.entra_client",
new=entra_client,
),
):
from prowler.providers.m365.services.entra.entra_users_mfa_capable.entra_users_mfa_capable import (
entra_users_mfa_capable,
)
user_id = str(uuid4())
entra_client.users = {
user_id: User(
id=user_id,
name="Test User",
on_premises_sync_enabled=False,
directory_roles_ids=[],
is_mfa_capable=False,
account_enabled=True,
)
}
check = entra_users_mfa_capable()
result = check.execute()
assert len(result) == 1
assert result[0].status == "FAIL"
assert (
"Cannot verify MFA capability for user Test User"
in result[0].status_extended
)
assert "AuditLog.Read.All" in result[0].status_extended
assert result[0].resource == entra_client.users[user_id]
assert result[0].resource_name == "Test User"
assert result[0].resource_id == user_id
def test_user_registration_details_permission_error_skips_guest_and_disabled(self):
"""CIS-scope skip (Guest, disabled) still applies on the permission-error path.
With ``user_registration_details_error`` set, only enabled member users
should receive a per-user "Cannot verify MFA capability" FAIL; guests
and disabled members are filtered out before the error branch runs.
"""
entra_client = mock.MagicMock
entra_client.audited_tenant = "audited_tenant"
entra_client.audited_domain = DOMAIN
entra_client.user_registration_details_error = "Insufficient privileges to read user registration details. Required permission: AuditLog.Read.All"
with (
mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_m365_provider(),
),
mock.patch(
"prowler.providers.m365.services.entra.entra_users_mfa_capable.entra_users_mfa_capable.entra_client",
new=entra_client,
),
):
from prowler.providers.m365.services.entra.entra_users_mfa_capable.entra_users_mfa_capable import (
entra_users_mfa_capable,
)
member_id = str(uuid4())
guest_id = str(uuid4())
disabled_member_id = str(uuid4())
entra_client.users = {
member_id: User(
id=member_id,
name="Enabled Member",
on_premises_sync_enabled=False,
directory_roles_ids=[],
is_mfa_capable=False,
account_enabled=True,
user_type="Member",
),
guest_id: User(
id=guest_id,
name="Guest User",
on_premises_sync_enabled=False,
directory_roles_ids=[],
is_mfa_capable=False,
account_enabled=True,
user_type="Guest",
),
disabled_member_id: User(
id=disabled_member_id,
name="Disabled Member",
on_premises_sync_enabled=False,
directory_roles_ids=[],
is_mfa_capable=False,
account_enabled=False,
user_type="Member",
),
}
check = entra_users_mfa_capable()
result = check.execute()
# Only the enabled member should be reported — Guest and
# disabled member are skipped before the error branch.
assert len(result) == 1
assert result[0].status == "FAIL"
assert (
"Cannot verify MFA capability for user Enabled Member"
in result[0].status_extended
)
assert "AuditLog.Read.All" in result[0].status_extended
assert result[0].resource == entra_client.users[member_id]
assert result[0].resource_name == "Enabled Member"
assert result[0].resource_id == member_id
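The `entra_users_mfa_capable` tests removed above additionally pin the ordering rule from the docstring: the CIS scope filter (enabled member accounts only) runs before the permission-error branch, so guests and disabled members never receive the preventive FAIL. A sketch of that ordering, with the same caveats as above (hypothetical helper, dict findings):

```python
def scoped_permission_error_findings(entra_client):
    """Sketch: CIS scope filtering precedes the error branch."""
    findings = []
    for user_id, user in entra_client.users.items():
        if getattr(user, "user_type", None) == "Guest":
            continue  # CIS scope: guest accounts are excluded
        if not user.account_enabled:
            continue  # CIS scope: disabled accounts are excluded
        if entra_client.user_registration_details_error:
            findings.append(
                {
                    "status": "FAIL",
                    "status_extended": (
                        f"Cannot verify MFA capability for user {user.name}: "
                        f"{entra_client.user_registration_details_error}"
                    ),
                    "resource": user,
                    "resource_name": user.name,
                    "resource_id": user_id,
                }
            )
    return findings
```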
@@ -665,11 +665,10 @@ class Test_Entra_Service:
)
)
registration_details, error_message = asyncio.run(
registration_details = asyncio.run(
entra_service._get_user_registration_details()
)
assert error_message is None
assert registration_details == {
"user-1": {
"is_mfa_capable": True,
@@ -687,37 +686,6 @@ class Test_Entra_Service:
registration_builder.with_url.assert_called_once_with("next-link")
registration_builder_next.get.assert_awaited()
def test__get_user_registration_details_returns_error_on_permission_denied(self):
"""Test that 403 Authorization_RequestDenied returns an empty dict and
a descriptive error message naming the missing AuditLog.Read.All permission.
"""
from msgraph.generated.models.o_data_errors.main_error import MainError
from msgraph.generated.models.o_data_errors.o_data_error import ODataError
odata_error = ODataError()
odata_error.error = MainError()
odata_error.error.code = "Authorization_RequestDenied"
registration_builder = SimpleNamespace(get=AsyncMock(side_effect=odata_error))
entra_service = Entra.__new__(Entra)
entra_service.client = SimpleNamespace(
reports=SimpleNamespace(
authentication_methods=SimpleNamespace(
user_registration_details=registration_builder
)
)
)
registration_details, error_message = asyncio.run(
entra_service._get_user_registration_details()
)
assert registration_details == {}
assert error_message is not None
assert "AuditLog.Read.All" in error_message
assert "user registration details" in error_message
def test__get_service_principals_filters_third_party_owners(self):
"""Service principals owned by another tenant must not be returned."""
# Mixed-case input to verify the service normalizes both sides before
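At the service layer, the removed test fixed the contract of `_get_user_registration_details`: a Graph 403 with code `Authorization_RequestDenied` is caught and surfaced as an empty dict plus an error message naming the missing `AuditLog.Read.All` permission, rather than propagating. A standalone sketch of that contract (the request chain and the `ODataError` import match the mocks in the test; the body is otherwise an assumption):

```python
from msgraph.generated.models.o_data_errors.o_data_error import ODataError


async def get_user_registration_details(client):
    """Sketch of the (details, error_message) contract the removed test pinned."""
    details = {}
    error_message = None
    try:
        page = await (
            client.reports.authentication_methods.user_registration_details.get()
        )
        # ... accumulate per-user entries from `page` here, following
        # "next-link" pages via .with_url() as the surviving pagination
        # test above shows ...
    except ODataError as error:
        if error.error and error.error.code == "Authorization_RequestDenied":
            error_message = (
                "Insufficient privileges to read user registration details. "
                "Required permission: AuditLog.Read.All"
            )
    return details, error_message
```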
+3 -3
@@ -5,7 +5,7 @@
> - [`prowler-test-ui`](../skills/prowler-test-ui/SKILL.md) - Playwright E2E testing (comprehensive)
> - [`typescript`](../skills/typescript/SKILL.md) - Const types, flat interfaces
> - [`react-19`](../skills/react-19/SKILL.md) - No useMemo/useCallback, compiler
> - [`nextjs-16`](../skills/nextjs-16/SKILL.md) - App Router, Server Actions
> - [`nextjs-15`](../skills/nextjs-15/SKILL.md) - App Router, Server Actions
> - [`tailwind-4`](../skills/tailwind-4/SKILL.md) - cn() utility, no var() in className
> - [`zod-4`](../skills/zod-4/SKILL.md) - New API (z.email(), z.uuid())
> - [`zustand-5`](../skills/zustand-5/SKILL.md) - Selectors, persist middleware
@@ -21,7 +21,7 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| App Router / Server Actions | `nextjs-16` |
| App Router / Server Actions | `nextjs-15` |
| Building AI chat features | `ai-sdk-5` |
| Committing changes | `prowler-commit` |
| Create PR that requires changelog entry | `prowler-changelog` |
@@ -179,7 +179,7 @@ test("action works", { tag: ["@critical", "@feature"] }, async ({ page }) => {
## TECH STACK
Next.js 16.2.3 | React 19.2.5 | Tailwind 4.1.18 | shadcn/ui
Next.js 15.5.9 | React 19.2.2 | Tailwind 4.1.13 | shadcn/ui
Zod 4.1.11 | React Hook Form 7.62.0 | Zustand 5.0.8 | NextAuth 5.0.0-beta.30 | Recharts 2.15.4
> **Note**: HeroUI exists in `components/ui/` as legacy code. Do NOT add new components there.
+2 -13
@@ -4,23 +4,11 @@ All notable changes to the **Prowler UI** are documented in this file.
## [1.27.0] (Prowler UNRELEASED)
### 🚀 Added
- AWS findings and resource details now expose a "View in AWS Console" link that opens the resource directly in the AWS Console via the universal `/go/view` ARN resolver. The per-provider external link is rendered by a new shared `ExternalResourceLink` component, which also covers the existing IaC repository link [(#9172)](https://github.com/prowler-cloud/prowler/pull/9172)
### 🔄 Changed
- Trimmed unused npm dependencies [(#11115)](https://github.com/prowler-cloud/prowler/pull/11115)
- Lighthouse now accepts Prowler App Finding Groups MCP tools [(#11140)](https://github.com/prowler-cloud/prowler/pull/11140)
- Attack Paths graph now uses React Flow with improved layout, interactions, export, minimap, and browser test coverage [(#10686)](https://github.com/prowler-cloud/prowler/pull/10686)
- SAML ACS URL is only shown if the email domain is configured [(#11144)](https://github.com/prowler-cloud/prowler/pull/11144)
---
## [1.26.2] (Prowler 5.26.2)
### 🐞 Fixed
- Finding drawer no longer renders literal backticks around inline code in Risk, Description and Remediation sections [(#11142)](https://github.com/prowler-cloud/prowler/pull/11142)
---
@@ -31,6 +19,7 @@ All notable changes to the **Prowler UI** are documented in this file.
- Role form Cancel buttons now return to Roles [(#11125)](https://github.com/prowler-cloud/prowler/pull/11125)
- Shared select dropdowns stay constrained and scrollable inside modals [(#11125)](https://github.com/prowler-cloud/prowler/pull/11125)
---
## [1.26.0] (Prowler v5.26.0)
@@ -1,35 +0,0 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { MarkdownContainer } from "./markdown-container";
describe("MarkdownContainer", () => {
it("renders bold and inline code as semantic elements", () => {
render(
<MarkdownContainer>
{"**Bedrock API keys** are evaluated, configured to `never expire`."}
</MarkdownContainer>,
);
const code = screen.getByText("never expire");
expect(code.tagName).toBe("CODE");
expect(screen.getByText("Bedrock API keys").tagName).toBe("STRONG");
});
it("neutralizes the @tailwindcss/typography backtick pseudo-elements on inline code", () => {
const { container } = render(
<MarkdownContainer>{"text `code` text"}</MarkdownContainer>,
);
const wrapper = container.firstElementChild;
expect(wrapper).not.toBeNull();
const className = wrapper?.className ?? "";
// The prose plugin from @tailwindcss/typography adds ::before/::after
// pseudo-elements with literal backticks on every <code> tag. Without
// these overrides the drawer renders `never expire` with visible
// backticks, which is the bug PROWLER-1729 fixes.
expect(className).toMatch(/prose-code:before:content-none/);
expect(className).toMatch(/prose-code:after:content-none/);
});
});
@@ -5,7 +5,7 @@ interface MarkdownContainerProps {
}
export const MarkdownContainer = ({ children }: MarkdownContainerProps) => (
<div className="prose prose-sm dark:prose-invert prose-code:before:content-none prose-code:after:content-none max-w-none break-words whitespace-normal">
<div className="prose prose-sm dark:prose-invert max-w-none break-words whitespace-normal">
<ReactMarkdown>{children}</ReactMarkdown>
</div>
);
@@ -44,10 +44,6 @@ import {
TooltipTrigger,
} from "@/components/shadcn/tooltip";
import { EventsTimeline } from "@/components/shared/events-timeline/events-timeline";
import {
ExternalResourceLink,
resolveExternalTarget,
} from "@/components/shared/external-resource-link";
import {
QUERY_EDITOR_LANGUAGE,
QueryCodeEditor,
@@ -433,16 +429,6 @@ export function ResourceDetailDrawerContent({
const resourceDetailHref = f?.resourceId
? buildResourceDetailHref(f.resourceId)
: null;
const externalResourceTarget = resolveExternalTarget({
providerType,
resourceUid,
providerUid,
resourceName,
findingUid: f?.uid,
region: resourceRegion,
});
const hasIdAction =
Boolean(resourceDetailHref) || Boolean(externalResourceTarget);
const findingRecommendationUrl = f?.remediation.recommendation.url;
const checkRecommendationUrl = checkMeta.remediation.recommendation.url;
const recommendationUrl = isNonEmptyString(findingRecommendationUrl)
@@ -713,31 +699,17 @@ export function ResourceDetailDrawerContent({
entityId={resourceUid}
idLabel="UID"
idAction={
hasIdAction ? (
<span className="inline-flex items-center gap-2">
{resourceDetailHref && (
<Button variant="link" size="link-sm" asChild>
<Link
href={resourceDetailHref}
target="_blank"
rel="noopener noreferrer"
>
View Resource
<ExternalLink className="size-3" />
</Link>
</Button>
)}
{externalResourceTarget && (
<ExternalResourceLink
providerType={providerType}
resourceUid={resourceUid}
providerUid={providerUid}
resourceName={resourceName}
findingUid={f?.uid}
region={resourceRegion}
/>
)}
</span>
resourceDetailHref ? (
<Button variant="link" size="link-sm" asChild>
<Link
href={resourceDetailHref}
target="_blank"
rel="noopener noreferrer"
>
View Resource
<ExternalLink className="size-3" />
</Link>
</Button>
) : undefined
}
/>
@@ -253,10 +253,9 @@ export const SamlConfigForm = ({
reader.readAsText(file);
};
const trimmedEmailDomain = emailDomain.trim();
const acsUrl = trimmedEmailDomain
? `${apiBaseUrl}/accounts/saml/${trimmedEmailDomain}/acs/`
: "";
const acsUrl = emailDomain
? `${apiBaseUrl}/accounts/saml/${emailDomain}/acs/`
: `${apiBaseUrl}/accounts/saml/your-domain.com/acs/`;
return (
<form
@@ -309,17 +308,11 @@ export const SamlConfigForm = ({
<span className="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
ACS URL:
</span>
{acsUrl ? (
<CodeSnippet
value={acsUrl}
ariaLabel="Copy ACS URL"
className="h-10 w-full"
/>
) : (
<p className="text-xs text-gray-500 dark:text-gray-400">
Enter your email domain above to generate the ACS URL.
</p>
)}
<CodeSnippet
value={acsUrl}
ariaLabel="Copy ACS URL"
className="h-10 w-full"
/>
</div>
<div>
@@ -1,7 +1,7 @@
"use client";
import { Row, RowSelectionState } from "@tanstack/react-table";
import { Container, CornerDownRight, Link } from "lucide-react";
import { Container, CornerDownRight, ExternalLink, Link } from "lucide-react";
import { useState } from "react";
import { FloatingMuteButton } from "@/components/findings/floating-mute-button";
@@ -22,7 +22,6 @@ import {
} from "@/components/shadcn/info-field/info-field";
import { LoadingState } from "@/components/shadcn/spinner/loading-state";
import { EventsTimeline } from "@/components/shared/events-timeline/events-timeline";
import { ExternalResourceLink } from "@/components/shared/external-resource-link";
import {
QUERY_EDITOR_LANGUAGE,
QueryCodeEditor,
@@ -32,6 +31,7 @@ import { DateWithTime } from "@/components/ui/entities/date-with-time";
import { EntityInfo } from "@/components/ui/entities/entity-info";
import { DataTable } from "@/components/ui/table";
import { getGroupLabel } from "@/lib/categories";
import { buildGitFileUrl } from "@/lib/iac-utils";
import { getRegionFlag } from "@/lib/region-flags";
import { ProviderType, ResourceProps } from "@/types";
@@ -190,6 +190,16 @@ export const ResourceDetailContent = ({
handleMuteComplete,
);
const gitUrl =
providerData.provider === "iac"
? buildGitFileUrl(
providerData.uid,
attributes.name,
"",
attributes.region,
)
: null;
const findingTitle =
findingDetails?.attributes?.check_metadata?.checktitle || "Finding Detail";
const resourceName =
@@ -258,13 +268,25 @@ export const ResourceDetailContent = ({
</TooltipTrigger>
<TooltipContent>Copy resource link to clipboard</TooltipContent>
</Tooltip>
<ExternalResourceLink
providerType={providerData.provider}
resourceUid={attributes.uid}
providerUid={providerData.uid}
resourceName={attributes.name}
region={attributes.region}
/>
{providerData.provider === "iac" && gitUrl && (
<Tooltip>
<TooltipTrigger asChild>
<a
href={gitUrl}
target="_blank"
rel="noopener noreferrer"
className="text-bg-data-info inline-flex items-center gap-1 text-sm"
aria-label="Open resource in repository"
>
<ExternalLink size={16} />
View in Repository
</a>
</TooltipTrigger>
<TooltipContent>
Go to Resource in the Repository
</TooltipContent>
</Tooltip>
)}
</div>
</div>
</div>
@@ -1,71 +0,0 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { ExternalResourceLink } from "./external-resource-link";
describe("ExternalResourceLink", () => {
it("renders an AWS Console link for AWS resources with a valid ARN", () => {
const arn = "arn:aws:s3:::example-bucket";
render(<ExternalResourceLink providerType="aws" resourceUid={arn} />);
const link = screen.getByRole("link", {
name: /open resource in aws console/i,
});
expect(link).toHaveAttribute(
"href",
`https://console.aws.amazon.com/go/view?arn=${encodeURIComponent(arn)}`,
);
expect(link).toHaveAttribute("target", "_blank");
expect(link).toHaveAttribute("rel", "noopener noreferrer");
expect(link).toHaveTextContent("View in AWS Console");
});
it("renders a repository link for IaC resources", () => {
render(
<ExternalResourceLink
providerType="iac"
providerUid="https://github.com/example/repo"
resourceName="main.tf"
findingUid="check-id-main.tf-10:15"
region="develop"
/>,
);
const link = screen.getByRole("link", {
name: /open resource in the repository/i,
});
expect(link).toHaveAttribute(
"href",
"https://github.com/example/repo/blob/develop/main.tf#L10-L15",
);
expect(link).toHaveTextContent("View in Repository");
});
it("renders nothing for AWS resources without a valid ARN", () => {
const { container } = render(
<ExternalResourceLink providerType="aws" resourceUid="not-an-arn" />,
);
expect(container).toBeEmptyDOMElement();
});
it("renders nothing for IaC resources missing repo url or filename", () => {
const { container } = render(
<ExternalResourceLink
providerType="iac"
providerUid=""
resourceName="main.tf"
/>,
);
expect(container).toBeEmptyDOMElement();
});
it("renders nothing for providers without external link support", () => {
const { container } = render(
<ExternalResourceLink
providerType="azure"
resourceUid="/subscriptions/abc/resourceGroups/rg"
/>,
);
expect(container).toBeEmptyDOMElement();
});
});
@@ -1,94 +0,0 @@
import { ExternalLink } from "lucide-react";
import { Button } from "@/components/shadcn";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@/components/shadcn/tooltip";
import { buildAwsConsoleUrl } from "@/lib/aws-utils";
import { buildGitFileUrl, extractLineRangeFromUid } from "@/lib/iac-utils";
interface ExternalResourceLinkProps {
providerType: string | null | undefined;
resourceUid?: string | null;
providerUid?: string | null;
resourceName?: string | null;
findingUid?: string | null;
region?: string | null;
className?: string;
}
interface ExternalResourceTarget {
url: string;
label: string;
tooltip: string;
}
export const resolveExternalTarget = ({
providerType,
resourceUid,
providerUid,
resourceName,
findingUid,
region,
}: ExternalResourceLinkProps): ExternalResourceTarget | null => {
if (providerType === "aws" && resourceUid) {
const url = buildAwsConsoleUrl(resourceUid);
if (!url) return null;
return {
url,
label: "View in AWS Console",
tooltip: "Open resource in AWS Console",
};
}
if (providerType === "iac" && providerUid && resourceName) {
const lineRange = findingUid
? (extractLineRangeFromUid(findingUid) ?? "")
: "";
const url = buildGitFileUrl(
providerUid,
resourceName,
lineRange,
region ?? undefined,
);
if (!url) return null;
return {
url,
label: "View in Repository",
tooltip: "Open resource in the repository",
};
}
return null;
};
export const ExternalResourceLink = (props: ExternalResourceLinkProps) => {
const target = resolveExternalTarget(props);
if (!target) return null;
return (
<Tooltip>
<TooltipTrigger asChild>
<Button
variant="link"
size="link-sm"
asChild
className={props.className}
>
<a
href={target.url}
target="_blank"
rel="noopener noreferrer"
aria-label={target.tooltip}
>
{target.label}
<ExternalLink className="size-3" />
</a>
</Button>
</TooltipTrigger>
<TooltipContent>{target.tooltip}</TooltipContent>
</Tooltip>
);
};
@@ -1,4 +0,0 @@
export {
ExternalResourceLink,
resolveExternalTarget,
} from "./external-resource-link";
-26
@@ -1,26 +0,0 @@
import { describe, expect, it } from "vitest";
import { buildAwsConsoleUrl } from "./aws-utils";
describe("buildAwsConsoleUrl", () => {
it("returns a `/go/view` URL with the ARN URL-encoded", () => {
const arn = "arn:aws:s3:::my-bucket";
expect(buildAwsConsoleUrl(arn)).toBe(
`https://console.aws.amazon.com/go/view?arn=${encodeURIComponent(arn)}`,
);
});
it("preserves regional and account scoping in the encoded ARN", () => {
const arn =
"arn:aws:iam::123456789012:role/MyRole-with+special/chars and spaces";
const url = buildAwsConsoleUrl(arn);
expect(url).not.toBeNull();
expect(url).toContain(encodeURIComponent(arn));
});
it("returns null for missing or non-ARN inputs", () => {
expect(buildAwsConsoleUrl("")).toBeNull();
expect(buildAwsConsoleUrl("not-an-arn")).toBeNull();
expect(buildAwsConsoleUrl("https://example.com")).toBeNull();
});
});
-9
@@ -1,9 +0,0 @@
// Uses the AWS Console's universal `/go/view` redirect so we don't have to
// special-case each service — the console resolves the ARN to the right page.
export const buildAwsConsoleUrl = (resourceArn: string): string | null => {
if (!resourceArn || !resourceArn.startsWith("arn:")) {
return null;
}
return `https://console.aws.amazon.com/go/view?arn=${encodeURIComponent(resourceArn)}`;
};
+4
@@ -67,6 +67,10 @@ const ALLOWED_TOOLS = new Set([
"prowler_app_search_security_findings",
"prowler_app_get_finding_details",
"prowler_app_get_findings_overview",
// Finding Groups
"prowler_app_list_finding_groups",
"prowler_app_get_finding_group_details",
"prowler_app_list_finding_group_resources",
// Providers
"prowler_app_search_providers",
// Scans