chore(revision): resolve comments

chore(changelog): update ofr universal compliance
feat(reporting): bound PDF compliance report memory and CPU on large scans
2026-05-18 02:03:26 +00:00 · 2026-05-14 11:55:48 +02:00 · 2026-05-13 17:35:21 +02:00 · 2026-05-13 17:31:15 +02:00
10 changed files with 1398 additions and 104 deletions
@@ -11,6 +11,7 @@ All notable changes to the **Prowler API** are documented in this file.
 ### 🔄 Changed

 - Remove orphaned `gin_resources_search_idx` declaration from `Resource.Meta.indexes` (DB index dropped in `0072_drop_unused_indexes`) [(#11001)](https://github.com/prowler-cloud/prowler/pull/11001)
+- PDF compliance reports cap detail tables at 100 failed findings per check (configurable via `DJANGO_PDF_MAX_FINDINGS_PER_CHECK`) to bound worker memory on large scans [(#11160)](https://github.com/prowler-cloud/prowler/pull/11160)

 ---

@@ -20,11 +20,15 @@ from tasks.jobs.reports import (
    ThreatScoreReportGenerator,
 )
 from tasks.jobs.threatscore import compute_threatscore_metrics
-from tasks.jobs.threatscore_utils import _aggregate_requirement_statistics_from_database
+from tasks.jobs.threatscore_utils import (
+    _aggregate_requirement_statistics_from_database,
+    _get_compliance_check_ids,
+)

 from api.db_router import READ_REPLICA_ALIAS, MainRouter
 from api.db_utils import rls_transaction
 from api.models import Provider, Scan, ScanSummary, StateChoices, ThreatScoreSnapshot
+from api.utils import initialize_prowler_provider
 from prowler.lib.check.compliance_models import Compliance
 from prowler.lib.outputs.finding import Finding as FindingOutput

@@ -427,6 +431,7 @@ def generate_threatscore_report(
    provider_obj: Provider | None = None,
    requirement_statistics: dict[str, dict[str, int]] | None = None,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    prowler_provider=None,
 ) -> None:
    """
    Generate a PDF compliance report based on Prowler ThreatScore framework.
@@ -455,6 +460,7 @@ def generate_threatscore_report(
        provider_obj=provider_obj,
        requirement_statistics=requirement_statistics,
        findings_cache=findings_cache,
+        prowler_provider=prowler_provider,
        only_failed=only_failed,
    )

@@ -469,6 +475,7 @@ def generate_ens_report(
    provider_obj: Provider | None = None,
    requirement_statistics: dict[str, dict[str, int]] | None = None,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    prowler_provider=None,
 ) -> None:
    """
    Generate a PDF compliance report for ENS RD2022 framework.
@@ -495,6 +502,7 @@ def generate_ens_report(
        provider_obj=provider_obj,
        requirement_statistics=requirement_statistics,
        findings_cache=findings_cache,
+        prowler_provider=prowler_provider,
        include_manual=include_manual,
    )

@@ -510,6 +518,7 @@ def generate_nis2_report(
    provider_obj: Provider | None = None,
    requirement_statistics: dict[str, dict[str, int]] | None = None,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    prowler_provider=None,
 ) -> None:
    """
    Generate a PDF compliance report for NIS2 Directive (EU) 2022/2555.
@@ -537,6 +546,7 @@ def generate_nis2_report(
        provider_obj=provider_obj,
        requirement_statistics=requirement_statistics,
        findings_cache=findings_cache,
+        prowler_provider=prowler_provider,
        only_failed=only_failed,
        include_manual=include_manual,
    )
@@ -553,6 +563,7 @@ def generate_csa_report(
    provider_obj: Provider | None = None,
    requirement_statistics: dict[str, dict[str, int]] | None = None,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    prowler_provider=None,
 ) -> None:
    """
    Generate a PDF compliance report for CSA Cloud Controls Matrix (CCM) v4.0.
@@ -580,6 +591,7 @@ def generate_csa_report(
        provider_obj=provider_obj,
        requirement_statistics=requirement_statistics,
        findings_cache=findings_cache,
+        prowler_provider=prowler_provider,
        only_failed=only_failed,
        include_manual=include_manual,
    )
@@ -596,6 +608,7 @@ def generate_cis_report(
    provider_obj: Provider | None = None,
    requirement_statistics: dict[str, dict[str, int]] | None = None,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    prowler_provider=None,
 ) -> None:
    """
    Generate a PDF compliance report for a specific CIS Benchmark variant.
@@ -627,6 +640,7 @@ def generate_cis_report(
        provider_obj=provider_obj,
        requirement_statistics=requirement_statistics,
        findings_cache=findings_cache,
+        prowler_provider=prowler_provider,
        only_failed=only_failed,
        include_manual=include_manual,
    )
@@ -771,6 +785,17 @@ def generate_compliance_reports(
        results["csa"] = {"upload": False, "path": ""}
        generate_csa = False

+    # Load the framework definitions for this provider once. We use this map
+    # both to pick the latest CIS variant and to precompute the set of
+    # check_ids each framework consumes (for findings_cache eviction).
+    frameworks_bulk: dict = {}
+    try:
+        frameworks_bulk = Compliance.get_bulk(provider_type)
+    except Exception as e:
+        logger.error("Error loading compliance frameworks for %s: %s", provider_type, e)
+        # Fall through; individual frameworks will still try and fail
+        # gracefully if their compliance_id is missing.
+
    # For CIS we do NOT pre-check the provider against a hard-coded whitelist
    # (that list drifts the moment a new CIS JSON ships). Instead, we inspect
    # the dynamically loaded framework map and pick the latest available CIS
@@ -778,7 +803,6 @@ def generate_compliance_reports(
    latest_cis: str | None = None
    if generate_cis:
        try:
-            frameworks_bulk = Compliance.get_bulk(provider_type)
            latest_cis = _pick_latest_cis_variant(
                name for name in frameworks_bulk.keys() if name.startswith("cis_")
            )
@@ -815,10 +839,84 @@ def generate_compliance_reports(
        tenant_id, scan_id
    )

-    # Create shared findings cache
-    findings_cache = {}
+    # Initialize the Prowler provider once for the whole report batch. Each
+    # generator used to re-init this in _load_compliance_data, paying the
+    # boto3/Azure-SDK construction cost 5 times per scan. The instance is
+    # only used by FindingOutput.transform_api_finding to enrich findings,
+    # so a single shared instance is correct.
+    logger.info("Initializing prowler_provider once for all reports (scan %s)", scan_id)
+    try:
+        with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
+            prowler_provider = initialize_prowler_provider(provider_obj)
+    except Exception as init_error:
+        # If init fails the generators will fall back to lazy init in
+        # _load_compliance_data; we just log and continue.
+        logger.warning(
+            "Could not pre-initialize prowler_provider for scan %s: %s",
+            scan_id,
+            init_error,
+        )
+        prowler_provider = None
+
+    # Create shared findings cache up front so the eviction closure below
+    # can reference it. Defined BEFORE the closure to avoid the UnboundLocalError
+    # trap if an early-return is later inserted between the closure and its
+    # first use.
+    findings_cache: dict[str, list[FindingOutput]] = {}
    logger.info("Created shared findings cache for all reports")

+    # Precompute the set of check_ids each framework consumes. After a
+    # framework finishes, every check_id that no remaining framework still
+    # needs is evicted from findings_cache so the dict does not keep
+    # growing through the batch (PROWLER-1733).
+    pending_checks_by_framework: dict[str, set[str]] = {}
+    if generate_threatscore:
+        pending_checks_by_framework["threatscore"] = _get_compliance_check_ids(
+            frameworks_bulk.get(f"prowler_threatscore_{provider_type}")
+        )
+    if generate_ens:
+        pending_checks_by_framework["ens"] = _get_compliance_check_ids(
+            frameworks_bulk.get(f"ens_rd2022_{provider_type}")
+        )
+    if generate_nis2:
+        pending_checks_by_framework["nis2"] = _get_compliance_check_ids(
+            frameworks_bulk.get(f"nis2_{provider_type}")
+        )
+    if generate_csa:
+        pending_checks_by_framework["csa"] = _get_compliance_check_ids(
+            frameworks_bulk.get(f"csa_ccm_4.0_{provider_type}")
+        )
+    if generate_cis and latest_cis:
+        pending_checks_by_framework["cis"] = _get_compliance_check_ids(
+            frameworks_bulk.get(latest_cis)
+        )
+
+    def _evict_after_framework(done_key: str) -> int:
+        """Drop from findings_cache every check_id no pending framework still needs."""
+        done = pending_checks_by_framework.pop(done_key, set())
+        still_needed: set[str] = (
+            set().union(*pending_checks_by_framework.values())
+            if pending_checks_by_framework
+            else set()
+        )
+        exclusive = done - still_needed
+        evicted = 0
+        for cid in exclusive:
+            if findings_cache.pop(cid, None) is not None:
+                evicted += 1
+        if evicted:
+            logger.info(
+                "Evicted %d exclusive check entries from findings_cache after %s "
+                "(remaining cache size: %d)",
+                evicted,
+                done_key,
+                len(findings_cache),
+            )
+            # Release the lists' memory now instead of waiting for the next
+            # gc cycle; FindingOutput instances retain quite a bit of state.
+            gc.collect()
+        return evicted
+
    generated_report_keys: list[str] = []
    output_paths: dict[str, str] = {}
    out_dir: str | None = None
@@ -907,6 +1005,7 @@ def generate_compliance_reports(
                provider_obj=provider_obj,
                requirement_statistics=requirement_statistics,
                findings_cache=findings_cache,
+                prowler_provider=prowler_provider,
            )

            # Compute and store ThreatScore metrics snapshot
@@ -984,9 +1083,15 @@ def generate_compliance_reports(
                logger.warning("ThreatScore report saved locally at %s", out_dir)

        except Exception as e:
-            logger.error("Error generating ThreatScore report: %s", e)
+            logger.exception(
+                "compliance_report_failed framework=threatscore scan_id=%s tenant_id=%s",
+                scan_id,
+                tenant_id,
+            )
            results["threatscore"] = {"upload": False, "path": "", "error": str(e)}

+        _evict_after_framework("threatscore")
+
    # Generate ENS report
    if generate_ens:
        generated_report_keys.append("ens")
@@ -1006,6 +1111,7 @@ def generate_compliance_reports(
                provider_obj=provider_obj,
                requirement_statistics=requirement_statistics,
                findings_cache=findings_cache,
+                prowler_provider=prowler_provider,
            )

            upload_uri_ens = _upload_to_s3(
@@ -1020,9 +1126,15 @@ def generate_compliance_reports(
                logger.warning("ENS report saved locally at %s", out_dir)

        except Exception as e:
-            logger.error("Error generating ENS report: %s", e)
+            logger.exception(
+                "compliance_report_failed framework=ens scan_id=%s tenant_id=%s",
+                scan_id,
+                tenant_id,
+            )
            results["ens"] = {"upload": False, "path": "", "error": str(e)}

+        _evict_after_framework("ens")
+
    # Generate NIS2 report
    if generate_nis2:
        generated_report_keys.append("nis2")
@@ -1043,6 +1155,7 @@ def generate_compliance_reports(
                provider_obj=provider_obj,
                requirement_statistics=requirement_statistics,
                findings_cache=findings_cache,
+                prowler_provider=prowler_provider,
            )

            upload_uri_nis2 = _upload_to_s3(
@@ -1057,9 +1170,15 @@ def generate_compliance_reports(
                logger.warning("NIS2 report saved locally at %s", out_dir)

        except Exception as e:
-            logger.error("Error generating NIS2 report: %s", e)
+            logger.exception(
+                "compliance_report_failed framework=nis2 scan_id=%s tenant_id=%s",
+                scan_id,
+                tenant_id,
+            )
            results["nis2"] = {"upload": False, "path": "", "error": str(e)}

+        _evict_after_framework("nis2")
+
    # Generate CSA CCM report
    if generate_csa:
        generated_report_keys.append("csa")
@@ -1080,6 +1199,7 @@ def generate_compliance_reports(
                provider_obj=provider_obj,
                requirement_statistics=requirement_statistics,
                findings_cache=findings_cache,
+                prowler_provider=prowler_provider,
            )

            upload_uri_csa = _upload_to_s3(
@@ -1094,9 +1214,15 @@ def generate_compliance_reports(
                logger.warning("CSA CCM report saved locally at %s", out_dir)

        except Exception as e:
-            logger.error("Error generating CSA CCM report: %s", e)
+            logger.exception(
+                "compliance_report_failed framework=csa scan_id=%s tenant_id=%s",
+                scan_id,
+                tenant_id,
+            )
            results["csa"] = {"upload": False, "path": "", "error": str(e)}

+        _evict_after_framework("csa")
+
    # Generate CIS Benchmark report for the latest available version only.
    # CIS ships multiple versions per provider (e.g. cis_1.4_aws, cis_5.0_aws,
    # cis_6.0_aws); we dynamically pick the highest semantic version at run
@@ -1119,6 +1245,7 @@ def generate_compliance_reports(
                provider_obj=provider_obj,
                requirement_statistics=requirement_statistics,
                findings_cache=findings_cache,
+                prowler_provider=prowler_provider,
            )

            upload_uri_cis = _upload_to_s3(
@@ -1147,14 +1274,22 @@ def generate_compliance_reports(
                )

        except Exception as e:
-            logger.error("Error generating CIS report %s: %s", latest_cis, e)
+            logger.exception(
+                "compliance_report_failed framework=cis variant=%s scan_id=%s tenant_id=%s",
+                latest_cis,
+                scan_id,
+                tenant_id,
+            )
            results["cis"] = {
                "upload": False,
                "path": "",
                "error": str(e),
            }
        finally:
-            # Free ReportLab/matplotlib memory before moving on.
+            # Free ReportLab/matplotlib memory before moving on. CIS is
+            # always the last framework, so evicting its entries clears the
+            # cache entirely (subject to its check_ids set).
+            _evict_after_framework("cis")
            gc.collect()

    # Clean up temporary files only if all generated reports were
@@ -1,6 +1,9 @@
 import gc
 import os
+import resource as _resource_module
+import time
 from abc import ABC, abstractmethod
+from contextlib import contextmanager
 from dataclasses import dataclass, field
 from typing import Any

@@ -41,6 +44,7 @@ from .config import (
    COLOR_LIGHT_BLUE,
    COLOR_LIGHTER_BLUE,
    COLOR_PROWLER_DARK_GREEN,
+    FINDINGS_TABLE_CHUNK_SIZE,
    PADDING_LARGE,
    PADDING_SMALL,
    FrameworkConfig,
@@ -48,6 +52,46 @@ from .config import (

 logger = get_task_logger(__name__)

+
+@contextmanager
+def _log_phase(phase: str, **tags: Any):
+    """Log start/end timing and RSS deltas around a long-running task section.
+
+    Generic helper: callers pass arbitrary ``key=value`` tags
+    (e.g. ``scan_id``, ``framework``, ``provider_id``) and they are
+    emitted as part of the structured log line, so Grafana/Datadog/
+    CloudWatch queries can pivot by whichever dimension is relevant to
+    the task. ``getrusage`` returns KB on Linux and bytes on macOS;
+    the values are still useful in relative terms even though units
+    differ across platforms.
+    """
+    tag_str = " ".join(f"{key}={value}" for key, value in tags.items())
+    suffix = f" {tag_str}" if tag_str else ""
+
+    start = time.perf_counter()
+    rss_before = _resource_module.getrusage(_resource_module.RUSAGE_SELF).ru_maxrss
+    logger.info("phase_start phase=%s%s rss_kb=%d", phase, suffix, rss_before)
+    try:
+        yield
+    except Exception:
+        elapsed = time.perf_counter() - start
+        logger.exception(
+            "phase_failed phase=%s%s elapsed_s=%.2f", phase, suffix, elapsed
+        )
+        raise
+    else:
+        elapsed = time.perf_counter() - start
+        rss_after = _resource_module.getrusage(_resource_module.RUSAGE_SELF).ru_maxrss
+        logger.info(
+            "phase_end phase=%s%s elapsed_s=%.2f rss_kb=%d delta_rss_kb=%d",
+            phase,
+            suffix,
+            elapsed,
+            rss_after,
+            rss_after - rss_before,
+        )
+
+
 # Register fonts (done once at module load)
 _fonts_registered: bool = False

@@ -335,6 +379,7 @@ class BaseComplianceReportGenerator(ABC):
        provider_obj: Provider | None = None,
        requirement_statistics: dict[str, dict[str, int]] | None = None,
        findings_cache: dict[str, list[FindingOutput]] | None = None,
+        prowler_provider: Any | None = None,
        **kwargs,
    ) -> None:
        """Generate the PDF compliance report.
@@ -351,23 +396,35 @@ class BaseComplianceReportGenerator(ABC):
            provider_obj: Optional pre-fetched Provider object
            requirement_statistics: Optional pre-aggregated statistics
            findings_cache: Optional pre-loaded findings cache
+            prowler_provider: Optional pre-initialized Prowler provider. When
+                generating multiple reports for the same scan the master
+                function initializes this once and passes it in to avoid
+                re-running boto3/Azure-SDK setup per framework.
            **kwargs: Additional framework-specific arguments
        """
+        framework = self.config.display_name
        logger.info(
-            "Generating %s report for scan %s", self.config.display_name, scan_id
+            "report_generation_start framework=%s scan_id=%s compliance_id=%s",
+            framework,
+            scan_id,
+            compliance_id,
        )

        try:
            # 1. Load compliance data
-            data = self._load_compliance_data(
-                tenant_id=tenant_id,
-                scan_id=scan_id,
-                compliance_id=compliance_id,
-                provider_id=provider_id,
-                provider_obj=provider_obj,
-                requirement_statistics=requirement_statistics,
-                findings_cache=findings_cache,
-            )
+            with _log_phase(
+                "load_compliance_data", scan_id=scan_id, framework=framework
+            ):
+                data = self._load_compliance_data(
+                    tenant_id=tenant_id,
+                    scan_id=scan_id,
+                    compliance_id=compliance_id,
+                    provider_id=provider_id,
+                    provider_obj=provider_obj,
+                    requirement_statistics=requirement_statistics,
+                    findings_cache=findings_cache,
+                    prowler_provider=prowler_provider,
+                )

            # 2. Create PDF document
            doc = self._create_document(output_path, data)
@@ -377,37 +434,54 @@ class BaseComplianceReportGenerator(ABC):
            elements = []

            # Cover page (lightweight)
-            elements.extend(self.create_cover_page(data))
-            elements.append(PageBreak())
+            with _log_phase("cover_page", scan_id=scan_id, framework=framework):
+                elements.extend(self.create_cover_page(data))
+                elements.append(PageBreak())

            # Executive summary (framework-specific)
-            elements.extend(self.create_executive_summary(data))
+            with _log_phase("executive_summary", scan_id=scan_id, framework=framework):
+                elements.extend(self.create_executive_summary(data))

            # Body sections (charts + requirements index)
            # Override _build_body_sections() in subclasses to change section order
-            elements.extend(self._build_body_sections(data))
+            with _log_phase("body_sections", scan_id=scan_id, framework=framework):
+                elements.extend(self._build_body_sections(data))

            # Detailed findings - heaviest section, loads findings on-demand
-            logger.info("Building detailed findings section...")
-            elements.extend(self.create_detailed_findings(data, **kwargs))
-            gc.collect()  # Free findings data after processing
+            with _log_phase("detailed_findings", scan_id=scan_id, framework=framework):
+                elements.extend(self.create_detailed_findings(data, **kwargs))
+                gc.collect()  # Free findings data after processing

            # 4. Build the PDF
-            logger.info("Building PDF document with %d elements...", len(elements))
-            self._build_pdf(doc, elements, data)
+            logger.info(
+                "doc_build_about_to_run framework=%s scan_id=%s elements=%d",
+                framework,
+                scan_id,
+                len(elements),
+            )
+            with _log_phase("doc_build", scan_id=scan_id, framework=framework):
+                self._build_pdf(doc, elements, data)

            # Final cleanup
            del elements
            gc.collect()

-            logger.info("Successfully generated report at %s", output_path)
+            logger.info(
+                "report_generation_end framework=%s scan_id=%s output_path=%s",
+                framework,
+                scan_id,
+                output_path,
+            )

-        except Exception as e:
-            import traceback
-
-            tb_lineno = e.__traceback__.tb_lineno if e.__traceback__ else "unknown"
-            logger.error("Error generating report, line %s -- %s", tb_lineno, e)
-            logger.error("Full traceback:\n%s", traceback.format_exc())
+        except Exception:
+            # logger.exception captures the full traceback; the contextual
+            # keys keep production search-by-scan-id viable.
+            logger.exception(
+                "report_generation_failed framework=%s scan_id=%s compliance_id=%s",
+                framework,
+                scan_id,
+                compliance_id,
+            )
            raise

    def _build_body_sections(self, data: ComplianceData) -> list:
@@ -638,15 +712,25 @@ class BaseComplianceReportGenerator(ABC):
        for req in requirements:
            check_ids_to_load.extend(req.checks)

-        # Load findings on-demand only for the checks that will be displayed
-        # Uses the shared findings cache to avoid duplicate queries across reports
+        # Load findings on-demand only for the checks that will be displayed.
+        # When ``only_failed`` is active at requirement level, also push the
+        # FAIL filter down to the finding level: a requirement marked FAIL
+        # because 1/1000 findings failed must not render a table dominated by
+        # 999 PASS rows. That hides the actual failure under noise and
+        # makes the per-check cap truncate the wrong rows.
+        # ``total_counts`` is populated with the pre-cap total per check_id
+        # (FAIL-only when only_failed is active) so the "Showing first N of
+        # M" banner uses the same denominator the reader cares about.
        logger.info("Loading findings on-demand for %d requirements", len(requirements))
+        total_counts: dict[str, int] = {}
        findings_by_check_id = _load_findings_for_requirement_checks(
            data.tenant_id,
            data.scan_id,
            check_ids_to_load,
            data.prowler_provider,
            data.findings_by_check_id,  # Pass the cache to update it
+            total_counts_out=total_counts,
+            only_failed_findings=only_failed,
        )

        for req in requirements:
@@ -678,9 +762,31 @@ class BaseComplianceReportGenerator(ABC):
                        )
                    )
                else:
-                    # Create findings table
-                    findings_table = self._create_findings_table(findings)
-                    elements.append(findings_table)
+                    # Surface truncation BEFORE the tables so readers see it
+                    # at the same scroll position as the data itself, not
+                    # after thousands of rendered rows.
+                    loaded = len(findings)
+                    total = total_counts.get(check_id, loaded)
+                    if total > loaded:
+                        kind = "failed findings" if only_failed else "findings"
+                        elements.append(
+                            Paragraph(
+                                f"<b>&#9888; Showing first {loaded:,} of "
+                                f"{total:,} {kind} for this check.</b> "
+                                f"Use the CSV or JSON-OCSF export for the full "
+                                f"list. The PDF caps detail rows to keep "
+                                f"the report readable and bounded in size.",
+                                self.styles["normal"],
+                            )
+                        )
+                        elements.append(Spacer(1, 0.05 * inch))
+
+                    # Create chunked findings tables to prevent OOM when a
+                    # single check has thousands of findings (ReportLab
+                    # resolves layout per Flowable, so many small tables
+                    # render contiguously with a bounded memory peak).
+                    findings_tables = self._create_findings_tables(findings)
+                    elements.extend(findings_tables)

                elements.append(Spacer(1, 0.1 * inch))

@@ -735,6 +841,7 @@ class BaseComplianceReportGenerator(ABC):
        provider_obj: Provider | None,
        requirement_statistics: dict | None,
        findings_cache: dict | None,
+        prowler_provider: Any | None = None,
    ) -> ComplianceData:
        """Load and aggregate compliance data from the database.

@@ -746,6 +853,9 @@ class BaseComplianceReportGenerator(ABC):
            provider_obj: Optional pre-fetched Provider
            requirement_statistics: Optional pre-aggregated statistics
            findings_cache: Optional pre-loaded findings
+            prowler_provider: Optional pre-initialized Prowler provider. When
+                the master function initializes it once and passes it in,
+                we skip the per-report ``initialize_prowler_provider`` call.

        Returns:
            Aggregated ComplianceData object
@@ -755,7 +865,8 @@ class BaseComplianceReportGenerator(ABC):
            if provider_obj is None:
                provider_obj = Provider.objects.get(id=provider_id)

-            prowler_provider = initialize_prowler_provider(provider_obj)
+            if prowler_provider is None:
+                prowler_provider = initialize_prowler_provider(provider_obj)
            provider_type = provider_obj.provider

            # Load compliance framework
@@ -823,13 +934,32 @@ class BaseComplianceReportGenerator(ABC):
    ) -> SimpleDocTemplate:
        """Create the PDF document template.

+        Validates that ``output_path`` is a filesystem path string with an
+        existing parent directory. SimpleDocTemplate technically accepts a
+        BytesIO too, but we want every report to land on disk so the
+        Celery worker doesn't hold the full PDF in memory while uploading
+        to S3.
+
        Args:
            output_path: Path for the output PDF
            data: Compliance data for metadata

        Returns:
            Configured SimpleDocTemplate
+
+        Raises:
+            TypeError: ``output_path`` is not a string.
+            FileNotFoundError: The parent directory does not exist.
        """
+        if not isinstance(output_path, str):
+            raise TypeError(
+                "output_path must be a filesystem path string; "
+                f"got {type(output_path).__name__}"
+            )
+        parent_dir = os.path.dirname(output_path)
+        if parent_dir and not os.path.isdir(parent_dir):
+            raise FileNotFoundError(f"Output directory does not exist: {parent_dir}")
+
        return SimpleDocTemplate(
            output_path,
            pagesize=letter,
@@ -876,47 +1006,10 @@ class BaseComplianceReportGenerator(ABC):
            onLaterPages=add_footer,
        )

-    def _create_findings_table(self, findings: list[FindingOutput]) -> Any:
-        """Create a findings table.
-
-        Args:
-            findings: List of finding objects
-
-        Returns:
-            ReportLab Table element
-        """
-
-        def get_finding_title(f):
-            metadata = getattr(f, "metadata", None)
-            if metadata:
-                return getattr(metadata, "CheckTitle", getattr(f, "check_id", ""))
-            return getattr(f, "check_id", "")
-
-        def get_resource_name(f):
-            name = getattr(f, "resource_name", "")
-            if not name:
-                name = getattr(f, "resource_uid", "")
-            return name
-
-        def get_severity(f):
-            metadata = getattr(f, "metadata", None)
-            if metadata:
-                return getattr(metadata, "Severity", "").capitalize()
-            return ""
-
-        # Convert findings to dicts for the table
-        data = []
-        for f in findings:
-            item = {
-                "title": get_finding_title(f),
-                "resource_name": get_resource_name(f),
-                "severity": get_severity(f),
-                "status": getattr(f, "status", "").upper(),
-                "region": getattr(f, "region", "global"),
-            }
-            data.append(item)
-
-        columns = [
+    # Column layout shared by all findings sub-tables. Defined as a method so
+    # subclasses can override it without re-implementing the chunking logic.
+    def _findings_table_columns(self) -> list[ColumnConfig]:
+        return [
            ColumnConfig("Finding", 2.5 * inch, "title"),
            ColumnConfig("Resource", 3 * inch, "resource_name"),
            ColumnConfig("Severity", 0.9 * inch, "severity"),
@@ -924,9 +1017,122 @@ class BaseComplianceReportGenerator(ABC):
            ColumnConfig("Region", 0.9 * inch, "region"),
        ]

+    @staticmethod
+    def _finding_to_row(f: FindingOutput) -> dict[str, str]:
+        """Project a FindingOutput onto the row dict the table expects.
+
+        Kept defensive: missing metadata or attributes return empty strings
+        rather than raising, so a single malformed finding never breaks the
+        whole report.
+        """
+        metadata = getattr(f, "metadata", None)
+        title = (
+            getattr(metadata, "CheckTitle", getattr(f, "check_id", ""))
+            if metadata
+            else getattr(f, "check_id", "")
+        )
+        resource_name = getattr(f, "resource_name", "") or getattr(
+            f, "resource_uid", ""
+        )
+        severity = getattr(metadata, "Severity", "").capitalize() if metadata else ""
+        return {
+            "title": title,
+            "resource_name": resource_name,
+            "severity": severity,
+            "status": getattr(f, "status", "").upper(),
+            "region": getattr(f, "region", "global"),
+        }
+
+    def _create_findings_tables(
+        self,
+        findings: list[FindingOutput],
+        chunk_size: int | None = None,
+    ) -> list[Any]:
+        """Build a list of small findings tables to keep ``doc.build()`` memory bounded.
+
+        ReportLab resolves layout (column widths, row heights, page-breaks)
+        per Flowable. A single ``LongTable`` of 15k rows forces all of that
+        to be computed at once and reliably OOMs the worker on large scans.
+        Splitting into chunks of ``chunk_size`` rows produces an equivalent-
+        looking PDF (LongTable repeats headers; chunks render contiguously)
+        with a bounded memory peak per chunk.
+
+        Args:
+            findings: List of finding objects for a single check.
+            chunk_size: Rows per sub-table. ``None`` uses
+                ``FINDINGS_TABLE_CHUNK_SIZE`` from config.
+
+        Returns:
+            List of ReportLab flowables (interleaved ``Table``/``LongTable``
+            and small ``Spacer`` between chunks). Empty list when there are
+            no findings.
+        """
+        if not findings:
+            return []
+
+        chunk_size = chunk_size or FINDINGS_TABLE_CHUNK_SIZE
+
+        # Build all rows first so we can chunk without re-walking the
+        # FindingOutput list. Malformed findings are skipped with a logged
+        # exception, never enough to abort the entire report.
+        rows: list[dict[str, str]] = []
+        for f in findings:
+            try:
+                rows.append(self._finding_to_row(f))
+            except Exception:
+                logger.exception(
+                    "Skipping malformed finding while building table for check %s",
+                    getattr(f, "check_id", "unknown"),
+                )
+
+        if not rows:
+            return []
+
+        columns = self._findings_table_columns()
+
+        flowables: list = []
+        total = len(rows)
+        for start in range(0, total, chunk_size):
+            chunk = rows[start : start + chunk_size]
+            flowables.append(
+                create_data_table(
+                    data=chunk,
+                    columns=columns,
+                    header_color=self.config.primary_color,
+                    normal_style=self.styles["normal_center"],
+                )
+            )
+            # A tiny spacer between chunks keeps them visually contiguous
+            # without forcing a page-break (KeepTogether would negate the
+            # memory benefit of chunking).
+            if start + chunk_size < total:
+                flowables.append(Spacer(1, 0.05 * inch))
+
+        if total > chunk_size:
+            logger.debug(
+                "Built %d findings sub-tables (chunk_size=%d, total_findings=%d)",
+                (total + chunk_size - 1) // chunk_size,
+                chunk_size,
+                total,
+            )
+
+        return flowables
+
+    def _create_findings_table(self, findings: list[FindingOutput]) -> Any:
+        """Deprecated alias kept for backwards compatibility.
+
+        Returns the first chunk produced by ``_create_findings_tables``.
+        New callers MUST use ``_create_findings_tables``, which returns a
+        list of flowables and is what ``create_detailed_findings`` invokes.
+        """
+        flowables = self._create_findings_tables(findings)
+        if flowables:
+            return flowables[0]
+        # Empty input → return an empty (header-only) table so callers that
+        # used to receive a Table never get None.
        return create_data_table(
-            data=data,
-            columns=columns,
+            data=[],
+            columns=self._findings_table_columns(),
            header_color=self.config.primary_color,
            normal_style=self.styles["normal_center"],
        )
@@ -1,9 +1,11 @@
 import gc
 import io
 import math
+import time
 from typing import Callable

 import matplotlib
+from celery.utils.log import get_task_logger

 # Use non-interactive Agg backend for memory efficiency in server environments
 # This MUST be set before importing pyplot
@@ -20,6 +22,26 @@ from .config import (  # noqa: E402
    CHART_DPI_DEFAULT,
 )

+logger = get_task_logger(__name__)
+
+
+def _log_chart_built(name: str, dpi: int, buffer: io.BytesIO, started: float) -> None:
+    """Emit a structured DEBUG line summarising a chart render.
+
+    Centralised so the formatting stays consistent across all chart helpers
+    and so we never accidentally pay for buffer.getbuffer().nbytes when
+    debug logging is disabled.
+    """
+    if logger.isEnabledFor(10):  # logging.DEBUG
+        logger.debug(
+            "chart_built name=%s dpi=%d bytes=%d elapsed_s=%.2f",
+            name,
+            dpi,
+            buffer.getbuffer().nbytes,
+            time.perf_counter() - started,
+        )
+
+
 # Use centralized DPI setting from config
 DEFAULT_CHART_DPI = CHART_DPI_DEFAULT

@@ -77,6 +99,7 @@ def create_vertical_bar_chart(
    Returns:
        BytesIO buffer containing the PNG image
    """
+    _started = time.perf_counter()
    if color_func is None:
        color_func = get_chart_color_for_percentage

@@ -122,6 +145,7 @@ def create_vertical_bar_chart(
        plt.close(fig)
        gc.collect()  # Force garbage collection after heavy matplotlib operation

+    _log_chart_built("vertical_bar", dpi, buffer, _started)
    return buffer


@@ -156,6 +180,7 @@ def create_horizontal_bar_chart(
    Returns:
        BytesIO buffer containing the PNG image
    """
+    _started = time.perf_counter()
    if color_func is None:
        color_func = get_chart_color_for_percentage

@@ -207,6 +232,7 @@ def create_horizontal_bar_chart(
        plt.close(fig)
        gc.collect()  # Force garbage collection after heavy matplotlib operation

+    _log_chart_built("horizontal_bar", dpi, buffer, _started)
    return buffer


@@ -239,6 +265,7 @@ def create_radar_chart(
    Returns:
        BytesIO buffer containing the PNG image
    """
+    _started = time.perf_counter()
    num_vars = len(labels)
    angles = [n / float(num_vars) * 2 * math.pi for n in range(num_vars)]

@@ -275,6 +302,7 @@ def create_radar_chart(
        plt.close(fig)
        gc.collect()  # Force garbage collection after heavy matplotlib operation

+    _log_chart_built("radar", dpi, buffer, _started)
    return buffer


@@ -303,6 +331,7 @@ def create_pie_chart(
    Returns:
        BytesIO buffer containing the PNG image
    """
+    _started = time.perf_counter()
    fig, ax = plt.subplots(figsize=figsize)

    _, _, autotexts = ax.pie(
@@ -330,6 +359,7 @@ def create_pie_chart(
        plt.close(fig)
        gc.collect()  # Force garbage collection after heavy matplotlib operation

+    _log_chart_built("pie", dpi, buffer, _started)
    return buffer


@@ -362,6 +392,7 @@ def create_stacked_bar_chart(
    Returns:
        BytesIO buffer containing the PNG image
    """
+    _started = time.perf_counter()
    fig, ax = plt.subplots(figsize=figsize)

    # Default colors if not provided
@@ -401,4 +432,5 @@ def create_stacked_bar_chart(
        plt.close(fig)
        gc.collect()  # Force garbage collection after heavy matplotlib operation

+    _log_chart_built("stacked_bar", dpi, buffer, _started)
    return buffer
@@ -475,8 +475,15 @@ def create_data_table(
            else:
                value = item.get(col.field, "")

+            # Wrap every string cell in Paragraph so the data rows keep the
+            # caller-supplied font/colour/alignment. Skipping Paragraph for
+            # short cells (a tempting micro-optimisation) breaks visual
+            # consistency: ReportLab Table falls back to Helvetica/black for
+            # raw strings, mixing fonts within the same table.
+            # ``escape_html`` keeps ``<``/``>``/``&`` in resource names from
+            # breaking Paragraph's mini-HTML parser.
            if normal_style and isinstance(value, str):
-                value = Paragraph(value, normal_style)
+                value = Paragraph(escape_html(value), normal_style)
            row.append(value)
        table_data.append(row)

@@ -508,17 +515,26 @@ def create_data_table(
    for idx, col in enumerate(columns):
        styles.append(("ALIGN", (idx, 0), (idx, -1), col.align))

-    # Alternate row backgrounds - skip for very large tables as it adds memory overhead
+    # Alternate row backgrounds: single O(1) ROWBACKGROUNDS style entry.
+    # The previous implementation appended N per-row BACKGROUND commands,
+    # which scaled the TableStyle list linearly with row count. ReportLab
+    # cycles through the colour list row-by-row so the visual is identical.
+    # The ALTERNATE_ROWS_MAX_SIZE cap is preserved to mirror legacy
+    # behaviour (very large tables stay plain), but the memory cost of the
+    # styles list is now constant regardless of row count.
    if (
        alternate_rows
        and len(table_data) > 1
        and len(table_data) <= ALTERNATE_ROWS_MAX_SIZE
    ):
-        for i in range(1, len(table_data)):
-            if i % 2 == 0:
-                styles.append(
-                    ("BACKGROUND", (0, i), (-1, i), colors.Color(0.98, 0.98, 0.98))
-                )
+        styles.append(
+            (
+                "ROWBACKGROUNDS",
+                (0, 1),
+                (-1, -1),
+                [colors.white, colors.Color(0.98, 0.98, 0.98)],
+            )
+        )

    table.setStyle(TableStyle(styles))
    return table
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass, field

 from reportlab.lib import colors
@@ -23,6 +24,47 @@ ALTERNATE_ROWS_MAX_SIZE = 200
 # Larger = fewer queries but more memory per batch
 FINDINGS_BATCH_SIZE = 2000

+# Maximum rows per findings sub-table. ReportLab resolves layout per Flowable;
+# splitting a huge findings list into multiple smaller tables keeps the peak
+# memory of doc.build() bounded. A single 15k-row LongTable would force
+# ReportLab to compute all column widths/row heights/page-breaks at once and
+# OOM the worker; 300-row chunks are rendered contiguously with negligible
+# visual impact.
+FINDINGS_TABLE_CHUNK_SIZE = 300
+
+# Maximum findings rendered per check in the detailed-findings section.
+#
+# Product behaviour: compliance PDFs render at most ``MAX_FINDINGS_PER_CHECK``
+# **failed** findings per check (PASS rows are excluded at SQL level by the
+# ``only_failed`` flag that all four list-rendering frameworks default to:
+# ThreatScore, NIS2, CSA, CIS; ENS does not render finding tables). Above
+# this cap each affected check renders an in-PDF banner
+# ("Showing first 100 of N failed findings for this check. Use the CSV
+# or JSON export for the full list") so the reader knows the table is
+# truncated and where to find the full data.
+#
+# Why a cap exists at all:
+#   * ``FindingOutput.transform_api_finding`` is O(N) per finding (Pydantic
+#     v1 validation + nested model construction).
+#   * ReportLab resolves layout per Flowable; thousands of sub-tables make
+#     ``doc.build()`` very slow and grow the PDF unboundedly.
+#   * A human-readable executive/auditor PDF does not need 12,000 rows for
+#     one check; that is forensic data and lives in the CSV/JSON exports.
+#
+# Why 100 specifically:
+#   * Covers ~99% of real scans without truncation (most checks emit far
+#     fewer than 100 findings even in enterprise estates).
+#   * Worst-case rendered rows = 100 × ~500 checks = 50k rows across all
+#     frameworks, which keeps RSS bounded and a 5-framework run completes
+#     in minutes instead of hours.
+#
+# Override at runtime via ``DJANGO_PDF_MAX_FINDINGS_PER_CHECK``:
+#   * Set to ``0`` to disable the cap entirely (load every finding; only
+#     advisable for small scans).
+#   * Set to a larger value (e.g. ``500``) for forensic detail in big runs;
+#     watch RSS in the Celery worker.
+MAX_FINDINGS_PER_CHECK = int(os.environ.get("DJANGO_PDF_MAX_FINDINGS_PER_CHECK", "100"))
+

 # =============================================================================
 # Base colors
@@ -1,6 +1,8 @@
 from celery.utils.log import get_task_logger
 from config.django.base import DJANGO_FINDINGS_BATCH_SIZE
-from django.db.models import Count, Q
+from django.db.models import Count, F, Q, Window
+from django.db.models.functions import RowNumber
+from tasks.jobs.reports.config import MAX_FINDINGS_PER_CHECK

 from api.db_router import READ_REPLICA_ALIAS
 from api.db_utils import rls_transaction
@@ -154,6 +156,8 @@ def _load_findings_for_requirement_checks(
    check_ids: list[str],
    prowler_provider,
    findings_cache: dict[str, list[FindingOutput]] | None = None,
+    total_counts_out: dict[str, int] | None = None,
+    only_failed_findings: bool = False,
 ) -> dict[str, list[FindingOutput]]:
    """
    Load findings for specific check IDs on-demand with optional caching.
@@ -178,6 +182,23 @@ def _load_findings_for_requirement_checks(
        prowler_provider: The initialized Prowler provider instance.
        findings_cache (dict, optional): Cache of already loaded findings.
            If provided, checks are first looked up in cache before querying database.
+        total_counts_out (dict, optional): If provided, populated with
+            ``{check_id: total_findings_in_db}`` BEFORE any per-check cap is
+            applied. Lets callers render a "Showing first N of M" banner for
+            truncated checks. Only populated for ``check_ids`` actually
+            queried (cache hits keep whatever value the caller already had).
+            When ``only_failed_findings=True`` the total is FAIL-only.
+        only_failed_findings (bool): When True, push the ``status=FAIL``
+            filter down into the SQL query so PASS rows are never loaded
+            from the DB nor pydantic-transformed. This matches the
+            ``only_failed`` requirement-level filter applied at PDF render
+            time: a requirement marked FAIL because 1/1000 findings failed
+            shouldn't render a table of 999 PASS rows. That hides the
+            actual failure under noise and wastes the per-check cap on
+            irrelevant data. NOTE: the findings cache stores whatever the
+            first caller asked for, so all callers in a single
+            ``generate_compliance_reports`` run MUST pass the same flag
+            (which they do: it threads from ``only_failed`` defaults).

    Returns:
        dict[str, list[FindingOutput]]: Dictionary mapping check_id to list of FindingOutput objects.
@@ -222,17 +243,88 @@ def _load_findings_for_requirement_checks(
        )

        with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
-            # Use iterator with chunk_size for memory-efficient streaming
-            # chunk_size controls how many rows Django fetches from DB at once
-            findings_queryset = (
-                Finding.all_objects.filter(
-                    tenant_id=tenant_id,
-                    scan_id=scan_id,
-                    check_id__in=check_ids_to_load,
-                )
-                .order_by("check_id", "uid")
-                .iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
+            base_qs = Finding.all_objects.filter(
+                tenant_id=tenant_id,
+                scan_id=scan_id,
+                check_id__in=check_ids_to_load,
            )
+            if only_failed_findings:
+                # Push the FAIL filter down into SQL: DB returns ~N×FAIL
+                # rows instead of N×ALL, and we never spend pydantic CPU on
+                # PASS findings the PDF would never render.
+                base_qs = base_qs.filter(status=StatusChoices.FAIL)
+
+            # Aggregate totals once so we (a) know which checks need capping
+            # and (b) can surface "Showing first N of M" in the PDF banner.
+            # Cheap: a single COUNT grouped by check_id.
+            totals: dict[str, int] = {
+                row["check_id"]: row["total"]
+                for row in base_qs.values("check_id").annotate(total=Count("id"))
+            }
+            if total_counts_out is not None:
+                total_counts_out.update(totals)
+
+            cap = MAX_FINDINGS_PER_CHECK
+            checks_over_cap = (
+                {cid for cid, n in totals.items() if n > cap} if cap > 0 else set()
+            )
+
+            # Use iterator with chunk_size for memory-efficient streaming.
+            # FindingOutput.transform_api_finding (prowler/lib/outputs/finding.py)
+            # reads finding.resources.first() and resource.tags.all() per
+            # finding, which without prefetch generates 2N queries per chunk.
+            # prefetch_related runs once per iterator chunk (Django >=4.1) and
+            # collapses that into a constant 2 extra queries per chunk.
+            if checks_over_cap:
+                # Two-step query so we can both cap rows per check AND attach
+                # prefetch_related on the streamed results:
+                #
+                #   1) ``ranked`` annotates every matching finding with a
+                #      per-check row number via a window function. The
+                #      partition keeps numbering independent per check, and
+                #      ordering by ``uid`` makes the "first N" selection
+                #      deterministic across runs (same scan → same rows).
+                #
+                #   2) The outer ``Finding.all_objects.filter(id__in=...)``
+                #      keeps only IDs whose row number is within the cap and
+                #      re-opens a plain queryset on it. Django cannot combine
+                #      ``Window`` annotations with ``prefetch_related`` on the
+                #      same queryset (the window is evaluated post-aggregation
+                #      and the prefetch loader fights with it), so the inner
+                #      SELECT becomes a subquery and the outer queryset is
+                #      free to prefetch resources/tags as usual.
+                #
+                # PostgreSQL only materialises
+                # ``cap * |checks_over_cap| + sum(uncapped)`` rows for the
+                # window step, vs the full table scan the previous path did.
+                ranked = base_qs.annotate(
+                    rn=Window(
+                        expression=RowNumber(),
+                        partition_by=[F("check_id")],
+                        order_by=F("uid").asc(),
+                    )
+                )
+                findings_queryset = (
+                    Finding.all_objects.filter(
+                        id__in=ranked.filter(rn__lte=cap).values("id")
+                    )
+                    .prefetch_related("resources", "resources__tags")
+                    .order_by("check_id", "uid")
+                    .iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
+                )
+                logger.info(
+                    "Per-check cap=%d active for %d checks (max %d each); "
+                    "skipping transform for surplus rows",
+                    cap,
+                    len(checks_over_cap),
+                    cap,
+                )
+            else:
+                findings_queryset = (
+                    base_qs.prefetch_related("resources", "resources__tags")
+                    .order_by("check_id", "uid")
+                    .iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
+                )

            # Pre-initialize empty lists for all check_ids to load
            # This avoids repeated dict lookups and 'if not in' checks
@@ -248,7 +340,11 @@ def _load_findings_for_requirement_checks(
                findings_count += 1

            logger.info(
-                f"Loaded {findings_count} findings for {len(check_ids_to_load)} checks"
+                "Loaded %d findings for %d checks (truncated %d checks total=%d)",
+                findings_count,
+                len(check_ids_to_load),
+                len(checks_over_cap),
+                sum(totals.values()),
            )

    # Build result dict using cache references (no data duplication)
@@ -258,3 +354,40 @@ def _load_findings_for_requirement_checks(
    }

    return result
+
+
+def _get_compliance_check_ids(compliance_obj) -> set[str]:
+    """Return the union of all check_ids referenced by a compliance framework.
+
+    Used by the master report orchestrator to know which checks each
+    framework consumes from the shared ``findings_cache``, so that once a
+    framework finishes the entries no other pending framework needs can be
+    evicted from the cache (PROWLER-1733).
+
+    Args:
+        compliance_obj: A loaded Compliance framework object exposing a
+            ``Requirements`` iterable, each requirement carrying ``Checks``.
+            ``None`` is treated as "no checks" rather than raising, so the
+            caller can pass ``frameworks_bulk.get(...)`` directly without
+            an extra existence check.
+
+    Returns:
+        Set of check_id strings (empty if ``compliance_obj`` is ``None``).
+    """
+    if compliance_obj is None:
+        return set()
+    checks: set[str] = set()
+    requirements = getattr(compliance_obj, "Requirements", None) or []
+    try:
+        # Defensive: Mock objects (used in unit tests) return another Mock
+        # for any attribute access, which is truthy but not iterable. Treat
+        # any non-iterable Requirements value as "no checks".
+        for req in requirements:
+            req_checks = getattr(req, "Checks", None) or []
+            try:
+                checks.update(req_checks)
+            except TypeError:
+                continue
+    except TypeError:
+        return set()
+    return checks
@@ -44,6 +44,8 @@ from api.models import (
    Finding,
    Resource,
    ResourceFindingMapping,
+    ResourceTag,
+    ResourceTagMapping,
    StateChoices,
    StatusChoices,
 )
@@ -367,6 +369,317 @@ class TestLoadFindingsForChecks:

        assert result == {}

+    def test_prefetch_avoids_n_plus_one(self, tenants_fixture, scans_fixture):
+        """Loading N findings must NOT execute O(N) extra queries for resources/tags.
+
+        Regression test for PROWLER-1733. ``FindingOutput.transform_api_finding``
+        reads ``finding.resources.first()`` and ``resource.tags.all()`` per
+        finding. Without ``prefetch_related`` that's 2N additional queries;
+        with prefetch it collapses to a small constant per iterator chunk.
+        """
+        from django.test.utils import CaptureQueriesContext
+        from django.db import connections
+
+        tenant = tenants_fixture[0]
+        scan = scans_fixture[0]
+
+        # Build N findings, each linked to one resource that owns 2 tags.
+        N = 20
+        for i in range(N):
+            finding = Finding.objects.create(
+                tenant_id=tenant.id,
+                scan=scan,
+                uid=f"f-prefetch-{i}",
+                check_id="aws_check_prefetch",
+                status=StatusChoices.FAIL,
+                severity=Severity.high,
+                impact=Severity.high,
+                check_metadata={
+                    "provider": "aws",
+                    "checkid": "aws_check_prefetch",
+                    "checktitle": "t",
+                    "checktype": [],
+                    "servicename": "s",
+                    "subservicename": "",
+                    "severity": "high",
+                    "resourcetype": "r",
+                    "description": "",
+                    "risk": "",
+                    "relatedurl": "",
+                    "remediation": {
+                        "recommendation": {"text": "", "url": ""},
+                        "code": {
+                            "nativeiac": "",
+                            "terraform": "",
+                            "cli": "",
+                            "other": "",
+                        },
+                    },
+                    "resourceidtemplate": "",
+                    "categories": [],
+                    "dependson": [],
+                    "relatedto": [],
+                    "notes": "",
+                },
+                raw_result={},
+            )
+            resource = Resource.objects.create(
+                tenant_id=tenant.id,
+                provider=scan.provider,
+                uid=f"r-prefetch-{i}",
+                name=f"r-prefetch-{i}",
+                metadata="{}",
+                details="",
+                region="us-east-1",
+                service="s",
+                type="t::r",
+            )
+            ResourceFindingMapping.objects.create(
+                tenant_id=tenant.id, finding=finding, resource=resource
+            )
+            for k in ("env", "owner"):
+                tag, _ = ResourceTag.objects.get_or_create(
+                    tenant_id=tenant.id, key=k, value=f"v-{i}-{k}"
+                )
+                ResourceTagMapping.objects.create(
+                    tenant_id=tenant.id, resource=resource, tag=tag
+                )
+
+        mock_provider = Mock()
+        mock_provider.type = "aws"
+        mock_provider.identity.account = "test"
+
+        # Patch transform_api_finding to a no-op so the test isolates queries
+        # to the queryset/prefetch path (transform itself is exercised by
+        # the integration tests above and not by this regression check).
+        with patch(
+            "tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
+            side_effect=lambda model, provider: Mock(check_id=model.check_id),
+        ):
+            with CaptureQueriesContext(
+                connections["default_read_replica"]
+                if "default_read_replica" in connections.databases
+                else connections["default"]
+            ) as ctx:
+                _load_findings_for_requirement_checks(
+                    str(tenant.id),
+                    str(scan.id),
+                    ["aws_check_prefetch"],
+                    mock_provider,
+                )
+
+        # Expected: a small constant number of queries irrespective of N.
+        # Pre-fix this would be ~1 + 2*N. We give some slack for RLS SET
+        # LOCAL statements that the rls_transaction emits.
+        assert len(ctx.captured_queries) < N, (
+            f"Expected O(1) queries with prefetch_related; got "
+            f"{len(ctx.captured_queries)} for N={N} (N+1 regression?)"
+        )
+
+    def test_max_findings_per_check_cap(self, tenants_fixture, scans_fixture):
+        """When a check exceeds ``MAX_FINDINGS_PER_CHECK``, only ``cap`` rows
+        are loaded AND ``total_counts_out`` reports the pre-cap total.
+
+        Guards the PROWLER-1733 truncation knob: prevents both runaway memory
+        and silent data loss in the PDF (the banner relies on knowing the
+        real total).
+        """
+        from unittest.mock import patch as _patch
+
+        tenant = tenants_fixture[0]
+        scan = scans_fixture[0]
+
+        # Create 12 findings for a single check; cap to 5.
+        check_id = "aws_check_cap_test"
+        for i in range(12):
+            finding = Finding.objects.create(
+                tenant_id=tenant.id,
+                scan=scan,
+                uid=f"f-cap-{i:02d}",
+                check_id=check_id,
+                status=StatusChoices.FAIL,
+                severity=Severity.high,
+                impact=Severity.high,
+                check_metadata={},
+                raw_result={},
+            )
+            resource = Resource.objects.create(
+                tenant_id=tenant.id,
+                provider=scan.provider,
+                uid=f"r-cap-{i:02d}",
+                name=f"r-cap-{i:02d}",
+                metadata="{}",
+                details="",
+                region="us-east-1",
+                service="s",
+                type="t::r",
+            )
+            ResourceFindingMapping.objects.create(
+                tenant_id=tenant.id, finding=finding, resource=resource
+            )
+
+        mock_provider = Mock(type="aws")
+        mock_provider.identity.account = "test"
+
+        totals: dict = {}
+        # Patch the cap to a small value AND skip the heavy transform so we
+        # only assert on row counts and totals.
+        with (
+            _patch("tasks.jobs.threatscore_utils.MAX_FINDINGS_PER_CHECK", 5),
+            _patch(
+                "tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
+                side_effect=lambda model, provider: Mock(check_id=model.check_id),
+            ),
+        ):
+            result = _load_findings_for_requirement_checks(
+                str(tenant.id),
+                str(scan.id),
+                [check_id],
+                mock_provider,
+                total_counts_out=totals,
+            )
+
+        assert len(result[check_id]) == 5, (
+            f"cap=5 should yield exactly 5 loaded findings, got {len(result[check_id])}"
+        )
+        assert totals[check_id] == 12, (
+            f"total_counts_out should report the pre-cap total (12), got {totals[check_id]}"
+        )
+
+    def test_only_failed_findings_pushes_down_to_sql(
+        self, tenants_fixture, scans_fixture
+    ):
+        """When ``only_failed_findings=True``, PASS rows are excluded by the
+        DB filter, not just visually hidden afterwards.
+
+        Regression for the consistency fix: previously the requirement-level
+        ``only_failed`` flag filtered which requirements appeared, but inside
+        each rendered requirement the table still showed PASS rows mixed
+        with FAIL, which combined with ``MAX_FINDINGS_PER_CHECK`` could
+        truncate to 1000 PASS findings and hide the actual failure.
+        """
+        from unittest.mock import patch as _patch
+
+        tenant = tenants_fixture[0]
+        scan = scans_fixture[0]
+        check_id = "aws_check_only_failed_test"
+
+        # Mix PASS and FAIL so the filter has something to drop.
+        for i in range(6):
+            status = StatusChoices.FAIL if i % 2 == 0 else StatusChoices.PASS
+            finding = Finding.objects.create(
+                tenant_id=tenant.id,
+                scan=scan,
+                uid=f"f-of-{i:02d}",
+                check_id=check_id,
+                status=status,
+                severity=Severity.high,
+                impact=Severity.high,
+                check_metadata={},
+                raw_result={},
+            )
+            resource = Resource.objects.create(
+                tenant_id=tenant.id,
+                provider=scan.provider,
+                uid=f"r-of-{i:02d}",
+                name=f"r-of-{i:02d}",
+                metadata="{}",
+                details="",
+                region="us-east-1",
+                service="s",
+                type="t::r",
+            )
+            ResourceFindingMapping.objects.create(
+                tenant_id=tenant.id, finding=finding, resource=resource
+            )
+
+        mock_provider = Mock(type="aws")
+        mock_provider.identity.account = "test"
+
+        totals: dict = {}
+        with _patch(
+            "tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
+            side_effect=lambda model, provider: Mock(
+                check_id=model.check_id, status=model.status
+            ),
+        ):
+            result = _load_findings_for_requirement_checks(
+                str(tenant.id),
+                str(scan.id),
+                [check_id],
+                mock_provider,
+                total_counts_out=totals,
+                only_failed_findings=True,
+            )
+
+        # 3 FAIL + 3 PASS in DB; FAIL-only filter should load just 3.
+        loaded = result[check_id]
+        assert len(loaded) == 3, f"expected 3 FAIL findings, got {len(loaded)}"
+        statuses = {getattr(f, "status", None) for f in loaded}
+        assert statuses == {StatusChoices.FAIL}, (
+            f"expected all loaded findings to be FAIL; got statuses {statuses}"
+        )
+        # total_counts must reflect the FAIL-only total, not the global total.
+        assert totals[check_id] == 3, (
+            f"total_counts should be FAIL-only (3), got {totals[check_id]}"
+        )
+
+    def test_max_findings_per_check_disabled(self, tenants_fixture, scans_fixture):
+        """``MAX_FINDINGS_PER_CHECK=0`` disables the cap; load all rows."""
+        from unittest.mock import patch as _patch
+
+        tenant = tenants_fixture[0]
+        scan = scans_fixture[0]
+
+        check_id = "aws_check_uncapped"
+        for i in range(8):
+            f = Finding.objects.create(
+                tenant_id=tenant.id,
+                scan=scan,
+                uid=f"f-unc-{i:02d}",
+                check_id=check_id,
+                status=StatusChoices.FAIL,
+                severity=Severity.high,
+                impact=Severity.high,
+                check_metadata={},
+                raw_result={},
+            )
+            r = Resource.objects.create(
+                tenant_id=tenant.id,
+                provider=scan.provider,
+                uid=f"r-unc-{i:02d}",
+                name=f"r-unc-{i:02d}",
+                metadata="{}",
+                details="",
+                region="us-east-1",
+                service="s",
+                type="t::r",
+            )
+            ResourceFindingMapping.objects.create(
+                tenant_id=tenant.id, finding=f, resource=r
+            )
+
+        mock_provider = Mock(type="aws")
+        mock_provider.identity.account = "test"
+        totals: dict = {}
+        with (
+            _patch("tasks.jobs.threatscore_utils.MAX_FINDINGS_PER_CHECK", 0),
+            _patch(
+                "tasks.jobs.threatscore_utils.FindingOutput.transform_api_finding",
+                side_effect=lambda model, provider: Mock(check_id=model.check_id),
+            ),
+        ):
+            result = _load_findings_for_requirement_checks(
+                str(tenant.id),
+                str(scan.id),
+                [check_id],
+                mock_provider,
+                total_counts_out=totals,
+            )
+
+        assert len(result[check_id]) == 8
+        assert totals[check_id] == 8
+

 class TestCleanupStaleTmpOutputDirectories:
    """Unit tests for opportunistic stale cleanup under tmp output root."""
@@ -855,6 +1168,181 @@ class TestGenerateComplianceReportsOptimized:
        assert result["cis"] == {"upload": False, "path": ""}
        mock_cis.assert_not_called()

+    @patch("api.utils.initialize_prowler_provider")
+    @patch("tasks.jobs.report.rmtree")
+    @patch("tasks.jobs.report._upload_to_s3")
+    @patch("tasks.jobs.report.generate_cis_report")
+    @patch("tasks.jobs.report.generate_csa_report")
+    @patch("tasks.jobs.report.generate_nis2_report")
+    @patch("tasks.jobs.report.generate_ens_report")
+    @patch("tasks.jobs.report.generate_threatscore_report")
+    @patch("tasks.jobs.report._generate_compliance_output_directory")
+    @patch("tasks.jobs.report._aggregate_requirement_statistics_from_database")
+    @patch("tasks.jobs.report.Compliance.get_bulk")
+    @patch("tasks.jobs.report.Provider.objects.get")
+    @patch("tasks.jobs.report.ScanSummary.objects.filter")
+    def test_findings_cache_eviction_after_framework(
+        self,
+        mock_scan_summary_filter,
+        mock_provider_get,
+        mock_get_bulk,
+        mock_aggregate_stats,
+        mock_generate_output_dir,
+        mock_threatscore,
+        mock_ens,
+        mock_nis2,
+        mock_csa,
+        mock_cis,
+        mock_upload_to_s3,
+        mock_rmtree,
+        mock_init_provider,
+    ):
+        """After each framework finishes, exclusive entries are evicted.
+
+        Threat scenario for PROWLER-1733: the shared ``findings_cache`` used
+        to grow monotonically through all 5 frameworks. With the new
+        eviction logic, check_ids only used by ThreatScore are dropped when
+        ThreatScore finishes, before ENS runs.
+        """
+        from types import SimpleNamespace
+        from tasks.jobs import report as report_mod
+
+        mock_scan_summary_filter.return_value.exists.return_value = True
+        mock_provider_get.return_value = Mock(uid="provider-uid", provider="aws")
+        # ThreatScore consumes {tsc_only, shared}; ENS consumes {ens_only,
+        # shared}. After ThreatScore evicts, tsc_only must be gone but
+        # shared and ens_only must remain.
+        mock_get_bulk.return_value = {
+            "prowler_threatscore_aws": SimpleNamespace(
+                Requirements=[SimpleNamespace(Checks=["tsc_only", "shared"])]
+            ),
+            "ens_rd2022_aws": SimpleNamespace(
+                Requirements=[SimpleNamespace(Checks=["ens_only", "shared"])]
+            ),
+        }
+        mock_aggregate_stats.return_value = {}
+        mock_generate_output_dir.return_value = "/tmp/tenant/scan/x/prowler-out"
+        mock_upload_to_s3.return_value = "s3://bucket/tenant/scan/x/report.pdf"
+        mock_init_provider.return_value = Mock(name="prowler_provider")
+
+        # Seed the cache as if both frameworks had already loaded their
+        # findings. We mutate it indirectly: each generator wrapper is a
+        # Mock: make ThreatScore populate the cache, and have ENS observe
+        # the state at call time so we can introspect post-eviction.
+        observed_state: dict = {}
+
+        def _threatscore_side_effect(**kwargs):
+            cache = kwargs["findings_cache"]
+            cache["tsc_only"] = ["tsc-finding"]
+            cache["shared"] = ["shared-finding"]
+
+        def _ens_side_effect(**kwargs):
+            # ENS runs AFTER threatscore's _evict_after_framework("threatscore").
+            observed_state["cache_keys_when_ens_runs"] = set(
+                kwargs["findings_cache"].keys()
+            )
+            kwargs["findings_cache"]["ens_only"] = ["ens-finding"]
+
+        mock_threatscore.side_effect = _threatscore_side_effect
+        mock_ens.side_effect = _ens_side_effect
+
+        report_mod.generate_compliance_reports(
+            tenant_id=str(uuid.uuid4()),
+            scan_id=str(uuid.uuid4()),
+            provider_id=str(uuid.uuid4()),
+            generate_threatscore=True,
+            generate_ens=True,
+            generate_nis2=False,
+            generate_csa=False,
+            generate_cis=False,
+        )
+
+        # ``tsc_only`` was exclusive to ThreatScore → evicted before ENS ran.
+        # ``shared`` is still pending for ENS → must remain.
+        assert "tsc_only" not in observed_state["cache_keys_when_ens_runs"], (
+            "tsc_only should have been evicted before ENS ran"
+        )
+        assert "shared" in observed_state["cache_keys_when_ens_runs"], (
+            "shared must remain in cache because ENS still needs it"
+        )
+
+    @patch("api.utils.initialize_prowler_provider")
+    @patch("tasks.jobs.report.rmtree")
+    @patch("tasks.jobs.report._upload_to_s3")
+    @patch("tasks.jobs.report.generate_cis_report")
+    @patch("tasks.jobs.report.generate_csa_report")
+    @patch("tasks.jobs.report.generate_nis2_report")
+    @patch("tasks.jobs.report.generate_ens_report")
+    @patch("tasks.jobs.report.generate_threatscore_report")
+    @patch("tasks.jobs.report._generate_compliance_output_directory")
+    @patch("tasks.jobs.report._aggregate_requirement_statistics_from_database")
+    @patch("tasks.jobs.report.Compliance.get_bulk")
+    @patch("tasks.jobs.report.Provider.objects.get")
+    @patch("tasks.jobs.report.ScanSummary.objects.filter")
+    def test_prowler_provider_initialized_once(
+        self,
+        mock_scan_summary_filter,
+        mock_provider_get,
+        mock_get_bulk,
+        mock_aggregate_stats,
+        mock_generate_output_dir,
+        mock_threatscore,
+        mock_ens,
+        mock_nis2,
+        mock_csa,
+        mock_cis,
+        mock_upload_to_s3,
+        mock_rmtree,
+        mock_init_provider,
+    ):
+        """``initialize_prowler_provider`` must be called exactly once for
+        the whole batch (PROWLER-1733). Previously each generator re-init'd
+        the SDK provider in ``_load_compliance_data`` → 5 inits per scan.
+        """
+        mock_scan_summary_filter.return_value.exists.return_value = True
+        mock_provider_get.return_value = Mock(uid="provider-uid", provider="aws")
+        # CIS variant discovery needs at least one cis_* key.
+        mock_get_bulk.return_value = {"cis_6.0_aws": Mock()}
+        mock_aggregate_stats.return_value = {}
+        mock_generate_output_dir.return_value = "/tmp/tenant/scan/x/prowler-out"
+        mock_upload_to_s3.return_value = "s3://bucket/tenant/scan/x/report.pdf"
+        mock_init_provider.return_value = Mock(name="prowler_provider")
+
+        generate_compliance_reports(
+            tenant_id=str(uuid.uuid4()),
+            scan_id=str(uuid.uuid4()),
+            provider_id=str(uuid.uuid4()),
+            generate_threatscore=True,
+            generate_ens=True,
+            generate_nis2=True,
+            generate_csa=True,
+            generate_cis=True,
+        )
+
+        # All 5 wrappers were invoked once each…
+        mock_threatscore.assert_called_once()
+        mock_ens.assert_called_once()
+        mock_nis2.assert_called_once()
+        mock_csa.assert_called_once()
+        mock_cis.assert_called_once()
+        # …but the SDK provider was initialized only once.
+        assert mock_init_provider.call_count == 1, (
+            f"expected 1 init, got {mock_init_provider.call_count} "
+            f"(prowler_provider must be shared across reports)"
+        )
+
+        # The shared instance must reach every wrapper as kwargs.
+        shared = mock_init_provider.return_value
+        for mock_wrapper in (
+            mock_threatscore,
+            mock_ens,
+            mock_nis2,
+            mock_csa,
+            mock_cis,
+        ):
+            _, call_kwargs = mock_wrapper.call_args
+            assert call_kwargs.get("prowler_provider") is shared
+
    @patch("tasks.jobs.report.rmtree")
    @patch("tasks.jobs.report._upload_to_s3")
    @patch("tasks.jobs.report.generate_threatscore_report")
@@ -1269,6 +1269,48 @@ class TestComponentEdgeCases:
        # Should be a LongTable for large datasets
        assert isinstance(table, LongTable)

+    def test_zebra_uses_rowbackgrounds_not_per_row_background(self, monkeypatch):
+        """The styles list must contain exactly one ROWBACKGROUNDS entry
+        regardless of row count, never N per-row BACKGROUND entries.
+        """
+        captured: dict = {}
+
+        # Capture the list passed to TableStyle. create_data_table builds a
+        # list of style tuples and wraps it in a TableStyle exactly once;
+        # by patching TableStyle we intercept that list.
+        import tasks.jobs.reports.components as comp_mod
+
+        original_table_style = comp_mod.TableStyle
+
+        def _capture_table_style(style_list):
+            captured["styles"] = list(style_list)
+            return original_table_style(style_list)
+
+        monkeypatch.setattr(comp_mod, "TableStyle", _capture_table_style)
+
+        data = [{"name": f"Item {i}"} for i in range(60)]
+        columns = [ColumnConfig("Name", 2 * inch, "name")]
+        comp_mod.create_data_table(data, columns, alternate_rows=True)
+
+        styles = captured["styles"]
+        # Count by command name.
+        names = [s[0] for s in styles if isinstance(s, tuple) and s]
+        # Exactly one ROWBACKGROUNDS entry.
+        assert names.count("ROWBACKGROUNDS") == 1
+        # Zero per-row BACKGROUND entries on data rows. (The header row
+        # BACKGROUND command is intentional and lives at coords (0,0)/(-1,0).)
+        data_row_bg = [
+            s
+            for s in styles
+            if isinstance(s, tuple)
+            and s[0] == "BACKGROUND"
+            and not (s[1] == (0, 0) and s[2] == (-1, 0))
+        ]
+        assert data_row_bg == [], (
+            f"expected no per-row BACKGROUND entries on data rows; "
+            f"got {len(data_row_bg)}"
+        )
+
    def test_create_risk_component_zero_values(self):
        """Test risk component with zero values."""
        component = create_risk_component(risk_level=0, weight=0, score=0)
@@ -1344,3 +1386,194 @@ class TestFrameworkConfigEdgeCases:
        assert get_framework_config("my_custom_threatscore_compliance") is not None
        assert get_framework_config("ens_something_else") is not None
        assert get_framework_config("nis2_gcp") is not None
+
+
+# =============================================================================
+# Findings Table Chunking Tests (PROWLER-1733)
+# =============================================================================
+#
+# These tests guard the OOM-prevention behaviour added in PROWLER-1733:
+# ``_create_findings_tables`` must split a list of findings into multiple
+# small sub-tables instead of producing one giant Table, which would force
+# ReportLab to resolve layout for all rows at once and OOM the worker on
+# scans with thousands of findings per check.
+
+
+class _DummyMetadata:
+    """Lightweight stand-in for FindingOutput.metadata used in chunking tests."""
+
+    def __init__(self, check_title: str = "Title", severity: str = "high"):
+        self.CheckTitle = check_title
+        self.Severity = severity
+
+
+class _DummyFinding:
+    """Lightweight stand-in for FindingOutput used in chunking tests.
+
+    The chunking code only reads a small set of attributes via ``getattr``,
+    so a duck-typed object is enough and lets the tests run without touching
+    the DB or pydantic deserialisation.
+    """
+
+    def __init__(
+        self,
+        check_id: str = "aws_check",
+        resource_name: str = "res-1",
+        resource_uid: str = "",
+        status: str = "FAIL",
+        region: str = "us-east-1",
+        with_metadata: bool = True,
+    ):
+        self.check_id = check_id
+        self.resource_name = resource_name
+        self.resource_uid = resource_uid
+        self.status = status
+        self.region = region
+        if with_metadata:
+            self.metadata = _DummyMetadata()
+        else:
+            self.metadata = None
+
+
+def _make_concrete_generator():
+    """Return a minimal concrete subclass of BaseComplianceReportGenerator."""
+
+    class _Concrete(BaseComplianceReportGenerator):
+        def create_executive_summary(self, data):
+            return []
+
+        def create_charts_section(self, data):
+            return []
+
+        def create_requirements_index(self, data):
+            return []
+
+    return _Concrete(FrameworkConfig(name="test", display_name="Test"))
+
+
+class TestFindingsTableChunking:
+    """Tests for ``_create_findings_tables`` (PROWLER-1733)."""
+
+    def test_chunking_produces_expected_number_of_subtables(self):
+        """5000 findings @ chunk_size=300 → 17 sub-tables + 16 spacers."""
+        generator = _make_concrete_generator()
+        findings = [_DummyFinding(check_id="c1") for _ in range(5000)]
+
+        flowables = generator._create_findings_tables(findings, chunk_size=300)
+
+        tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
+        spacers = [f for f in flowables if isinstance(f, Spacer)]
+        # ceil(5000 / 300) == 17
+        assert len(tables) == 17
+        # Spacer between every pair of contiguous tables, not after the last
+        assert len(spacers) == 16
+
+    def test_chunk_size_param_overrides_default(self):
+        """250 findings @ chunk_size=100 → 3 sub-tables."""
+        generator = _make_concrete_generator()
+        findings = [_DummyFinding(check_id="c2") for _ in range(250)]
+
+        flowables = generator._create_findings_tables(findings, chunk_size=100)
+        tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
+        assert len(tables) == 3
+
+    def test_empty_findings_returns_empty_list(self):
+        """No findings → no flowables. Callers can extend(...) safely."""
+        generator = _make_concrete_generator()
+        assert generator._create_findings_tables([]) == []
+
+    def test_single_chunk_has_no_spacer(self):
+        """A single sub-table must not emit a trailing spacer."""
+        generator = _make_concrete_generator()
+        findings = [_DummyFinding(check_id="c3") for _ in range(10)]
+
+        flowables = generator._create_findings_tables(findings, chunk_size=300)
+        assert len(flowables) == 1
+        assert isinstance(flowables[0], (Table, LongTable))
+
+    def test_malformed_finding_is_skipped(self):
+        """A broken finding must not abort the report; it is logged and skipped."""
+        generator = _make_concrete_generator()
+
+        class _Broken:
+            # No attributes at all; getattr() defaults will mostly cope, but
+            # we force an explicit error by making the metadata attribute
+            # itself raise on access.
+            @property
+            def metadata(self):
+                raise RuntimeError("boom")
+
+            check_id = "broken"
+
+        findings = [
+            _DummyFinding(check_id="c4"),
+            _Broken(),
+            _DummyFinding(check_id="c4"),
+        ]
+        flowables = generator._create_findings_tables(findings, chunk_size=300)
+        # Two good rows → one sub-table containing them; the broken one is
+        # logged and dropped, not propagated.
+        tables = [f for f in flowables if isinstance(f, (Table, LongTable))]
+        assert len(tables) == 1
+
+    def test_create_findings_table_alias_returns_first_chunk(self):
+        """The deprecated alias must keep returning a single Table flowable."""
+        generator = _make_concrete_generator()
+        findings = [_DummyFinding(check_id="c5") for _ in range(700)]
+
+        first = generator._create_findings_table(findings)
+        assert isinstance(first, (Table, LongTable))
+
+    def test_create_findings_table_alias_empty(self):
+        """Alias on empty input returns an empty (header-only) Table, not None."""
+        generator = _make_concrete_generator()
+        result = generator._create_findings_table([])
+        # The legacy alias never returned None; an empty header-only table
+        # is a strict superset of that contract.
+        assert isinstance(result, (Table, LongTable))
+
+
+# =============================================================================
+# Logging Context Manager Tests (PROWLER-1733)
+# =============================================================================
+
+
+class TestLogPhaseContextManager:
+    """Tests for ``_log_phase`` (PROWLER-1733).
+
+    The context manager emits structured ``phase_start`` / ``phase_end``
+    logs with ``scan_id``, ``framework`` and ``elapsed_s``, so Datadog/
+    CloudWatch queries can pivot by scan and find the slow section.
+    """
+
+    def test_emits_start_and_end_with_elapsed_and_rss(self, caplog):
+        from tasks.jobs.reports.base import _log_phase
+
+        caplog.set_level("INFO", logger="tasks.jobs.reports.base")
+        with _log_phase("unit_test_phase", scan_id="s-1", framework="Test FW"):
+            pass
+
+        messages = [r.getMessage() for r in caplog.records]
+        starts = [m for m in messages if "phase_start" in m]
+        ends = [m for m in messages if "phase_end" in m]
+
+        assert len(starts) == 1 and len(ends) == 1
+        assert "phase=unit_test_phase" in starts[0]
+        assert "scan_id=s-1" in starts[0]
+        assert "framework=Test FW" in starts[0]
+        assert "elapsed_s=" in ends[0]
+        assert "rss_kb=" in ends[0]
+        assert "delta_rss_kb=" in ends[0]
+
+    def test_failure_logs_phase_failed_and_reraises(self, caplog):
+        from tasks.jobs.reports.base import _log_phase
+
+        caplog.set_level("INFO", logger="tasks.jobs.reports.base")
+        with pytest.raises(RuntimeError, match="boom"):
+            with _log_phase("failing_phase", scan_id="s-2", framework="FW"):
+                raise RuntimeError("boom")
+
+        messages = [r.getMessage() for r in caplog.records]
+        assert any("phase_failed" in m and "failing_phase" in m for m in messages)
+        # No phase_end on the failure path.
+        assert not any("phase_end" in m for m in messages)
@@ -149,6 +149,14 @@ Prowler Cloud and App expose two formats:
 * **CSV report:** Every requirement, every check, and every finding for the selected scan and filters. Available for all supported frameworks.
 * **PDF report:** Curated executive-style report. Currently supported for Prowler ThreatScore, ENS RD2022, NIS2, and CSA CCM. Additional PDF reports are added in subsequent Prowler releases.

+<Note>
+**PDF detail section is capped at the first 100 failed findings per check.** The PDF is intended as an executive/auditor document, not a raw data dump: when a check produces more than 100 failed findings the report renders the first 100 and shows a banner pointing the reader to the CSV or JSON-OCSF export for the complete list. The compliance CSV and the scan outputs are never truncated.
+
+The cap is configurable per deployment via the `DJANGO_PDF_MAX_FINDINGS_PER_CHECK` environment variable on the Prowler API workers; set it to `0` to disable truncation entirely. The default value of `100` keeps the PDF readable and bounded in size on enterprise-scale scans (hundreds of thousands of findings) without affecting smaller scans, where the cap is rarely reached.
+
+Only **failed** findings are rendered in the detail section. PASS findings for the same check are excluded at query time. The PDF surfaces what needs attention, and the CSV/JSON exports surface everything for forensic review.
+</Note>
+
 #### Downloading From the Detail Page

 Inside any framework detail page, the **CSV** and **PDF** buttons in the header trigger the same downloads as the overview dropdown. The PDF button only appears for frameworks that support it.
Author	SHA1	Message	Date
pedrooot	619b38999b	chore(revision): resolve comments	2026-05-14 11:55:48 +02:00
pedrooot	19f53df019	chore(changelog): update ofr universal compliance	2026-05-13 17:35:21 +02:00
pedrooot	cd0f1d34c7	feat(reporting): bound PDF compliance report memory and CPU on large scans	2026-05-13 17:31:15 +02:00