feat(scans): Reset resource failed findings to 0 for ephemeral resources (#10929)

2026-05-06 08:47:18 +00:00 · 2026-04-29 19:08:16 +02:00
parent 59dcdb87c4
commit 85d38b5f71
5 changed files with 588 additions and 3 deletions
@@ -2,6 +2,14 @@

 All notable changes to the **Prowler API** are documented in this file.

+## [1.27.0] (Prowler UNRELEASED)
+
+### 🚀 Added
+
+- New `scan-reset-ephemeral-resources` post-scan task zeroes `failed_findings_count` for resources missing from the latest full-scope scan, keeping ephemeral resources from polluting the Resources page sort [(#10929)](https://github.com/prowler-cloud/prowler/pull/10929)
+
+---
+
 ## [1.26.1] (Prowler v5.25.1)

 ### 🐞 Fixed
@@ -595,10 +595,40 @@ class Scan(RowLevelSecurityProtectedModel):
    objects = ActiveProviderManager()
    all_objects = models.Manager()

+    _SCOPING_SCANNER_ARG_KEYS_CACHE: tuple[str, ...] | None = None
+
+    @classmethod
+    def get_scoping_scanner_arg_keys(cls) -> tuple[str, ...]:
+        """Return the scanner_args keys that mark a scan as scoped.
+
+        Derived from ``prowler.lib.scan.scan.Scan.__init__`` so the API stays
+        in sync with whatever the SDK actually accepts as filters. Every
+        ``__init__`` parameter except ``self`` and ``provider`` is treated as
+        a scoping filter, and the names are returned in signature order.
+
+        The tuple is computed on the first call and cached on the class, on
+        the assumption that the SDK signature is stable for the process
+        lifetime.
+
+        Returns:
+            Tuple of parameter names accepted by the SDK scan constructor,
+            minus ``self`` and ``provider``.
+        """
+        if cls._SCOPING_SCANNER_ARG_KEYS_CACHE is None:
+            # Imported lazily: only the first (uncached) call pays for the
+            # SDK import and the signature inspection.
+            import inspect

+            from prowler.lib.scan.scan import Scan as ProwlerScan

+            params = inspect.signature(ProwlerScan.__init__).parameters
+            # NOTE(review): any future non-filter kwarg on the SDK constructor
+            # would be classified as a scoping filter unless it is added to
+            # this exclusion tuple.
+            cls._SCOPING_SCANNER_ARG_KEYS_CACHE = tuple(
+                name for name in params if name not in ("self", "provider")
+            )
+        return cls._SCOPING_SCANNER_ARG_KEYS_CACHE
+
    class TriggerChoices(models.TextChoices):
        SCHEDULED = "scheduled", _("Scheduled")
        MANUAL = "manual", _("Manual")

+    # Trigger values for scans that ran the SDK end-to-end. Imported scans (or
+    # any future trigger) are intentionally NOT in this set — they may carry
+    # only a partial slice of resources, so post-scan logic that depends on a
+    # full-scope sweep (e.g. resetting ephemeral resource findings) must skip
+    # them by default.
+    LIVE_SCAN_TRIGGERS = frozenset(
+        (TriggerChoices.SCHEDULED.value, TriggerChoices.MANUAL.value)
+    )
+
    id = models.UUIDField(primary_key=True, default=uuid7, editable=False)
    name = models.CharField(
        blank=True, null=True, max_length=100, validators=[MinLengthValidator(3)]
@@ -681,6 +711,24 @@ class Scan(RowLevelSecurityProtectedModel):
    class JSONAPIMeta:
        resource_name = "scans"

+    def is_full_scope(self) -> bool:
+        """Return True if this scan ran with no scoping filters at all.
+
+        Used to gate post-scan operations (such as resetting the
+        failed_findings_count of resources missing from the scan) that are only
+        safe when the scan covered every check, service, and category. Imported
+        scans are NOT full-scope by definition — they may carry only a partial
+        slice of resources, so they're rejected via ``trigger`` even before the
+        scanner_args check.
+
+        A scoping key only counts as a filter when its value is truthy, so
+        ``None``, ``[]`` and ``""`` are treated the same as an absent key.
+        Keys in ``scanner_args`` that are not scoping keys are ignored.
+
+        Returns:
+            False when ``trigger`` is not in ``LIVE_SCAN_TRIGGERS`` or when any
+            scoping key holds a truthy value; True otherwise.
+        """
+        if self.trigger not in self.LIVE_SCAN_TRIGGERS:
+            return False
+        # A missing/None scanner_args means "no filters were supplied".
+        scanner_args = self.scanner_args or {}
+        for key in self.get_scoping_scanner_arg_keys():
+            # Truthiness check on purpose: empty/None values are not filters.
+            if scanner_args.get(key):
+                return False
+        return True
+

 class AttackPathsScan(RowLevelSecurityProtectedModel):
    objects = ActiveProviderManager()
@@ -10,16 +10,29 @@ from typing import Any

 import sentry_sdk
 from celery.utils.log import get_task_logger
+from config.django.base import DJANGO_FINDINGS_BATCH_SIZE
 from config.env import env
 from config.settings.celery import CELERY_DEADLOCK_ATTEMPTS
 from django.db import IntegrityError, OperationalError
-from django.db.models import Case, Count, IntegerField, Max, Min, Prefetch, Q, Sum, When
+from django.db.models import (
+    Case,
+    Count,
+    Exists,
+    IntegerField,
+    Max,
+    Min,
+    OuterRef,
+    Prefetch,
+    Q,
+    Sum,
+    When,
+)
 from django.utils import timezone as django_timezone
 from tasks.jobs.queries import (
    COMPLIANCE_UPSERT_PROVIDER_SCORE_SQL,
    COMPLIANCE_UPSERT_TENANT_SUMMARY_SQL,
 )
-from tasks.utils import CustomEncoder
+from tasks.utils import CustomEncoder, batched

 from api.compliance import PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE
 from api.constants import SEVERITY_ORDER
@@ -2069,3 +2082,169 @@ def aggregate_finding_group_summaries(tenant_id: str, scan_id: str):
        "created": created_count,
        "updated": updated_count,
    }
+
+
+def reset_ephemeral_resource_findings_count(tenant_id: str, scan_id: str) -> dict:
+    """Zero failed_findings_count for resources missing from a completed full-scope scan.
+
+    Resources that exist in the database for the scan's provider but were not
+    touched by this scan are treated as ephemeral. We keep their historical
+    findings, but reset the denormalized counter that drives the Resources page
+    sort so they stop ranking at the top.
+
+    Skipped (no-op) when:
+        - The scan does not exist for this tenant.
+        - The scan is not in COMPLETED state.
+        - The scan is not full-scope per ``Scan.is_full_scope()`` (any scoping
+          filter in scanner_args, or a trigger outside the live-scan set).
+        - Another scan is the latest COMPLETED scan for the same provider.
+        - The scan reports ``unique_resource_count > 0`` but has no
+          ``ResourceScanSummary`` rows.
+
+    Query design (must scale to 500k+ resources per provider):
+        Phase 1 — collect ephemeral IDs with one anti-join read.
+            Outer filter ``(tenant_id, provider_id, failed_findings_count > 0)``
+            uses ``resources_tenant_provider_idx``. The correlated
+            ``NOT EXISTS`` subquery hits the implicit unique index
+            ``(tenant_id, scan_id, resource_id)`` on ``ResourceScanSummary``.
+            ``NOT EXISTS`` (vs ``NOT IN``) is null-safe and lets the planner
+            choose between hash anti-join and indexed nested-loop anti-join.
+            ``.iterator(chunk_size=...)`` skips the queryset cache while the
+            UUIDs are fetched; they are still collected into one in-memory
+            list so Phase 2 can run outside this read transaction.
+        Phase 2 — UPDATE in fixed-size batches.
+            One large UPDATE would hold row-exclusive locks for seconds and
+            create a WAL spike. Batched UPDATEs by ``id__in`` (~1k rows each)
+            hit the primary key, keep each lock window ~50ms, bound WAL chunks,
+            and let other writers proceed between batches.
+            ``failed_findings_count__gt=0`` in the UPDATE is idempotent under
+            concurrent scans and skips no-op rewrites.
+        Reads use the primary DB, not the replica: ``ResourceScanSummary`` rows
+        were written by the same scan task that triggered this one, so replica
+        lag could falsely classify scanned resources as ephemeral.
+
+        Scope detection (``Scan.is_full_scope()``) derives the set of scoping
+        scanner_args from ``prowler.lib.scan.scan.Scan.__init__`` via
+        introspection, so the API can never drift from the SDK's filter
+        contract. Imported scans are also rejected by trigger — they may only
+        cover a partial slice of resources.
+
+    Args:
+        tenant_id: Tenant that owns the scan; used for every RLS transaction
+            and as an explicit filter on every query.
+        scan_id: ID of the scan whose resource set defines "still present".
+
+    Returns:
+        ``{"status": "skipped", "reason": ...}`` when one of the gates above
+        fires, otherwise ``{"status": "completed", "scan_id": ...,
+        "provider_id": ..., "reset": <rows updated>}``.
+    """
+    with rls_transaction(tenant_id):
+        scan = Scan.objects.filter(tenant_id=tenant_id, id=scan_id).first()

+    if scan is None:
+        logger.warning(f"Scan {scan_id} not found")
+        return {"status": "skipped", "reason": "scan not found"}

+    if scan.state != StateChoices.COMPLETED:
+        logger.info(f"Scan {scan_id} not completed; skipping ephemeral reset")
+        return {"status": "skipped", "reason": "scan not completed"}

+    if not scan.is_full_scope():
+        logger.info(
+            f"Scan {scan_id} ran with scoping filters; skipping ephemeral reset"
+        )
+        return {"status": "skipped", "reason": "partial scan scope"}

+    # Race protection: if a newer completed scan exists for this provider, our
+    # ResourceScanSummary set may be stale relative to the resources' current
+    # failed_findings_count values. Wiping based on the older scan could zero
+    # counts a newer scan just set. Skip and let the newer scan's reset task
+    # (if it qualifies) do the work; if this task was delayed in the queue,
+    # that's the correct outcome.
+    # NOTE(review): despite the variable name, the query below does not filter
+    # on scope — ANY newer COMPLETED scan for the provider (scoped or not)
+    # causes a skip. Confirm this conservative behavior is intended.
+    # `completed_at__isnull=False` is required: Postgres orders NULL first in
+    # DESC, so a sibling COMPLETED scan with a missing completed_at would sort
+    # as "newest" and incorrectly cause us to skip.
+    with rls_transaction(tenant_id):
+        latest_full_scope_scan_id = (
+            Scan.objects.filter(
+                tenant_id=tenant_id,
+                provider_id=scan.provider_id,
+                state=StateChoices.COMPLETED,
+                completed_at__isnull=False,
+            )
+            .order_by("-completed_at", "-inserted_at")
+            .values_list("id", flat=True)
+            .first()
+        )
+    if latest_full_scope_scan_id != scan.id:
+        logger.info(
+            f"Scan {scan_id} is not the latest completed scan for provider "
+            f"{scan.provider_id}; skipping ephemeral reset"
+        )
+        return {"status": "skipped", "reason": "newer scan exists"}

+    # Defensive gate: ResourceScanSummary rows are written by perform_prowler_scan
+    # via best-effort bulk_create. If those writes failed silently (or the scan
+    # genuinely produced resources but no summaries were persisted), the
+    # ~Exists(in_scan) anti-join below would classify EVERY resource for this
+    # provider as ephemeral and zero their counts. Bail loudly instead.
+    # The gate only fires when unique_resource_count > 0: a scan that reports
+    # zero resources and has no summaries proceeds, and every resource of the
+    # provider with a positive count is reset.
+    with rls_transaction(tenant_id):
+        summaries_present = ResourceScanSummary.objects.filter(
+            tenant_id=tenant_id, scan_id=scan_id
+        ).exists()
+    if scan.unique_resource_count > 0 and not summaries_present:
+        logger.error(
+            f"Scan {scan_id} reports {scan.unique_resource_count} unique "
+            f"resources but no ResourceScanSummary rows are persisted; "
+            f"skipping ephemeral reset to avoid wiping valid counts"
+        )
+        return {"status": "skipped", "reason": "summaries missing"}

+    # Stays on the primary DB intentionally. ResourceScanSummary rows are
+    # written by perform_prowler_scan in the same chain that triggered this
+    # task, so replica lag could return an empty/partial summary set; a stale
+    # read here would classify every Resource as ephemeral and wipe valid
+    # failed_findings_count values on the primary. Same rationale as
+    # update_provider_compliance_scores below in this module.
+    # Materializing the ID list (rather than streaming the iterator into
+    # batched UPDATEs) is intentional: it lets the UPDATEs run in their own
+    # short rls_transactions instead of one long transaction holding row locks
+    # on every batch. At 500k UUIDs the peak memory is ~40 MB — acceptable for
+    # a Celery worker — and is the better trade-off versus a multi-second
+    # write-lock window blocking concurrent scans.
+    with rls_transaction(tenant_id):
+        # Correlated subquery: "this resource has a summary row for scan_id".
+        in_scan = ResourceScanSummary.objects.filter(
+            tenant_id=tenant_id,
+            scan_id=scan_id,
+            resource_id=OuterRef("pk"),
+        )
+        ephemeral_ids = list(
+            Resource.objects.filter(
+                tenant_id=tenant_id,
+                provider_id=scan.provider_id,
+                failed_findings_count__gt=0,
+            )
+            .filter(~Exists(in_scan))
+            .values_list("id", flat=True)
+            .iterator(chunk_size=DJANGO_FINDINGS_BATCH_SIZE)
+        )

+    if not ephemeral_ids:
+        logger.info(f"No ephemeral resources for scan {scan_id}")
+        return {
+            "status": "completed",
+            "scan_id": str(scan_id),
+            "provider_id": str(scan.provider_id),
+            "reset": 0,
+        }

+    total_updated = 0
+    for batch, _ in batched(ephemeral_ids, DJANGO_FINDINGS_BATCH_SIZE):
+        # batched() always yields a final tuple, which is empty when the input
+        # length is an exact multiple of the batch size. Skip it so we don't
+        # issue a no-op UPDATE ... WHERE id IN ().
+        if not batch:
+            continue
+        # One short transaction per batch; update() returns the number of rows
+        # matched, which is accumulated into the reported total.
+        with rls_transaction(tenant_id):
+            total_updated += Resource.objects.filter(
+                tenant_id=tenant_id,
+                id__in=batch,
+                failed_findings_count__gt=0,
+            ).update(failed_findings_count=0)

+    logger.info(
+        f"Ephemeral resource reset for scan {scan_id}: "
+        f"{total_updated} resources zeroed for provider {scan.provider_id}"
+    )

+    return {
+        "status": "completed",
+        "scan_id": str(scan_id),
+        "provider_id": str(scan.provider_id),
+        "reset": total_updated,
+    }
@@ -58,6 +58,7 @@ from tasks.jobs.scan import (
    aggregate_findings,
    create_compliance_requirements,
    perform_prowler_scan,
+    reset_ephemeral_resource_findings_count,
    update_provider_compliance_scores,
 )
 from tasks.utils import (
@@ -77,6 +78,7 @@ from prowler.lib.check.compliance_models import Compliance
 from prowler.lib.outputs.compliance.generic.generic import GenericCompliance
 from prowler.lib.outputs.finding import Finding as FindingOutput

+
 logger = get_task_logger(__name__)


@@ -158,6 +160,13 @@ def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str)
            generate_outputs_task.si(
                scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
            ),
+            # Ephemeral-resource reset — runs in the parallel post-scan group.
+            # The task catches its own exceptions and returns a status dict,
+            # so a failure cannot cascade into reports or integrations. Its
+            # only prerequisite is that perform_prowler_scan has committed
+            # ResourceScanSummary, which is true by the time this chain fires.
+            reset_ephemeral_resource_findings_count_task.si(
+                tenant_id=tenant_id, scan_id=scan_id
+            ),
        ),
        group(
            # Use optimized task that generates both reports with shared queries
@@ -393,7 +402,8 @@ class AttackPathsScanRLSTask(RLSTask):
    SDK initialization, or Neo4j configuration errors during setup).
    """

-    def on_failure(self, exc, task_id, args, kwargs, _einfo):
+    def on_failure(self, exc, task_id, args, kwargs, _einfo):  # noqa: ARG002
+        del args  # Required by Celery's Task.on_failure signature; not used.
        tenant_id = kwargs.get("tenant_id")
        scan_id = kwargs.get("scan_id")

@@ -790,6 +800,32 @@ def aggregate_daily_severity_task(tenant_id: str, scan_id: str):
    return aggregate_daily_severity(tenant_id=tenant_id, scan_id=scan_id)


+@shared_task(name="scan-reset-ephemeral-resources", queue="overview")
+@handle_provider_deletion
+def reset_ephemeral_resource_findings_count_task(tenant_id: str, scan_id: str):
+    """Reset failed_findings_count for resources missing from a completed full-scope scan.
+
+    Failures are swallowed and returned as a status: this task lives inside the
+    post-scan group, and Celery propagates group-member exceptions into the next
+    chain step — meaning a crash here would block compliance reports and
+    integrations. The reset is purely cosmetic (UI sort optimization), so a
+    bad run is logged and absorbed rather than allowed to cascade.
+
+    Args:
+        tenant_id: Tenant that owns the scan.
+        scan_id: Scan whose missing resources should have their counter reset.
+
+    Returns:
+        The result dict of ``reset_ephemeral_resource_findings_count`` when it
+        returns normally, or ``{"status": "failed", "scan_id": ...,
+        "reason": ...}`` when it raised.
+
+    NOTE(review): because every ``Exception`` is caught below, the
+    ``handle_provider_deletion`` wrapper never observes an exception raised by
+    the job — confirm that is intended.
+    """
+    try:
+        return reset_ephemeral_resource_findings_count(
+            tenant_id=tenant_id, scan_id=scan_id
+        )
+    except Exception as exc:  # noqa: BLE001 — intentionally broad
+        # logger.exception records the traceback; the task itself reports the
+        # failure through its return value instead of raising.
+        logger.exception(
+            f"reset_ephemeral_resource_findings_count failed for scan {scan_id}: {exc}"
+        )
+        return {
+            "status": "failed",
+            "scan_id": str(scan_id),
+            "reason": str(exc),
+        }
+
+
@shared_task(base=RLSTask, name="scan-finding-group-summaries", queue="overview")
@set_tenant(keep_tenant=True)
@handle_provider_deletion
@@ -24,6 +24,7 @@ from tasks.jobs.scan import (
    aggregate_findings,
    create_compliance_requirements,
    perform_prowler_scan,
+    reset_ephemeral_resource_findings_count,
    update_provider_compliance_scores,
 )
 from tasks.utils import CustomEncoder
@@ -35,6 +36,7 @@ from api.models import (
    MuteRule,
    Provider,
    Resource,
+    ResourceScanSummary,
    Scan,
    ScanSummary,
    StateChoices,
@@ -4335,3 +4337,315 @@ class TestUpdateProviderComplianceScores:
        assert any("provider_compliance_scores" in c for c in calls)
        assert any("tenant_compliance_summaries" in c for c in calls)
        assert any("pg_advisory_xact_lock" in c for c in calls)
+
+
+class TestScanIsFullScope:
+    """Tests for ``Scan.is_full_scope`` and ``Scan.get_scoping_scanner_arg_keys``.
+
+    Every test builds an unsaved ``Scan`` instance in memory; nothing here
+    calls ``save()`` or queries the ORM.
+    """

+    def _live_trigger(self):
+        """Return a trigger value used by the tests as a "live" SDK scan."""
+        return Scan.TriggerChoices.MANUAL

+    @pytest.mark.parametrize(
+        "scanner_args",
+        [
+            {},
+            {"unrelated": "value"},
+            {"checks": None},
+            {"services": []},
+            {"severities": ""},
+        ],
+    )
+    def test_full_scope_when_no_filters_present(self, scanner_args):
+        # Absent keys, unknown keys, and falsy filter values must all be
+        # treated as "no filter".
+        scan = Scan(scanner_args=scanner_args, trigger=self._live_trigger())
+        assert scan.is_full_scope() is True

+    def test_full_scope_covers_every_sdk_kwarg(self):
+        # Lock the predicate to whatever ProwlerScan's __init__ exposes today.
+        # If the SDK adds a new filter, this test still passes via the
+        # introspection-driven derivation; if it adds a non-filter kwarg
+        # (e.g. provider-like), keep the exclusion list in sync in models.py.
+        from prowler.lib.scan.scan import Scan as ProwlerScan
+        import inspect

+        expected = tuple(
+            name
+            for name in inspect.signature(ProwlerScan.__init__).parameters
+            if name not in ("self", "provider")
+        )
+        assert Scan.get_scoping_scanner_arg_keys() == expected
+        # Spot-check a few well-known filters survive the introspection.
+        assert "checks" in expected
+        assert "services" in expected
+        assert "severities" in expected

+    def test_partial_scope_for_each_sdk_filter(self):
+        # Any single truthy scoping key is enough to make the scan partial.
+        for key in Scan.get_scoping_scanner_arg_keys():
+            scan = Scan(scanner_args={key: ["x"]}, trigger=self._live_trigger())
+            assert scan.is_full_scope() is False, f"{key} should mark scan as partial"

+    def test_imported_scan_is_never_full_scope(self):
+        # Forward-defensive: any trigger outside LIVE_SCAN_TRIGGERS (e.g. a
+        # future "imported" trigger) must never qualify, even with empty args.
+        scan = Scan(scanner_args={}, trigger="imported")
+        assert scan.is_full_scope() is False

+    def test_handles_none_scanner_args(self):
+        # scanner_args=None must behave like an empty dict.
+        scan = Scan(scanner_args=None, trigger=self._live_trigger())
+        assert scan.is_full_scope() is True
+
+
+@pytest.mark.django_db
+class TestResetEphemeralResourceFindingsCount:
+    """DB-backed tests for ``reset_ephemeral_resource_findings_count``.
+
+    NOTE(review): the assertions rely on the shared fixtures' layout —
+    presumably ``scan1`` is a COMPLETED, unscoped scan of the first provider
+    with ``unique_resource_count == 0``, ``resource1``/``resource2`` belong to
+    that provider and ``resource3`` to a different one. Verify against the
+    fixture definitions.
+    """

+    def _make_scan_summary(self, tenant_id, scan_id, resource):
+        """Persist a ResourceScanSummary row linking ``resource`` to ``scan_id``."""
+        return ResourceScanSummary.objects.create(
+            tenant_id=tenant_id,
+            scan_id=scan_id,
+            resource_id=resource.id,
+            service=resource.service,
+            region=resource.region,
+            resource_type=resource.type,
+        )

+    def test_resets_only_resources_missing_from_full_scope_scan(
+        self, tenants_fixture, scans_fixture, providers_fixture, resources_fixture
+    ):
+        tenant, *_ = tenants_fixture
+        scan1, scan2, *_ = scans_fixture
+        resource1, resource2, resource3 = resources_fixture

+        Resource.objects.filter(id=resource1.id).update(failed_findings_count=3)
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)
+        Resource.objects.filter(id=resource3.id).update(failed_findings_count=7)

+        # Only resource1 was scanned in scan1; resource2 is ephemeral.
+        self._make_scan_summary(tenant.id, scan1.id, resource1)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "completed"
+        assert result["reset"] == 1

+        resource1.refresh_from_db()
+        resource2.refresh_from_db()
+        resource3.refresh_from_db()

+        assert resource1.failed_findings_count == 3
+        assert resource2.failed_findings_count == 0
+        # Other provider's resource is never touched.
+        assert resource3.failed_findings_count == 7

+    def test_skips_when_scan_not_completed(
+        self, tenants_fixture, scans_fixture, resources_fixture
+    ):
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        resource1, resource2, _ = resources_fixture

+        # Any state other than COMPLETED must short-circuit the reset.
+        Scan.objects.filter(id=scan1.id).update(state=StateChoices.EXECUTING)
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "skipped"
+        assert result["reason"] == "scan not completed"

+        resource2.refresh_from_db()
+        assert resource2.failed_findings_count == 5

+    def test_skips_when_scan_has_scoping_filters(
+        self, tenants_fixture, scans_fixture, resources_fixture
+    ):
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        _, resource2, _ = resources_fixture

+        # A truthy "checks" filter makes the scan partial-scope.
+        Scan.objects.filter(id=scan1.id).update(scanner_args={"checks": ["check1"]})
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "skipped"
+        assert result["reason"] == "partial scan scope"

+        resource2.refresh_from_db()
+        assert resource2.failed_findings_count == 5

+    def test_skips_when_scan_not_found(self, tenants_fixture):
+        tenant, *_ = tenants_fixture

+        # A random UUID that matches no scan row.
+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(uuid.uuid4())
+        )

+        assert result["status"] == "skipped"
+        assert result["reason"] == "scan not found"

+    def test_skips_when_newer_scan_completed_for_same_provider(
+        self, tenants_fixture, scans_fixture, providers_fixture, resources_fixture
+    ):
+        # If a newer completed scan exists for the same provider, our
+        # ResourceScanSummary set is stale relative to the resources' current
+        # counts, and applying the diff would corrupt them.
+        from datetime import timedelta

+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        provider, *_ = providers_fixture
+        _, resource2, _ = resources_fixture

+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)

+        # Create a newer COMPLETED scan for the same provider, with an
+        # explicit completed_at strictly after scan1's so ordering is
+        # deterministic regardless of clock resolution.
+        newer_completed_at = scan1.completed_at + timedelta(minutes=5)
+        Scan.objects.create(
+            name="Newer Scan",
+            provider=provider,
+            trigger=Scan.TriggerChoices.MANUAL,
+            state=StateChoices.COMPLETED,
+            tenant_id=tenant.id,
+            started_at=newer_completed_at,
+            completed_at=newer_completed_at,
+        )

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "skipped"
+        assert result["reason"] == "newer scan exists"

+        resource2.refresh_from_db()
+        assert resource2.failed_findings_count == 5

+    def test_does_not_touch_other_providers_resources(
+        self, tenants_fixture, scans_fixture, providers_fixture, resources_fixture
+    ):
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        _, _, resource3 = resources_fixture

+        # resource3 belongs to provider2 with failed_findings_count > 0 and is
+        # not in scan1's summary. It MUST NOT be reset.
+        Resource.objects.filter(id=resource3.id).update(failed_findings_count=9)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "completed"
+        # reset == 0 also presumes scan1's own provider has no resource with a
+        # positive count in the fixtures — verify against the fixture defaults.
+        assert result["reset"] == 0

+        resource3.refresh_from_db()
+        assert resource3.failed_findings_count == 9

+    def test_resources_already_zero_are_not_rewritten(
+        self, tenants_fixture, scans_fixture, resources_fixture
+    ):
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        resource1, resource2, _ = resources_fixture

+        # Both resources already at 0, neither in summary -> nothing to update.
+        Resource.objects.filter(id=resource1.id).update(failed_findings_count=0)
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=0)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "completed"
+        assert result["reset"] == 0

+    def test_skips_when_summaries_missing_for_scan_with_resources(
+        self, tenants_fixture, scans_fixture, resources_fixture
+    ):
+        # Catastrophic guard: if a scan reports unique_resource_count > 0 but
+        # no ResourceScanSummary rows are persisted (e.g. bulk_create silently
+        # failed), the anti-join would classify EVERY resource as ephemeral
+        # and zero their counts. The gate must skip and preserve the data.
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        resource1, resource2, _ = resources_fixture

+        Scan.objects.filter(id=scan1.id).update(unique_resource_count=10)
+        Resource.objects.filter(id=resource1.id).update(failed_findings_count=3)
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "skipped"
+        assert result["reason"] == "summaries missing"

+        resource1.refresh_from_db()
+        resource2.refresh_from_db()
+        assert resource1.failed_findings_count == 3
+        assert resource2.failed_findings_count == 5

+    def test_ignores_sibling_scan_with_null_completed_at(
+        self, tenants_fixture, scans_fixture, providers_fixture, resources_fixture
+    ):
+        # Postgres orders NULL first in DESC; a sibling COMPLETED scan with a
+        # missing completed_at must not be treated as the latest scan and
+        # cause us to incorrectly skip the reset.
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        provider, *_ = providers_fixture
+        resource1, resource2, _ = resources_fixture

+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=5)
+        self._make_scan_summary(tenant.id, scan1.id, resource1)

+        Scan.objects.create(
+            name="Ghost Scan",
+            provider=provider,
+            trigger=Scan.TriggerChoices.MANUAL,
+            state=StateChoices.COMPLETED,
+            tenant_id=tenant.id,
+            started_at=scan1.completed_at,
+            completed_at=None,
+        )

+        result = reset_ephemeral_resource_findings_count(
+            tenant_id=str(tenant.id), scan_id=str(scan1.id)
+        )

+        assert result["status"] == "completed"
+        assert result["reset"] == 1

+        resource2.refresh_from_db()
+        assert resource2.failed_findings_count == 0

+    def test_batches_updates_when_many_ephemeral_resources(
+        self, tenants_fixture, scans_fixture, resources_fixture
+    ):
+        # Forces multiple batches to confirm the chunked UPDATE path executes
+        # cleanly and the count is the sum across batches.
+        tenant, *_ = tenants_fixture
+        scan1, *_ = scans_fixture
+        resource1, resource2, _ = resources_fixture

+        Resource.objects.filter(id=resource1.id).update(failed_findings_count=2)
+        Resource.objects.filter(id=resource2.id).update(failed_findings_count=4)

+        # No ResourceScanSummary -> both resource1 and resource2 are ephemeral.
+        # This only reaches the UPDATE path because the "summaries missing"
+        # gate requires unique_resource_count > 0 — presumably 0 on the
+        # fixture scan; verify.
+        # Force a 1-row batch via the shared findings batch size knob.
+        with patch("tasks.jobs.scan.DJANGO_FINDINGS_BATCH_SIZE", 1):
+            result = reset_ephemeral_resource_findings_count(
+                tenant_id=str(tenant.id), scan_id=str(scan1.id)
+            )

+        assert result["status"] == "completed"
+        assert result["reset"] == 2

+        resource1.refresh_from_db()
+        resource2.refresh_from_db()
+        assert resource1.failed_findings_count == 0
+        assert resource2.failed_findings_count == 0