fix(scan): implement temporary workaround to skip findings with UID exceeding 300 characters (#9246)

Adrián Jesús Peña Rodríguez
2025-11-17 13:15:02 +01:00
committed by GitHub
parent 1ea0dabf42
commit d43455971b
3 changed files with 131 additions and 1 deletion


@@ -21,6 +21,9 @@ All notable changes to the **Prowler API** are documented in this file.
### Changed
- Optimized database write queries for scan-related tasks [(#9190)](https://github.com/prowler-cloud/prowler/pull/9190)

### Fixed
- Scans no longer fail when findings have UIDs exceeding 300 characters; such findings are now skipped with detailed logging [(#9246)](https://github.com/prowler-cloud/prowler/pull/9246)

### Security
- Django updated to the latest 5.1 security release, 5.1.14, due to a potential [SQL injection](https://github.com/prowler-cloud/prowler/security/dependabot/113) and a [denial-of-service vulnerability](https://github.com/prowler-cloud/prowler/security/dependabot/114) [(#9176)](https://github.com/prowler-cloud/prowler/pull/9176)
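The 5.1.14 floor above comes straight from the changelog entry. As a rough, hypothetical illustration (not part of this commit), a deployment could assert that floor at startup via Django's public version tuple:

# Hypothetical startup check, not part of this commit: fail fast if the running
# Django predates the 5.1.14 security release referenced above.
import django

assert django.VERSION[:3] >= (5, 1, 14), (
    f"Django {django.get_version()} is older than the patched 5.1.14 release"
)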


@@ -383,9 +383,13 @@ def _process_finding_micro_batch(
    mappings_to_create = []
    dirty_resources = {}
    resource_denormalized_data = []  # (finding_instance, resource_instance) pairs
    skipped_findings_count = 0  # Track findings skipped due to UID length

    # Prefetch last statuses for all findings in this batch
    finding_uids = [f.uid for f in findings_batch if f is not None]
    # TEMPORARY WORKAROUND: Filter out UIDs > 300 chars to avoid query errors
    finding_uids = [
        f.uid for f in findings_batch if f is not None and len(f.uid) <= 300
    ]
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        last_statuses = {
            item["uid"]: (item["status"], item["first_seen_at"])
@@ -485,6 +489,20 @@ def _process_finding_micro_batch(
        # Prepare finding data
        finding_uid = finding.uid

        # TEMPORARY WORKAROUND: Skip findings with UID > 300 chars
        # TODO: Remove this after implementing text field migration for finding.uid
        if len(finding_uid) > 300:
            skipped_findings_count += 1
            logger.warning(
                f"Skipping finding with UID exceeding 300 characters. "
                f"Length: {len(finding_uid)}, "
                f"Check: {finding.check_id}, "
                f"Resource: {finding.resource_name}, "
                f"UID: {finding_uid}"
            )
            continue

        last_status, last_first_seen_at = last_status_cache.get(
            finding_uid, (None, None)
        )
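The TODO above points at the longer-term fix: widening finding.uid so the guard can be dropped. A minimal sketch of such a Django migration is shown below; the app label, dependency, and model name are assumptions rather than values taken from this diff:

# Hypothetical follow-up migration sketched from the TODO above; the app label,
# dependency, and model name are assumptions.
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("api", "0001_initial"),  # placeholder dependency, adjust to the latest migration
    ]

    operations = [
        migrations.AlterField(
            model_name="finding",
            name="uid",
            field=models.TextField(),  # lift the 300-character limit on uid
        ),
    ]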
@@ -606,6 +624,13 @@ def _process_finding_micro_batch(
        batch_size=1000,
    )

    # Log skipped findings summary
    if skipped_findings_count > 0:
        logger.warning(
            f"Scan {scan_instance.id}: Skipped {skipped_findings_count} finding(s) "
            f"due to UID length exceeding 300 characters in this micro-batch."
        )


def perform_prowler_scan(
    tenant_id: str,


@@ -1543,6 +1543,108 @@ class TestProcessFindingMicroBatch:
        assert resource_cache[finding.resource_uid].service == finding.service_name
        assert tag_cache.keys() == {("team", "devsec")}

    def test_process_finding_micro_batch_skips_long_uid(
        self, tenants_fixture, scans_fixture
    ):
        """Test that findings with UID > 300 chars are skipped (temporary workaround)."""
        tenant = tenants_fixture[0]
        scan = scans_fixture[0]
        provider = scan.provider

        # Create a finding with UID > 300 chars
        long_uid = (
            "prowler-aws-ec2_instance_public_ip-123456789012-us-east-1-" + "x" * 250
        )
        assert len(long_uid) > 300
        finding_with_long_uid = FakeFinding(
            uid=long_uid,
            status=StatusChoices.FAIL,
            status_extended="public instance",
            severity=Severity.high,
            check_id="ec2_instance_public_ip",
            resource_uid="arn:aws:ec2:us-east-1:123456789012:instance/i-long",
            resource_name="i-long-uid-instance",
            region="us-east-1",
            service_name="ec2",
            resource_type="instance",
            resource_tags={},
            resource_metadata={},
            resource_details={},
            partition="aws",
            raw={},
            compliance={},
            metadata={},
            muted=False,
        )

        # Create a normal finding that should be processed
        normal_finding = FakeFinding(
            uid="finding-normal",
            status=StatusChoices.PASS,
            status_extended="all good",
            severity=Severity.low,
            check_id="s3_bucket_encryption",
            resource_uid="arn:aws:s3:::bucket-normal",
            resource_name="bucket-normal",
            region="us-east-1",
            service_name="s3",
            resource_type="bucket",
            resource_tags={},
            resource_metadata={},
            resource_details={},
            partition="aws",
            raw={},
            compliance={},
            metadata={},
            muted=False,
        )

        resource_cache = {}
        tag_cache = {}
        last_status_cache = {}
        resource_failed_findings_cache = {}
        unique_resources: set[tuple[str, str]] = set()
        scan_resource_cache: set[tuple[str, str, str, str]] = set()
        mute_rules_cache = {}

        with (
            patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
            patch("api.db_utils.rls_transaction", new=noop_rls_transaction),
            patch("tasks.jobs.scan.logger") as mock_logger,
        ):
            _process_finding_micro_batch(
                str(tenant.id),
                [finding_with_long_uid, normal_finding],
                scan,
                provider,
                resource_cache,
                tag_cache,
                last_status_cache,
                resource_failed_findings_cache,
                unique_resources,
                scan_resource_cache,
                mute_rules_cache,
            )

        # Verify the long UID finding was NOT created
        assert not Finding.objects.filter(uid=long_uid).exists()

        # Verify the normal finding WAS created
        assert Finding.objects.filter(uid=normal_finding.uid).exists()

        # Verify logging was called for skipped finding
        assert mock_logger.warning.called
        warning_calls = [str(call) for call in mock_logger.warning.call_args_list]
        assert any(
            "Skipping finding with UID exceeding 300 characters" in str(call)
            for call in warning_calls
        )
        assert any(
            f"Scan {scan.id}: Skipped 1 finding(s)" in str(call)
            for call in warning_calls
        )

@pytest.mark.django_db
class TestCreateComplianceRequirements:
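To exercise only the new test locally, filtering by its name with pytest is enough; a minimal sketch, assuming pytest is installed and invoked from the API test root:

# Hypothetical invocation; select the new test by name rather than by file path.
import pytest

pytest.main(["-k", "test_process_finding_micro_batch_skips_long_uid", "-x"])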