fix(scan): implement temporary workaround to skip findings with UID exceeding 300 characters (#9246)

Adrián Jesús Peña Rodríguez
2025-11-17 13:15:02 +01:00
committed by GitHub
parent 1ea0dabf42
commit d43455971b
3 changed files with 131 additions and 1 deletion


@@ -21,6 +21,9 @@ All notable changes to the **Prowler API** are documented in this file.
### Changed
- Optimized database write queries for scan-related tasks [(#9190)](https://github.com/prowler-cloud/prowler/pull/9190)

### Fixed
- Scans no longer fail when findings have UIDs exceeding 300 characters; such findings are now skipped with detailed logging [(#9246)](https://github.com/prowler-cloud/prowler/pull/9246)

### Security
- Django updated to the latest 5.1 security release, 5.1.14, due to a potential [SQL injection](https://github.com/prowler-cloud/prowler/security/dependabot/113) and a [denial-of-service vulnerability](https://github.com/prowler-cloud/prowler/security/dependabot/114) [(#9176)](https://github.com/prowler-cloud/prowler/pull/9176)
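The 5.1.14 floor above comes straight from the changelog entry. As a rough, hypothetical illustration (not part of this commit), a deployment could assert that floor at startup via Django's public version tuple:

# Hypothetical startup check, not part of this commit: fail fast if the running
# Django predates the 5.1.14 security release referenced above.
import django

assert django.VERSION[:3] >= (5, 1, 14), (
    f"Django {django.get_version()} is older than the patched 5.1.14 release"
)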


@@ -383,9 +383,13 @@ def _process_finding_micro_batch(
    mappings_to_create = []
    dirty_resources = {}
    resource_denormalized_data = []  # (finding_instance, resource_instance) pairs
    skipped_findings_count = 0  # Track findings skipped due to UID length

    # Prefetch last statuses for all findings in this batch
    finding_uids = [f.uid for f in findings_batch if f is not None]
    # TEMPORARY WORKAROUND: Filter out UIDs > 300 chars to avoid query errors
    finding_uids = [
        f.uid for f in findings_batch if f is not None and len(f.uid) <= 300
    ]
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        last_statuses = {
            item["uid"]: (item["status"], item["first_seen_at"])
@@ -485,6 +489,20 @@ def _process_finding_micro_batch(
        # Prepare finding data
        finding_uid = finding.uid

        # TEMPORARY WORKAROUND: Skip findings with UID > 300 chars
        # TODO: Remove this after implementing text field migration for finding.uid
        if len(finding_uid) > 300:
            skipped_findings_count += 1
            logger.warning(
                f"Skipping finding with UID exceeding 300 characters. "
                f"Length: {len(finding_uid)}, "
                f"Check: {finding.check_id}, "
                f"Resource: {finding.resource_name}, "
                f"UID: {finding_uid}"
            )
            continue

        last_status, last_first_seen_at = last_status_cache.get(
            finding_uid, (None, None)
        )
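The TODO above points at the longer-term fix: widening finding.uid so the guard can be dropped. A minimal sketch of such a Django migration is shown below; the app label, dependency, and model name are assumptions rather than values taken from this diff:

# Hypothetical follow-up migration sketched from the TODO above; the app label,
# dependency, and model name are assumptions.
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("api", "0001_initial"),  # placeholder dependency, adjust to the latest migration
    ]

    operations = [
        migrations.AlterField(
            model_name="finding",
            name="uid",
            field=models.TextField(),  # lift the 300-character limit on uid
        ),
    ]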
@@ -606,6 +624,13 @@ def _process_finding_micro_batch(
        batch_size=1000,
    )

    # Log skipped findings summary
    if skipped_findings_count > 0:
        logger.warning(
            f"Scan {scan_instance.id}: Skipped {skipped_findings_count} finding(s) "
            f"due to UID length exceeding 300 characters in this micro-batch."
        )


def perform_prowler_scan(
    tenant_id: str,


@@ -1543,6 +1543,108 @@ class TestProcessFindingMicroBatch:
        assert resource_cache[finding.resource_uid].service == finding.service_name
        assert tag_cache.keys() == {("team", "devsec")}

    def test_process_finding_micro_batch_skips_long_uid(
        self, tenants_fixture, scans_fixture
    ):
        """Test that findings with UID > 300 chars are skipped (temporary workaround)."""
        tenant = tenants_fixture[0]
        scan = scans_fixture[0]
        provider = scan.provider

        # Create a finding with UID > 300 chars
        long_uid = (
            "prowler-aws-ec2_instance_public_ip-123456789012-us-east-1-" + "x" * 250
        )
        assert len(long_uid) > 300
        finding_with_long_uid = FakeFinding(
            uid=long_uid,
            status=StatusChoices.FAIL,
            status_extended="public instance",
            severity=Severity.high,
            check_id="ec2_instance_public_ip",
            resource_uid="arn:aws:ec2:us-east-1:123456789012:instance/i-long",
            resource_name="i-long-uid-instance",
            region="us-east-1",
            service_name="ec2",
            resource_type="instance",
            resource_tags={},
            resource_metadata={},
            resource_details={},
            partition="aws",
            raw={},
            compliance={},
            metadata={},
            muted=False,
        )

        # Create a normal finding that should be processed
        normal_finding = FakeFinding(
            uid="finding-normal",
            status=StatusChoices.PASS,
            status_extended="all good",
            severity=Severity.low,
            check_id="s3_bucket_encryption",
            resource_uid="arn:aws:s3:::bucket-normal",
            resource_name="bucket-normal",
            region="us-east-1",
            service_name="s3",
            resource_type="bucket",
            resource_tags={},
            resource_metadata={},
            resource_details={},
            partition="aws",
            raw={},
            compliance={},
            metadata={},
            muted=False,
        )

        resource_cache = {}
        tag_cache = {}
        last_status_cache = {}
        resource_failed_findings_cache = {}
        unique_resources: set[tuple[str, str]] = set()
        scan_resource_cache: set[tuple[str, str, str, str]] = set()
        mute_rules_cache = {}

        with (
            patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
            patch("api.db_utils.rls_transaction", new=noop_rls_transaction),
            patch("tasks.jobs.scan.logger") as mock_logger,
        ):
            _process_finding_micro_batch(
                str(tenant.id),
                [finding_with_long_uid, normal_finding],
                scan,
                provider,
                resource_cache,
                tag_cache,
                last_status_cache,
                resource_failed_findings_cache,
                unique_resources,
                scan_resource_cache,
                mute_rules_cache,
            )

        # Verify the long UID finding was NOT created
        assert not Finding.objects.filter(uid=long_uid).exists()

        # Verify the normal finding WAS created
        assert Finding.objects.filter(uid=normal_finding.uid).exists()

        # Verify logging was called for skipped finding
        assert mock_logger.warning.called
        warning_calls = [str(call) for call in mock_logger.warning.call_args_list]
        assert any(
            "Skipping finding with UID exceeding 300 characters" in str(call)
            for call in warning_calls
        )
        assert any(
            f"Scan {scan.id}: Skipped 1 finding(s)" in str(call)
            for call in warning_calls
        )

@pytest.mark.django_db
class TestCreateComplianceRequirements:
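To exercise only the new test locally, filtering by its name with pytest is enough; a minimal sketch, assuming pytest is installed and invoked from the API test root:

# Hypothetical invocation; select the new test by name rather than by file path.
import pytest

pytest.main(["-k", "test_process_finding_micro_batch_skips_long_uid", "-x"])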