mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-05-06 08:47:18 +00:00
fix(api): merge Attack Paths findings on short UIDs for AWS resources (#10841)
Co-authored-by: Josema Camacho <josema@prowler.com>
This commit is contained in:
@@ -12,6 +12,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
- Finding groups aggregated `status` now treats muted findings as resolved: a group is `FAIL` only while at least one non-muted FAIL remains, otherwise it is `PASS` (including fully-muted groups). The `filter[status]` filter and the `sort=status` ordering share the same semantics, keeping `status` consistent with `fail_count` and the orthogonal `muted` flag [(#10825)](https://github.com/prowler-cloud/prowler/pull/10825)
|
||||
- `aggregate_findings` is now idempotent: it deletes the scan's existing `ScanSummary` rows before `bulk_create`, so re-runs (such as the post-mute reaggregation pipeline) no longer violate the `unique_scan_summary` constraint and no longer abort the downstream `DailySeveritySummary` / `FindingGroupDailySummary` recomputation for the affected scan [(#10827)](https://github.com/prowler-cloud/prowler/pull/10827)
|
||||
- Attack Paths: Findings on AWS were silently dropped during the Neo4j merge for resources whose Cartography node is keyed by a short identifier (e.g. EC2 instances) rather than the full ARN [(#10839)](https://github.com/prowler-cloud/prowler/pull/10839)
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -313,3 +313,16 @@ def sync_aws_account(
|
||||
)
|
||||
|
||||
return failed_syncs
|
||||
|
||||
|
||||
def extract_short_uid(uid: str) -> str:
|
||||
"""Return the short identifier from an AWS ARN or resource ID.
|
||||
|
||||
Supported inputs end in one of:
|
||||
- `<type>/<id>` (e.g. `instance/i-xxx`)
|
||||
- `<type>:<id>` (e.g. `function:name`)
|
||||
- `<id>` (e.g. `bucket-name` or `i-xxx`)
|
||||
|
||||
If `uid` is already a short resource ID, it is returned unchanged.
|
||||
"""
|
||||
return uid.rsplit("/", 1)[-1].rsplit(":", 1)[-1]
|
||||
|
||||
@@ -37,6 +37,8 @@ class ProviderConfig:
|
||||
# Label for resources connected to the account node, enabling indexed finding lookups.
|
||||
resource_label: str # e.g., "_AWSResource"
|
||||
ingestion_function: Callable
|
||||
# Maps a Postgres resource UID (e.g. full ARN) to the short-id form Cartography stores on some node types (e.g. `i-xxx` for EC2Instance).
|
||||
short_uid_extractor: Callable[[str], str]
|
||||
|
||||
|
||||
# Provider Configurations
|
||||
@@ -48,6 +50,7 @@ AWS_CONFIG = ProviderConfig(
|
||||
uid_field="arn",
|
||||
resource_label="_AWSResource",
|
||||
ingestion_function=aws.start_aws_ingestion,
|
||||
short_uid_extractor=aws.extract_short_uid,
|
||||
)
|
||||
|
||||
PROVIDER_CONFIGS: dict[str, ProviderConfig] = {
|
||||
@@ -116,6 +119,21 @@ def get_provider_resource_label(provider_type: str) -> str:
|
||||
return config.resource_label if config else "_UnknownProviderResource"
|
||||
|
||||
|
||||
def _identity_short_uid(uid: str) -> str:
|
||||
"""Fallback short-uid extractor for providers without a custom mapping."""
|
||||
return uid
|
||||
|
||||
|
||||
def get_short_uid_extractor(provider_type: str) -> Callable[[str], str]:
|
||||
"""Get the short-uid extractor for a provider type.
|
||||
|
||||
Returns an identity function when the provider is unknown, so callers can
|
||||
rely on a callable always being returned.
|
||||
"""
|
||||
config = PROVIDER_CONFIGS.get(provider_type)
|
||||
return config.short_uid_extractor if config else _identity_short_uid
|
||||
|
||||
|
||||
# Dynamic Isolation Label Helpers
|
||||
# --------------------------------
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ This module handles:
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import Any, Generator
|
||||
from typing import Any, Callable, Generator
|
||||
from uuid import UUID
|
||||
|
||||
import neo4j
|
||||
@@ -21,6 +21,7 @@ from tasks.jobs.attack_paths.config import (
|
||||
get_node_uid_field,
|
||||
get_provider_resource_label,
|
||||
get_root_node_label,
|
||||
get_short_uid_extractor,
|
||||
)
|
||||
from tasks.jobs.attack_paths.queries import (
|
||||
ADD_RESOURCE_LABEL_TEMPLATE,
|
||||
@@ -57,7 +58,9 @@ _DB_QUERY_FIELDS = [
|
||||
]
|
||||
|
||||
|
||||
def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]:
|
||||
def _to_neo4j_dict(
|
||||
record: dict[str, Any], resource_uid: str, resource_short_uid: str
|
||||
) -> dict[str, Any]:
|
||||
"""Transform a Django `.values()` record into a `dict` ready for Neo4j ingestion."""
|
||||
return {
|
||||
"id": str(record["id"]),
|
||||
@@ -75,6 +78,7 @@ def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]:
|
||||
"muted": record["muted"],
|
||||
"muted_reason": record["muted_reason"],
|
||||
"resource_uid": resource_uid,
|
||||
"resource_short_uid": resource_short_uid,
|
||||
}
|
||||
|
||||
|
||||
@@ -170,6 +174,8 @@ def load_findings(
|
||||
|
||||
batch_num = 0
|
||||
total_records = 0
|
||||
edges_merged = 0
|
||||
edges_dropped = 0
|
||||
for batch in findings_batches:
|
||||
batch_num += 1
|
||||
batch_size = len(batch)
|
||||
@@ -178,9 +184,15 @@ def load_findings(
|
||||
parameters["findings_data"] = batch
|
||||
|
||||
logger.info(f"Loading findings batch {batch_num} ({batch_size} records)")
|
||||
neo4j_session.run(query, parameters)
|
||||
summary = neo4j_session.run(query, parameters).single()
|
||||
if summary is not None:
|
||||
edges_merged += summary.get("merged_count", 0)
|
||||
edges_dropped += summary.get("dropped_count", 0)
|
||||
|
||||
logger.info(f"Finished loading {total_records} records in {batch_num} batches")
|
||||
logger.info(
|
||||
f"Finished loading {total_records} records in {batch_num} batches "
|
||||
f"(edges_merged={edges_merged}, edges_dropped={edges_dropped})"
|
||||
)
|
||||
return total_records
|
||||
|
||||
|
||||
@@ -205,8 +217,9 @@ def stream_findings_with_resources(
|
||||
)
|
||||
|
||||
tenant_id = prowler_api_provider.tenant_id
|
||||
short_uid_extractor = get_short_uid_extractor(prowler_api_provider.provider)
|
||||
for batch in _paginate_findings(tenant_id, scan_id):
|
||||
enriched = _enrich_batch_with_resources(batch, tenant_id)
|
||||
enriched = _enrich_batch_with_resources(batch, tenant_id, short_uid_extractor)
|
||||
if enriched:
|
||||
yield enriched
|
||||
|
||||
@@ -269,6 +282,7 @@ def _fetch_findings_batch(
|
||||
def _enrich_batch_with_resources(
|
||||
findings_batch: list[dict[str, Any]],
|
||||
tenant_id: str,
|
||||
short_uid_extractor: Callable[[str], str],
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Enrich findings with their resource UIDs.
|
||||
@@ -280,7 +294,7 @@ def _enrich_batch_with_resources(
|
||||
resource_map = _build_finding_resource_map(finding_ids, tenant_id)
|
||||
|
||||
return [
|
||||
_to_neo4j_dict(finding, resource_uid)
|
||||
_to_neo4j_dict(finding, resource_uid, short_uid_extractor(resource_uid))
|
||||
for finding in findings_batch
|
||||
for resource_uid in resource_map.get(finding["id"], [])
|
||||
]
|
||||
|
||||
@@ -35,46 +35,56 @@ INSERT_FINDING_TEMPLATE = f"""
|
||||
UNWIND $findings_data AS finding_data
|
||||
|
||||
OPTIONAL MATCH (resource_by_uid:__RESOURCE_LABEL__ {{__NODE_UID_FIELD__: finding_data.resource_uid}})
|
||||
WITH finding_data, resource_by_uid
|
||||
|
||||
OPTIONAL MATCH (resource_by_id:__RESOURCE_LABEL__ {{id: finding_data.resource_uid}})
|
||||
WHERE resource_by_uid IS NULL
|
||||
WITH finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource
|
||||
WHERE resource IS NOT NULL
|
||||
OPTIONAL MATCH (resource_by_short:__RESOURCE_LABEL__ {{id: finding_data.resource_short_uid}})
|
||||
WHERE resource_by_uid IS NULL AND resource_by_id IS NULL
|
||||
WITH finding_data,
|
||||
resource_by_uid,
|
||||
resource_by_id,
|
||||
head(collect(resource_by_short)) AS resource_by_short
|
||||
WITH finding_data,
|
||||
COALESCE(resource_by_uid, resource_by_id, resource_by_short) AS resource
|
||||
|
||||
MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}})
|
||||
ON CREATE SET
|
||||
finding.id = finding_data.id,
|
||||
finding.uid = finding_data.uid,
|
||||
finding.inserted_at = finding_data.inserted_at,
|
||||
finding.updated_at = finding_data.updated_at,
|
||||
finding.first_seen_at = finding_data.first_seen_at,
|
||||
finding.scan_id = finding_data.scan_id,
|
||||
finding.delta = finding_data.delta,
|
||||
finding.status = finding_data.status,
|
||||
finding.status_extended = finding_data.status_extended,
|
||||
finding.severity = finding_data.severity,
|
||||
finding.check_id = finding_data.check_id,
|
||||
finding.check_title = finding_data.check_title,
|
||||
finding.muted = finding_data.muted,
|
||||
finding.muted_reason = finding_data.muted_reason,
|
||||
finding.firstseen = timestamp(),
|
||||
finding.lastupdated = $last_updated,
|
||||
finding._module_name = 'cartography:prowler',
|
||||
finding._module_version = $prowler_version
|
||||
ON MATCH SET
|
||||
finding.status = finding_data.status,
|
||||
finding.status_extended = finding_data.status_extended,
|
||||
finding.lastupdated = $last_updated
|
||||
FOREACH (_ IN CASE WHEN resource IS NOT NULL THEN [1] ELSE [] END |
|
||||
MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}})
|
||||
ON CREATE SET
|
||||
finding.id = finding_data.id,
|
||||
finding.uid = finding_data.uid,
|
||||
finding.inserted_at = finding_data.inserted_at,
|
||||
finding.updated_at = finding_data.updated_at,
|
||||
finding.first_seen_at = finding_data.first_seen_at,
|
||||
finding.scan_id = finding_data.scan_id,
|
||||
finding.delta = finding_data.delta,
|
||||
finding.status = finding_data.status,
|
||||
finding.status_extended = finding_data.status_extended,
|
||||
finding.severity = finding_data.severity,
|
||||
finding.check_id = finding_data.check_id,
|
||||
finding.check_title = finding_data.check_title,
|
||||
finding.muted = finding_data.muted,
|
||||
finding.muted_reason = finding_data.muted_reason,
|
||||
finding.firstseen = timestamp(),
|
||||
finding.lastupdated = $last_updated,
|
||||
finding._module_name = 'cartography:prowler',
|
||||
finding._module_version = $prowler_version
|
||||
ON MATCH SET
|
||||
finding.status = finding_data.status,
|
||||
finding.status_extended = finding_data.status_extended,
|
||||
finding.lastupdated = $last_updated
|
||||
MERGE (resource)-[rel:HAS_FINDING]->(finding)
|
||||
ON CREATE SET
|
||||
rel.firstseen = timestamp(),
|
||||
rel.lastupdated = $last_updated,
|
||||
rel._module_name = 'cartography:prowler',
|
||||
rel._module_version = $prowler_version
|
||||
ON MATCH SET
|
||||
rel.lastupdated = $last_updated
|
||||
)
|
||||
|
||||
MERGE (resource)-[rel:HAS_FINDING]->(finding)
|
||||
ON CREATE SET
|
||||
rel.firstseen = timestamp(),
|
||||
rel.lastupdated = $last_updated,
|
||||
rel._module_name = 'cartography:prowler',
|
||||
rel._module_version = $prowler_version
|
||||
ON MATCH SET
|
||||
rel.lastupdated = $last_updated
|
||||
WITH sum(CASE WHEN resource IS NOT NULL THEN 1 ELSE 0 END) AS merged_count,
|
||||
sum(CASE WHEN resource IS NULL THEN 1 ELSE 0 END) AS dropped_count
|
||||
|
||||
RETURN merged_count, dropped_count
|
||||
"""
|
||||
|
||||
# Internet queries (used by internet.py)
|
||||
|
||||
@@ -1285,6 +1285,12 @@ class TestAttackPathsFindingsHelpers:
|
||||
config = SimpleNamespace(update_tag=12345)
|
||||
mock_session = MagicMock()
|
||||
|
||||
first_result = MagicMock()
|
||||
first_result.single.return_value = {"merged_count": 1, "dropped_count": 0}
|
||||
second_result = MagicMock()
|
||||
second_result.single.return_value = {"merged_count": 0, "dropped_count": 1}
|
||||
mock_session.run.side_effect = [first_result, second_result]
|
||||
|
||||
with (
|
||||
patch(
|
||||
"tasks.jobs.attack_paths.findings.get_node_uid_field",
|
||||
@@ -1294,6 +1300,7 @@ class TestAttackPathsFindingsHelpers:
|
||||
"tasks.jobs.attack_paths.findings.get_provider_resource_label",
|
||||
return_value="_AWSResource",
|
||||
),
|
||||
patch("tasks.jobs.attack_paths.findings.logger") as mock_logger,
|
||||
):
|
||||
findings_module.load_findings(
|
||||
mock_session, findings_generator(), provider, config
|
||||
@@ -1305,6 +1312,14 @@ class TestAttackPathsFindingsHelpers:
|
||||
assert params["last_updated"] == config.update_tag
|
||||
assert "findings_data" in params
|
||||
|
||||
summary_log = next(
|
||||
call_args.args[0]
|
||||
for call_args in mock_logger.info.call_args_list
|
||||
if call_args.args and "Finished loading" in call_args.args[0]
|
||||
)
|
||||
assert "edges_merged=1" in summary_log
|
||||
assert "edges_dropped=1" in summary_log
|
||||
|
||||
def test_stream_findings_with_resources_returns_latest_scan_data(
|
||||
self,
|
||||
tenants_fixture,
|
||||
@@ -1484,11 +1499,12 @@ class TestAttackPathsFindingsHelpers:
|
||||
"default",
|
||||
):
|
||||
result = findings_module._enrich_batch_with_resources(
|
||||
[finding_dict], str(tenant.id)
|
||||
[finding_dict], str(tenant.id), lambda uid: f"short:{uid}"
|
||||
)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["resource_uid"] == resource.uid
|
||||
assert result[0]["resource_short_uid"] == f"short:{resource.uid}"
|
||||
assert result[0]["id"] == str(finding.id)
|
||||
assert result[0]["status"] == "FAIL"
|
||||
|
||||
@@ -1572,7 +1588,7 @@ class TestAttackPathsFindingsHelpers:
|
||||
"default",
|
||||
):
|
||||
result = findings_module._enrich_batch_with_resources(
|
||||
[finding_dict], str(tenant.id)
|
||||
[finding_dict], str(tenant.id), lambda uid: uid
|
||||
)
|
||||
|
||||
assert len(result) == 3
|
||||
@@ -1646,7 +1662,7 @@ class TestAttackPathsFindingsHelpers:
|
||||
patch("tasks.jobs.attack_paths.findings.logger") as mock_logger,
|
||||
):
|
||||
result = findings_module._enrich_batch_with_resources(
|
||||
[finding_dict], str(tenant.id)
|
||||
[finding_dict], str(tenant.id), lambda uid: uid
|
||||
)
|
||||
|
||||
assert len(result) == 0
|
||||
@@ -1693,6 +1709,63 @@ class TestAttackPathsFindingsHelpers:
|
||||
|
||||
mock_session.run.assert_not_called()
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"uid, expected",
|
||||
[
|
||||
(
|
||||
"arn:aws:ec2:us-east-1:552455647653:instance/i-05075b63eb51baacb",
|
||||
"i-05075b63eb51baacb",
|
||||
),
|
||||
(
|
||||
"arn:aws:ec2:us-east-1:123456789012:volume/vol-0abcd1234ef567890",
|
||||
"vol-0abcd1234ef567890",
|
||||
),
|
||||
(
|
||||
"arn:aws:ec2:us-east-1:123456789012:security-group/sg-0123abcd",
|
||||
"sg-0123abcd",
|
||||
),
|
||||
("arn:aws:s3:::my-bucket-name", "my-bucket-name"),
|
||||
("arn:aws:iam::123456789012:role/MyRole", "MyRole"),
|
||||
(
|
||||
"arn:aws:lambda:us-east-1:123456789012:function:my-function",
|
||||
"my-function",
|
||||
),
|
||||
("i-05075b63eb51baacb", "i-05075b63eb51baacb"),
|
||||
],
|
||||
)
|
||||
def test_extract_short_uid_aws_variants(self, uid, expected):
|
||||
from tasks.jobs.attack_paths.aws import extract_short_uid
|
||||
|
||||
assert extract_short_uid(uid) == expected
|
||||
|
||||
def test_insert_finding_template_has_short_id_fallback(self):
|
||||
from tasks.jobs.attack_paths.queries import (
|
||||
INSERT_FINDING_TEMPLATE,
|
||||
render_cypher_template,
|
||||
)
|
||||
|
||||
rendered = render_cypher_template(
|
||||
INSERT_FINDING_TEMPLATE,
|
||||
{
|
||||
"__NODE_UID_FIELD__": "arn",
|
||||
"__RESOURCE_LABEL__": "_AWSResource",
|
||||
},
|
||||
)
|
||||
|
||||
assert (
|
||||
"resource_by_uid:_AWSResource {arn: finding_data.resource_uid}" in rendered
|
||||
)
|
||||
assert "resource_by_id:_AWSResource {id: finding_data.resource_uid}" in rendered
|
||||
assert (
|
||||
"resource_by_short:_AWSResource {id: finding_data.resource_short_uid}"
|
||||
in rendered
|
||||
)
|
||||
assert "head(collect(resource_by_short)) AS resource_by_short" in rendered
|
||||
assert (
|
||||
"COALESCE(resource_by_uid, resource_by_id, resource_by_short)" in rendered
|
||||
)
|
||||
assert "RETURN merged_count, dropped_count" in rendered
|
||||
|
||||
|
||||
class TestAddResourceLabel:
|
||||
def test_add_resource_label_applies_private_label(self):
|
||||
|
||||
Reference in New Issue
Block a user