chore(attack-pahts): Internet node is now created while Attack Paths scan (#9992)

This commit is contained in:
Josema Camacho
2026-02-09 12:17:51 +01:00
committed by GitHub
parent 5cbbceb3be
commit 530fef5106
7 changed files with 162 additions and 2 deletions
+1
View File
@@ -7,6 +7,7 @@ All notable changes to the **Prowler API** are documented in this file.
### 🔄 Changed
- Attack Paths: Queries definition now has short description and attribution [(#9983)](https://github.com/prowler-cloud/prowler/pull/9983)
- Attack Paths: Internet node is created while scan [(#9992)](https://github.com/prowler-cloud/prowler/pull/9992)
---
@@ -12,8 +12,10 @@ BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000)
# Neo4j internal labels (Prowler-specific, not provider-specific)
# - `ProwlerFinding`: Label for finding nodes created by Prowler and linked to cloud resources.
# - `ProviderResource`: Added to ALL synced nodes for provider isolation and drop/query ops.
# - `Internet`: Singleton node representing external internet access for exposed-resource queries.
PROWLER_FINDING_LABEL = "ProwlerFinding"
PROVIDER_RESOURCE_LABEL = "ProviderResource"
INTERNET_NODE_LABEL = "Internet"
@dataclass(frozen=True)
@@ -6,6 +6,7 @@ from cartography.client.core.tx import run_write_query
from celery.utils.log import get_task_logger
from tasks.jobs.attack_paths.config import (
INTERNET_NODE_LABEL,
PROWLER_FINDING_LABEL,
PROVIDER_RESOURCE_LABEL,
)
@@ -30,6 +31,8 @@ FINDINGS_INDEX_STATEMENTS = [
f"CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.provider_uid);",
f"CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.lastupdated);",
f"CREATE INDEX prowler_finding_status IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.status);",
# Internet node index for MERGE lookups
f"CREATE INDEX internet_id IF NOT EXISTS FOR (n:{INTERNET_NODE_LABEL}) ON (n.id);",
]
# Indexes for provider resource sync operations
@@ -0,0 +1,67 @@
"""
Internet node enrichment for Attack Paths graph.
Creates a real Internet node and CAN_ACCESS relationships to
internet-exposed resources (EC2Instance, LoadBalancer, LoadBalancerV2)
in the temporary scan database before sync.
"""
import neo4j
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from api.models import Provider
from prowler.config import config as ProwlerConfig
from tasks.jobs.attack_paths.config import get_root_node_label
from tasks.jobs.attack_paths.queries import (
CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE,
CREATE_INTERNET_NODE,
render_cypher_template,
)
logger = get_task_logger(__name__)
def analysis(
neo4j_session: neo4j.Session,
prowler_api_provider: Provider,
config: CartographyConfig,
) -> int:
"""
Create Internet node and CAN_ACCESS relationships to exposed resources.
Args:
neo4j_session: Active Neo4j session (temp database).
prowler_api_provider: The Prowler API provider instance.
config: Cartography configuration with update_tag.
Returns:
Number of CAN_ACCESS relationships created.
"""
provider_uid = str(prowler_api_provider.uid)
parameters = {
"provider_uid": provider_uid,
"last_updated": config.update_tag,
"prowler_version": ProwlerConfig.prowler_version,
}
logger.info(f"Creating Internet node for provider {provider_uid}")
neo4j_session.run(CREATE_INTERNET_NODE, parameters)
query = render_cypher_template(
CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE,
{"__ROOT_LABEL__": get_root_node_label(prowler_api_provider.provider)},
)
logger.info(
f"Creating CAN_ACCESS relationships from Internet to exposed resources for {provider_uid}"
)
result = neo4j_session.run(query, parameters)
relationships_merged = result.single().get("relationships_merged", 0)
logger.info(
f"Created {relationships_merged} CAN_ACCESS relationships for provider {provider_uid}"
)
return relationships_merged
@@ -1,5 +1,6 @@
# Cypher query templates for Attack Paths operations
from tasks.jobs.attack_paths.config import (
INTERNET_NODE_LABEL,
PROWLER_FINDING_LABEL,
PROVIDER_RESOURCE_LABEL,
)
@@ -91,6 +92,37 @@ CLEANUP_FINDINGS_TEMPLATE = f"""
RETURN COUNT(finding) AS deleted_findings_count
"""
# Internet queries (used by internet.py)
# ---------------------------------------
CREATE_INTERNET_NODE = f"""
MERGE (internet:{INTERNET_NODE_LABEL} {{id: 'Internet'}})
ON CREATE SET
internet.name = 'Internet',
internet.firstseen = timestamp(),
internet.lastupdated = $last_updated,
internet._module_name = 'cartography:prowler',
internet._module_version = $prowler_version
ON MATCH SET
internet.lastupdated = $last_updated
"""
CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE = f"""
MATCH (account:__ROOT_LABEL__ {{id: $provider_uid}})-->(resource)
WHERE resource.exposed_internet = true
WITH resource
MATCH (internet:{INTERNET_NODE_LABEL} {{id: 'Internet'}})
MERGE (internet)-[r:CAN_ACCESS]->(resource)
ON CREATE SET
r.firstseen = timestamp(),
r.lastupdated = $last_updated,
r._module_name = 'cartography:prowler',
r._module_version = $prowler_version
ON MATCH SET
r.lastupdated = $last_updated
RETURN COUNT(r) AS relationships_merged
"""
# Sync queries (used by sync.py)
# -------------------------------
@@ -16,7 +16,7 @@ from api.models import (
StateChoices,
)
from api.utils import initialize_prowler_provider
from tasks.jobs.attack_paths import db_utils, findings, sync, utils
from tasks.jobs.attack_paths import db_utils, findings, internet, sync, utils
from tasks.jobs.attack_paths.config import get_cartography_ingestion_function
# Without this Celery goes crazy with Cartography logging
@@ -135,7 +135,15 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
cartography_analysis.run(tmp_neo4j_session, tmp_cartography_config)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96)
# Adding Prowler nodes and relationships
# Creating Internet node and CAN_ACCESS relationships
logger.info(
f"Creating Internet graph for AWS account {prowler_api_provider.uid}"
)
internet.analysis(
tmp_neo4j_session, prowler_api_provider, tmp_cartography_config
)
# Adding Prowler Finding nodes and relationships
logger.info(
f"Syncing Prowler analysis for AWS account {prowler_api_provider.uid}"
)
@@ -4,6 +4,7 @@ from unittest.mock import MagicMock, call, patch
import pytest
from tasks.jobs.attack_paths import findings as findings_module
from tasks.jobs.attack_paths import internet as internet_module
from tasks.jobs.attack_paths.scan import run as attack_paths_run
from api.models import (
@@ -37,6 +38,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph")
@patch("tasks.jobs.attack_paths.scan.sync.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@@ -67,6 +69,7 @@ class TestAttackPathsRun:
mock_cartography_ontology,
mock_findings_indexes,
mock_findings_analysis,
mock_internet_analysis,
mock_sync_indexes,
mock_drop_subgraph,
mock_sync,
@@ -139,6 +142,7 @@ class TestAttackPathsRun:
# These use tmp_cartography_config (neo4j_database="db-scan-id")
mock_cartography_analysis.assert_called_once()
mock_cartography_ontology.assert_called_once()
mock_internet_analysis.assert_called_once()
mock_findings_analysis.assert_called_once()
mock_drop_subgraph.assert_called_once_with(
database="tenant-db",
@@ -207,6 +211,7 @@ class TestAttackPathsRun:
patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run"),
patch("tasks.jobs.attack_paths.scan.cartography_analysis.run"),
patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes"),
patch("tasks.jobs.attack_paths.scan.internet.analysis"),
patch("tasks.jobs.attack_paths.scan.findings.analysis"),
patch(
"tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
@@ -757,3 +762,45 @@ class TestAttackPathsFindingsHelpers:
findings_module.load_findings(mock_session, empty_gen(), provider, config)
mock_session.run.assert_not_called()
class TestInternetAnalysis:
def _make_provider_and_config(self):
provider = MagicMock()
provider.provider = "aws"
provider.uid = "123456789012"
config = SimpleNamespace(update_tag=1234567890)
return provider, config
def test_analysis_creates_node_and_relationships(self):
"""Verify both Cypher statements are executed and relationship count returned."""
mock_session = MagicMock()
mock_result = MagicMock()
mock_result.single.return_value = {"relationships_merged": 3}
mock_session.run.side_effect = [None, mock_result]
provider, config = self._make_provider_and_config()
with patch(
"tasks.jobs.attack_paths.internet.get_root_node_label",
return_value="AWSAccount",
):
result = internet_module.analysis(mock_session, provider, config)
assert mock_session.run.call_count == 2
assert result == 3
def test_analysis_zero_exposed_resources(self):
"""When no resources are exposed, zero relationships are created."""
mock_session = MagicMock()
mock_result = MagicMock()
mock_result.single.return_value = {"relationships_merged": 0}
mock_session.run.side_effect = [None, mock_result]
provider, config = self._make_provider_and_config()
with patch(
"tasks.jobs.attack_paths.internet.get_root_node_label",
return_value="AWSAccount",
):
result = internet_module.analysis(mock_session, provider, config)
assert result == 0