diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index d99d1ea6f8..dde2775c39 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to the **Prowler API** are documented in this file. ### 🔄 Changed - Attack Paths: Queries definition now has short description and attribution [(#9983)](https://github.com/prowler-cloud/prowler/pull/9983) +- Attack Paths: Internet node is created during the scan [(#9992)](https://github.com/prowler-cloud/prowler/pull/9992) --- diff --git a/api/src/backend/tasks/jobs/attack_paths/config.py b/api/src/backend/tasks/jobs/attack_paths/config.py index d39e6fa81f..af8094e172 100644 --- a/api/src/backend/tasks/jobs/attack_paths/config.py +++ b/api/src/backend/tasks/jobs/attack_paths/config.py @@ -12,8 +12,10 @@ BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000) # Neo4j internal labels (Prowler-specific, not provider-specific) # - `ProwlerFinding`: Label for finding nodes created by Prowler and linked to cloud resources. # - `ProviderResource`: Added to ALL synced nodes for provider isolation and drop/query ops. +# - `Internet`: Singleton node representing external internet access for exposed-resource queries. 
PROWLER_FINDING_LABEL = "ProwlerFinding" PROVIDER_RESOURCE_LABEL = "ProviderResource" +INTERNET_NODE_LABEL = "Internet" @dataclass(frozen=True) diff --git a/api/src/backend/tasks/jobs/attack_paths/indexes.py b/api/src/backend/tasks/jobs/attack_paths/indexes.py index 708e9c7f84..9ccd8cab04 100644 --- a/api/src/backend/tasks/jobs/attack_paths/indexes.py +++ b/api/src/backend/tasks/jobs/attack_paths/indexes.py @@ -6,6 +6,7 @@ from cartography.client.core.tx import run_write_query from celery.utils.log import get_task_logger from tasks.jobs.attack_paths.config import ( + INTERNET_NODE_LABEL, PROWLER_FINDING_LABEL, PROVIDER_RESOURCE_LABEL, ) @@ -30,6 +31,8 @@ FINDINGS_INDEX_STATEMENTS = [ f"CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.provider_uid);", f"CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.lastupdated);", f"CREATE INDEX prowler_finding_status IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.status);", + # Internet node index for MERGE lookups + f"CREATE INDEX internet_id IF NOT EXISTS FOR (n:{INTERNET_NODE_LABEL}) ON (n.id);", ] # Indexes for provider resource sync operations diff --git a/api/src/backend/tasks/jobs/attack_paths/internet.py b/api/src/backend/tasks/jobs/attack_paths/internet.py new file mode 100644 index 0000000000..83517bc903 --- /dev/null +++ b/api/src/backend/tasks/jobs/attack_paths/internet.py @@ -0,0 +1,67 @@ +""" +Internet node enrichment for Attack Paths graph. + +Creates a real Internet node and CAN_ACCESS relationships to +internet-exposed resources (EC2Instance, LoadBalancer, LoadBalancerV2) +in the temporary scan database before sync. 
+""" + +import neo4j + +from cartography.config import Config as CartographyConfig +from celery.utils.log import get_task_logger + +from api.models import Provider +from prowler.config import config as ProwlerConfig +from tasks.jobs.attack_paths.config import get_root_node_label +from tasks.jobs.attack_paths.queries import ( + CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE, + CREATE_INTERNET_NODE, + render_cypher_template, +) + +logger = get_task_logger(__name__) + + +def analysis( + neo4j_session: neo4j.Session, + prowler_api_provider: Provider, + config: CartographyConfig, +) -> int: + """ + Create Internet node and CAN_ACCESS relationships to exposed resources. + + Args: + neo4j_session: Active Neo4j session (temp database). + prowler_api_provider: The Prowler API provider instance. + config: Cartography configuration with update_tag. + + Returns: + Number of CAN_ACCESS relationships created. + """ + provider_uid = str(prowler_api_provider.uid) + + parameters = { + "provider_uid": provider_uid, + "last_updated": config.update_tag, + "prowler_version": ProwlerConfig.prowler_version, + } + + logger.info(f"Creating Internet node for provider {provider_uid}") + neo4j_session.run(CREATE_INTERNET_NODE, parameters) + + query = render_cypher_template( + CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE, + {"__ROOT_LABEL__": get_root_node_label(prowler_api_provider.provider)}, + ) + + logger.info( + f"Creating CAN_ACCESS relationships from Internet to exposed resources for {provider_uid}" + ) + result = neo4j_session.run(query, parameters) + relationships_merged = result.single().get("relationships_merged", 0) + + logger.info( + f"Created {relationships_merged} CAN_ACCESS relationships for provider {provider_uid}" + ) + return relationships_merged diff --git a/api/src/backend/tasks/jobs/attack_paths/queries.py b/api/src/backend/tasks/jobs/attack_paths/queries.py index bf935cc9ca..75ef9ec5b3 100644 --- a/api/src/backend/tasks/jobs/attack_paths/queries.py +++ 
b/api/src/backend/tasks/jobs/attack_paths/queries.py @@ -1,5 +1,6 @@ # Cypher query templates for Attack Paths operations from tasks.jobs.attack_paths.config import ( + INTERNET_NODE_LABEL, PROWLER_FINDING_LABEL, PROVIDER_RESOURCE_LABEL, ) @@ -91,6 +92,37 @@ CLEANUP_FINDINGS_TEMPLATE = f""" RETURN COUNT(finding) AS deleted_findings_count """ +# Internet queries (used by internet.py) +# --------------------------------------- + +CREATE_INTERNET_NODE = f""" + MERGE (internet:{INTERNET_NODE_LABEL} {{id: 'Internet'}}) + ON CREATE SET + internet.name = 'Internet', + internet.firstseen = timestamp(), + internet.lastupdated = $last_updated, + internet._module_name = 'cartography:prowler', + internet._module_version = $prowler_version + ON MATCH SET + internet.lastupdated = $last_updated +""" + +CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE = f""" + MATCH (account:__ROOT_LABEL__ {{id: $provider_uid}})-->(resource) + WHERE resource.exposed_internet = true + WITH resource + MATCH (internet:{INTERNET_NODE_LABEL} {{id: 'Internet'}}) + MERGE (internet)-[r:CAN_ACCESS]->(resource) + ON CREATE SET + r.firstseen = timestamp(), + r.lastupdated = $last_updated, + r._module_name = 'cartography:prowler', + r._module_version = $prowler_version + ON MATCH SET + r.lastupdated = $last_updated + RETURN COUNT(r) AS relationships_merged +""" + # Sync queries (used by sync.py) # ------------------------------- diff --git a/api/src/backend/tasks/jobs/attack_paths/scan.py b/api/src/backend/tasks/jobs/attack_paths/scan.py index 1ffcbf55e7..faf6c4e176 100644 --- a/api/src/backend/tasks/jobs/attack_paths/scan.py +++ b/api/src/backend/tasks/jobs/attack_paths/scan.py @@ -16,7 +16,7 @@ from api.models import ( StateChoices, ) from api.utils import initialize_prowler_provider -from tasks.jobs.attack_paths import db_utils, findings, sync, utils +from tasks.jobs.attack_paths import db_utils, findings, internet, sync, utils from tasks.jobs.attack_paths.config import get_cartography_ingestion_function # Without 
this Celery goes crazy with Cartography logging @@ -135,7 +135,15 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]: cartography_analysis.run(tmp_neo4j_session, tmp_cartography_config) db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96) - # Adding Prowler nodes and relationships + # Creating Internet node and CAN_ACCESS relationships + logger.info( + f"Creating Internet graph for AWS account {prowler_api_provider.uid}" + ) + internet.analysis( + tmp_neo4j_session, prowler_api_provider, tmp_cartography_config + ) + + # Adding Prowler Finding nodes and relationships logger.info( f"Syncing Prowler analysis for AWS account {prowler_api_provider.uid}" ) diff --git a/api/src/backend/tasks/tests/test_attack_paths_scan.py b/api/src/backend/tasks/tests/test_attack_paths_scan.py index ebee87c981..dbedc6ae7a 100644 --- a/api/src/backend/tasks/tests/test_attack_paths_scan.py +++ b/api/src/backend/tasks/tests/test_attack_paths_scan.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock, call, patch import pytest from tasks.jobs.attack_paths import findings as findings_module +from tasks.jobs.attack_paths import internet as internet_module from tasks.jobs.attack_paths.scan import run as attack_paths_run from api.models import ( @@ -37,6 +38,7 @@ class TestAttackPathsRun: @patch("tasks.jobs.attack_paths.scan.sync.sync_graph") @patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph") @patch("tasks.jobs.attack_paths.scan.sync.create_sync_indexes") + @patch("tasks.jobs.attack_paths.scan.internet.analysis") @patch("tasks.jobs.attack_paths.scan.findings.analysis") @patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes") @patch("tasks.jobs.attack_paths.scan.cartography_ontology.run") @@ -67,6 +69,7 @@ class TestAttackPathsRun: mock_cartography_ontology, mock_findings_indexes, mock_findings_analysis, + mock_internet_analysis, mock_sync_indexes, mock_drop_subgraph, mock_sync, @@ -139,6 +142,7 @@ class TestAttackPathsRun: # 
These use tmp_cartography_config (neo4j_database="db-scan-id") mock_cartography_analysis.assert_called_once() mock_cartography_ontology.assert_called_once() + mock_internet_analysis.assert_called_once() mock_findings_analysis.assert_called_once() mock_drop_subgraph.assert_called_once_with( database="tenant-db", @@ -207,6 +211,7 @@ class TestAttackPathsRun: patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run"), patch("tasks.jobs.attack_paths.scan.cartography_analysis.run"), patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes"), + patch("tasks.jobs.attack_paths.scan.internet.analysis"), patch("tasks.jobs.attack_paths.scan.findings.analysis"), patch( "tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan", @@ -757,3 +762,45 @@ class TestAttackPathsFindingsHelpers: findings_module.load_findings(mock_session, empty_gen(), provider, config) mock_session.run.assert_not_called() + + +class TestInternetAnalysis: + def _make_provider_and_config(self): + provider = MagicMock() + provider.provider = "aws" + provider.uid = "123456789012" + config = SimpleNamespace(update_tag=1234567890) + return provider, config + + def test_analysis_creates_node_and_relationships(self): + """Verify both Cypher statements are executed and relationship count returned.""" + mock_session = MagicMock() + mock_result = MagicMock() + mock_result.single.return_value = {"relationships_merged": 3} + mock_session.run.side_effect = [None, mock_result] + provider, config = self._make_provider_and_config() + + with patch( + "tasks.jobs.attack_paths.internet.get_root_node_label", + return_value="AWSAccount", + ): + result = internet_module.analysis(mock_session, provider, config) + + assert mock_session.run.call_count == 2 + assert result == 3 + + def test_analysis_zero_exposed_resources(self): + """When no resources are exposed, zero relationships are created.""" + mock_session = MagicMock() + mock_result = MagicMock() + mock_result.single.return_value = 
{"relationships_merged": 0} + mock_session.run.side_effect = [None, mock_result] + provider, config = self._make_provider_and_config() + + with patch( + "tasks.jobs.attack_paths.internet.get_root_node_label", + return_value="AWSAccount", + ): + result = internet_module.analysis(mock_session, provider, config) + + assert result == 0