diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 45e520dc16..0e09dd8ebb 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to the **Prowler API** are documented in this file. - Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266) - Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268) - `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293) +- Attack Paths: Add tenant- and provider-related labels to nodes so they can be easily filtered in custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308) ### 🐞 Fixed diff --git a/api/poetry.lock b/api/poetry.lock index dba0433896..6fbdb874bc 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -6730,7 +6730,7 @@ tzlocal = "5.3.1" type = "git" url = "https://github.com/prowler-cloud/prowler.git" reference = "master" -resolved_reference = "6962622fd21401886371add25463f77228cd9c1f" +resolved_reference = "b31145616064bd6727139777dca1cea9b977346a" [[package]] name = "psutil" diff --git a/api/src/backend/api/attack_paths/views_helpers.py b/api/src/backend/api/attack_paths/views_helpers.py index 38cc0ec9e2..8e3ee203b4 100644 --- a/api/src/backend/api/attack_paths/views_helpers.py +++ b/api/src/backend/api/attack_paths/views_helpers.py @@ -17,6 +17,7 @@ from tasks.jobs.attack_paths.config import ( INTERNAL_LABELS, INTERNAL_PROPERTIES, PROVIDER_ID_PROPERTY, + is_dynamic_isolation_label, ) logger = logging.getLogger(BackendLogger.API) @@ -305,7 +306,11 @@ def _serialize_graph(graph, provider_id: str) -> dict[str, Any]: def _filter_labels(labels: Iterable[str]) -> list[str]: - return [label for label in labels if label not in INTERNAL_LABELS] + 
return [ + label + for label in labels + if label not in INTERNAL_LABELS and not is_dynamic_isolation_label(label) + ] def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]: diff --git a/api/src/backend/api/tests/test_attack_paths.py b/api/src/backend/api/tests/test_attack_paths.py index 348f0dd124..442a9c5dd2 100644 --- a/api/src/backend/api/tests/test_attack_paths.py +++ b/api/src/backend/api/tests/test_attack_paths.py @@ -363,6 +363,14 @@ def test_serialize_properties_filters_internal_fields(): assert result == {"name": "prod"} +def test_filter_labels_strips_dynamic_isolation_labels(): + labels = ["AWSRole", "_Tenant_abc123", "_Provider_def456", "_ProviderResource"] + + result = views_helpers._filter_labels(labels) + + assert result == ["AWSRole"] + + def test_serialize_graph_as_text_node_without_properties(): graph = { "nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}], diff --git a/api/src/backend/tasks/jobs/attack_paths/config.py b/api/src/backend/tasks/jobs/attack_paths/config.py index 2f259721ed..be9305d8a5 100644 --- a/api/src/backend/tasks/jobs/attack_paths/config.py +++ b/api/src/backend/tasks/jobs/attack_paths/config.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from typing import Callable +from uuid import UUID from config.env import env @@ -17,6 +18,12 @@ INTERNET_NODE_LABEL = "Internet" PROWLER_FINDING_LABEL = "ProwlerFinding" PROVIDER_RESOURCE_LABEL = "_ProviderResource" +# Dynamic isolation labels that contain entity UUIDs and are added to every synced node during sync +# Format: _Tenant_{uuid_no_hyphens}, _Provider_{uuid_no_hyphens} +TENANT_LABEL_PREFIX = "_Tenant_" +PROVIDER_LABEL_PREFIX = "_Provider_" +DYNAMIC_ISOLATION_PREFIXES = [TENANT_LABEL_PREFIX, PROVIDER_LABEL_PREFIX] + @dataclass(frozen=True) class ProviderConfig: @@ -107,3 +114,27 @@ def get_provider_resource_label(provider_type: str) -> str: """Get the resource label for a provider type (e.g., `_AWSResource`).""" config = 
PROVIDER_CONFIGS.get(provider_type) return config.resource_label if config else "_UnknownProviderResource" + + +# Dynamic Isolation Label Helpers +# -------------------------------- + + +def _normalize_uuid(value: str | UUID) -> str: + """Strip hyphens from a UUID string for use in Neo4j labels.""" + return str(value).replace("-", "") + + +def get_tenant_label(tenant_id: str | UUID) -> str: + """Get the Neo4j label for a tenant (e.g., `_Tenant_019c41ee7df37deca684d839f95619f8`).""" + return f"{TENANT_LABEL_PREFIX}{_normalize_uuid(tenant_id)}" + + +def get_provider_label(provider_id: str | UUID) -> str: + """Get the Neo4j label for a provider (e.g., `_Provider_019c41ee7df37deca684d839f95619f8`).""" + return f"{PROVIDER_LABEL_PREFIX}{_normalize_uuid(provider_id)}" + + +def is_dynamic_isolation_label(label: str) -> bool: + """Check if a label is a dynamic tenant/provider isolation label.""" + return any(label.startswith(prefix) for prefix in DYNAMIC_ISOLATION_PREFIXES) diff --git a/api/src/backend/tasks/jobs/attack_paths/scan.py b/api/src/backend/tasks/jobs/attack_paths/scan.py index f12736d807..6624680a5e 100644 --- a/api/src/backend/tasks/jobs/attack_paths/scan.py +++ b/api/src/backend/tasks/jobs/attack_paths/scan.py @@ -237,6 +237,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]: sync.sync_graph( source_database=tmp_database_name, target_database=tenant_database_name, + tenant_id=str(prowler_api_provider.tenant_id), provider_id=str(prowler_api_provider.id), ) db_utils.set_graph_data_ready(attack_paths_scan, True) diff --git a/api/src/backend/tasks/jobs/attack_paths/sync.py b/api/src/backend/tasks/jobs/attack_paths/sync.py index 08d9432827..c0b1799b9a 100644 --- a/api/src/backend/tasks/jobs/attack_paths/sync.py +++ b/api/src/backend/tasks/jobs/attack_paths/sync.py @@ -15,6 +15,8 @@ from tasks.jobs.attack_paths.config import ( BATCH_SIZE, PROVIDER_ISOLATION_PROPERTIES, PROVIDER_RESOURCE_LABEL, + get_provider_label, + get_tenant_label, ) from 
tasks.jobs.attack_paths.indexes import IndexType, create_indexes from tasks.jobs.attack_paths.queries import ( @@ -36,6 +38,7 @@ def create_sync_indexes(neo4j_session) -> None: def sync_graph( source_database: str, target_database: str, + tenant_id: str, provider_id: str, ) -> dict[str, int]: """ @@ -44,6 +47,7 @@ def sync_graph( Args: `source_database`: The temporary scan database `target_database`: The tenant database + `tenant_id`: The tenant ID for isolation `provider_id`: The provider ID for isolation Returns: @@ -52,6 +56,7 @@ def sync_graph( nodes_synced = sync_nodes( source_database, target_database, + tenant_id, provider_id, ) relationships_synced = sync_relationships( @@ -69,12 +74,14 @@ def sync_graph( def sync_nodes( source_database: str, target_database: str, + tenant_id: str, provider_id: str, ) -> int: """ Sync nodes from source to target database. Adds `_ProviderResource` label and `_provider_id` property to all nodes. + Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels. 
""" last_id = -1 total_synced = 0 @@ -112,6 +119,8 @@ def sync_nodes( for labels, batch in grouped.items(): label_set = set(labels) label_set.add(PROVIDER_RESOURCE_LABEL) + label_set.add(get_tenant_label(tenant_id)) + label_set.add(get_provider_label(provider_id)) node_labels = ":".join(f"`{label}`" for label in sorted(label_set)) query = render_cypher_template( diff --git a/api/src/backend/tasks/tests/test_attack_paths_scan.py b/api/src/backend/tasks/tests/test_attack_paths_scan.py index 42da12411f..cd4dfcffd9 100644 --- a/api/src/backend/tasks/tests/test_attack_paths_scan.py +++ b/api/src/backend/tasks/tests/test_attack_paths_scan.py @@ -151,6 +151,7 @@ class TestAttackPathsRun: mock_sync.assert_called_once_with( source_database="db-scan-id", target_database="tenant-db", + tenant_id=str(provider.tenant_id), provider_id=str(provider.id), ) mock_get_ingestion.assert_called_once_with(provider.provider) @@ -1118,12 +1119,15 @@ class TestSyncNodes: "tasks.jobs.attack_paths.sync.graph_database.get_session", side_effect=[source_ctx, target_ctx], ): - total = sync_module.sync_nodes("source-db", "target-db", "prov-1") + total = sync_module.sync_nodes( + "source-db", "target-db", "tenant-1", "prov-1" + ) assert total == 1 query = mock_target_session.run.call_args.args[0] assert "_ProviderResource" in query - assert "ProviderResource" not in query.replace("_ProviderResource", "") + assert "_Tenant_tenant1" in query + assert "_Provider_prov1" in query class TestInternetAnalysis: diff --git a/skills/prowler-attack-paths-query/SKILL.md b/skills/prowler-attack-paths-query/SKILL.md index 78f53b4c0d..dcbb2d406b 100644 --- a/skills/prowler-attack-paths-query/SKILL.md +++ b/skills/prowler-attack-paths-query/SKILL.md @@ -252,13 +252,13 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0 **IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. 
Using master/main may reference node labels or properties that don't exist in the deployed version. -**Additional Prowler Labels**: The Attack Paths sync task adds extra labels: +**Additional Prowler Labels**: The Attack Paths sync task adds labels that queries can reference: - `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties -- `ProviderResource` - Generic resource marker -- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`) - `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries) +Internal labels (`_ProviderResource`, provider-specific resource labels such as `_AWSResource`, `_Tenant_*`, `_Provider_*`) also exist for isolation, but they should never be used in queries. + These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`. ### 3. Consult the Schema for Available Data