feat(attack-paths): scans add tenant and provider related labels to nodes (#10308)

This commit is contained in:
Josema Camacho
2026-03-16 16:31:15 +01:00
committed by GitHub
parent 1cf6eaa0b7
commit 787a339cd9
9 changed files with 66 additions and 7 deletions

View File

@@ -9,6 +9,7 @@ All notable changes to the **Prowler API** are documented in this file.
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
- `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
- Attack Paths: Add tenant- and provider-related labels to nodes so they can be easily filtered in custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
### 🐞 Fixed

2
api/poetry.lock generated
View File

@@ -6730,7 +6730,7 @@ tzlocal = "5.3.1"
type = "git"
url = "https://github.com/prowler-cloud/prowler.git"
reference = "master"
resolved_reference = "6962622fd21401886371add25463f77228cd9c1f"
resolved_reference = "b31145616064bd6727139777dca1cea9b977346a"
[[package]]
name = "psutil"

View File

@@ -17,6 +17,7 @@ from tasks.jobs.attack_paths.config import (
INTERNAL_LABELS,
INTERNAL_PROPERTIES,
PROVIDER_ID_PROPERTY,
is_dynamic_isolation_label,
)
logger = logging.getLogger(BackendLogger.API)
@@ -305,7 +306,11 @@ def _serialize_graph(graph, provider_id: str) -> dict[str, Any]:
def _filter_labels(labels: Iterable[str]) -> list[str]:
    """Return the labels that are safe to expose, preserving input order.

    A label is dropped when it is listed in `INTERNAL_LABELS` or when
    `is_dynamic_isolation_label` recognises it as a per-tenant/per-provider
    isolation label (e.g. `_Tenant_<uuid>` or `_Provider_<uuid>`).

    Args:
        labels: Labels attached to a graph node.

    Returns:
        A new list with only the non-internal, non-isolation labels.
    """
    # A single return: an earlier unconditional return that filtered only on
    # INTERNAL_LABELS would make the dynamic-label check unreachable.
    return [
        label
        for label in labels
        if label not in INTERNAL_LABELS and not is_dynamic_isolation_label(label)
    ]
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:

View File

@@ -363,6 +363,14 @@ def test_serialize_properties_filters_internal_fields():
assert result == {"name": "prod"}
def test_filter_labels_strips_dynamic_isolation_labels():
    """Only the public `AWSRole` label survives `_filter_labels`."""
    raw_labels = [
        "AWSRole",
        "_Tenant_abc123",
        "_Provider_def456",
        "_ProviderResource",
    ]

    assert views_helpers._filter_labels(raw_labels) == ["AWSRole"]
def test_serialize_graph_as_text_node_without_properties():
graph = {
"nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}],

View File

@@ -1,5 +1,6 @@
from dataclasses import dataclass
from typing import Callable
from uuid import UUID
from config.env import env
@@ -17,6 +18,12 @@ INTERNET_NODE_LABEL = "Internet"
PROWLER_FINDING_LABEL = "ProwlerFinding"
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
# Dynamic isolation labels embed an entity UUID and are added to every node
# that the sync step writes (see `sync_nodes`).
# Format: _Tenant_{uuid_no_hyphens}, _Provider_{uuid_no_hyphens}
TENANT_LABEL_PREFIX = "_Tenant_"
PROVIDER_LABEL_PREFIX = "_Provider_"
# Every prefix that identifies a dynamic isolation label; consulted by
# `is_dynamic_isolation_label`.
DYNAMIC_ISOLATION_PREFIXES = [TENANT_LABEL_PREFIX, PROVIDER_LABEL_PREFIX]
@dataclass(frozen=True)
class ProviderConfig:
@@ -107,3 +114,27 @@ def get_provider_resource_label(provider_type: str) -> str:
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
config = PROVIDER_CONFIGS.get(provider_type)
return config.resource_label if config else "_UnknownProviderResource"
# Dynamic Isolation Label Helpers
# --------------------------------
def _normalize_uuid(value: str | UUID) -> str:
"""Strip hyphens from a UUID string for use in Neo4j labels."""
return str(value).replace("-", "")
def get_tenant_label(tenant_id: str | UUID) -> str:
    """Build the Neo4j isolation label for a tenant.

    The result is `TENANT_LABEL_PREFIX` followed by the hyphen-free tenant ID,
    e.g. `_Tenant_019c41ee7df37deca684d839f95619f8`.
    """
    compact_id = _normalize_uuid(tenant_id)
    return TENANT_LABEL_PREFIX + compact_id
def get_provider_label(provider_id: str | UUID) -> str:
    """Build the Neo4j isolation label for a provider.

    The result is `PROVIDER_LABEL_PREFIX` followed by the hyphen-free provider
    ID, e.g. `_Provider_019c41ee7df37deca684d839f95619f8`.
    """
    compact_id = _normalize_uuid(provider_id)
    return PROVIDER_LABEL_PREFIX + compact_id
def is_dynamic_isolation_label(label: str) -> bool:
    """Tell whether `label` begins with one of the dynamic isolation prefixes.

    Returns True for tenant/provider isolation labels, False otherwise.
    """
    # str.startswith accepts a tuple and matches if any member is a prefix.
    return label.startswith(tuple(DYNAMIC_ISOLATION_PREFIXES))

View File

@@ -237,6 +237,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
sync.sync_graph(
source_database=tmp_database_name,
target_database=tenant_database_name,
tenant_id=str(prowler_api_provider.tenant_id),
provider_id=str(prowler_api_provider.id),
)
db_utils.set_graph_data_ready(attack_paths_scan, True)

View File

@@ -15,6 +15,8 @@ from tasks.jobs.attack_paths.config import (
BATCH_SIZE,
PROVIDER_ISOLATION_PROPERTIES,
PROVIDER_RESOURCE_LABEL,
get_provider_label,
get_tenant_label,
)
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
from tasks.jobs.attack_paths.queries import (
@@ -36,6 +38,7 @@ def create_sync_indexes(neo4j_session) -> None:
def sync_graph(
source_database: str,
target_database: str,
tenant_id: str,
provider_id: str,
) -> dict[str, int]:
"""
@@ -44,6 +47,7 @@ def sync_graph(
Args:
`source_database`: The temporary scan database
`target_database`: The tenant database
`tenant_id`: The tenant ID for isolation
`provider_id`: The provider ID for isolation
Returns:
@@ -52,6 +56,7 @@ def sync_graph(
nodes_synced = sync_nodes(
source_database,
target_database,
tenant_id,
provider_id,
)
relationships_synced = sync_relationships(
@@ -69,12 +74,14 @@ def sync_graph(
def sync_nodes(
source_database: str,
target_database: str,
tenant_id: str,
provider_id: str,
) -> int:
"""
Sync nodes from source to target database.
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
"""
last_id = -1
total_synced = 0
@@ -112,6 +119,8 @@ def sync_nodes(
for labels, batch in grouped.items():
label_set = set(labels)
label_set.add(PROVIDER_RESOURCE_LABEL)
label_set.add(get_tenant_label(tenant_id))
label_set.add(get_provider_label(provider_id))
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
query = render_cypher_template(

View File

@@ -151,6 +151,7 @@ class TestAttackPathsRun:
mock_sync.assert_called_once_with(
source_database="db-scan-id",
target_database="tenant-db",
tenant_id=str(provider.tenant_id),
provider_id=str(provider.id),
)
mock_get_ingestion.assert_called_once_with(provider.provider)
@@ -1118,12 +1119,15 @@ class TestSyncNodes:
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[source_ctx, target_ctx],
):
total = sync_module.sync_nodes("source-db", "target-db", "prov-1")
total = sync_module.sync_nodes(
"source-db", "target-db", "tenant-1", "prov-1"
)
assert total == 1
query = mock_target_session.run.call_args.args[0]
assert "_ProviderResource" in query
assert "ProviderResource" not in query.replace("_ProviderResource", "")
assert "_Tenant_tenant1" in query
assert "_Provider_prov1" in query
class TestInternetAnalysis:

View File

@@ -252,13 +252,13 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0
**IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. Using master/main may reference node labels or properties that don't exist in the deployed version.
**Additional Prowler Labels**: The Attack Paths sync task adds extra labels:
**Additional Prowler Labels**: The Attack Paths sync task adds labels that queries can reference:
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
- `ProviderResource` - Generic resource marker
- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`)
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
Other internal labels (`_ProviderResource`, `_AWSResource`, `_Tenant_*`, `_Provider_*`) exist for isolation but should never be used in queries.
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
### 3. Consult the Schema for Available Data