mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-03-21 18:58:04 +00:00
feat(attack-paths): scans add tenant and provider related labels to nodes (#10308)
This commit is contained in:
@@ -9,6 +9,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
|
||||
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
|
||||
- `POST /api/v1/providers` returns `409 Conflict` if the provider already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
|
||||
- Attack Paths: Add tenant- and provider-related labels to nodes so they can be easily filtered in custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
|
||||
2
api/poetry.lock
generated
2
api/poetry.lock
generated
@@ -6730,7 +6730,7 @@ tzlocal = "5.3.1"
|
||||
type = "git"
|
||||
url = "https://github.com/prowler-cloud/prowler.git"
|
||||
reference = "master"
|
||||
resolved_reference = "6962622fd21401886371add25463f77228cd9c1f"
|
||||
resolved_reference = "b31145616064bd6727139777dca1cea9b977346a"
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
|
||||
@@ -17,6 +17,7 @@ from tasks.jobs.attack_paths.config import (
|
||||
INTERNAL_LABELS,
|
||||
INTERNAL_PROPERTIES,
|
||||
PROVIDER_ID_PROPERTY,
|
||||
is_dynamic_isolation_label,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(BackendLogger.API)
|
||||
@@ -305,7 +306,11 @@ def _serialize_graph(graph, provider_id: str) -> dict[str, Any]:
|
||||
|
||||
|
||||
def _filter_labels(labels: Iterable[str]) -> list[str]:
    """Return the labels from `labels` that are safe to expose, in input order.

    A label is dropped when it is listed in `INTERNAL_LABELS` or when
    `is_dynamic_isolation_label` recognises it as a per-tenant / per-provider
    isolation label (`_Tenant_*`, `_Provider_*`).

    Args:
        labels: Node labels as read from the graph.

    Returns:
        A new list containing only the labels that passed both checks.
    """
    # Both conditions are applied in a single pass; an earlier return that only
    # checked INTERNAL_LABELS would let the dynamic isolation labels through.
    return [
        label
        for label in labels
        if label not in INTERNAL_LABELS and not is_dynamic_isolation_label(label)
    ]
|
||||
|
||||
|
||||
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
@@ -363,6 +363,14 @@ def test_serialize_properties_filters_internal_fields():
|
||||
assert result == {"name": "prod"}
|
||||
|
||||
|
||||
def test_filter_labels_strips_dynamic_isolation_labels():
    """`_filter_labels` keeps `AWSRole` and drops the three underscore-prefixed labels."""
    raw_labels = [
        "AWSRole",
        "_Tenant_abc123",
        "_Provider_def456",
        "_ProviderResource",
    ]

    assert views_helpers._filter_labels(raw_labels) == ["AWSRole"]
|
||||
|
||||
|
||||
def test_serialize_graph_as_text_node_without_properties():
|
||||
graph = {
|
||||
"nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}],
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable
|
||||
from uuid import UUID
|
||||
|
||||
from config.env import env
|
||||
|
||||
@@ -17,6 +18,12 @@ INTERNET_NODE_LABEL = "Internet"
|
||||
PROWLER_FINDING_LABEL = "ProwlerFinding"
|
||||
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
|
||||
|
||||
# Dynamic isolation labels embed entity UUIDs and are added to every node during sync
|
||||
# Format: _Tenant_{uuid_no_hyphens}, _Provider_{uuid_no_hyphens}
|
||||
TENANT_LABEL_PREFIX = "_Tenant_"
|
||||
PROVIDER_LABEL_PREFIX = "_Provider_"
|
||||
DYNAMIC_ISOLATION_PREFIXES = [TENANT_LABEL_PREFIX, PROVIDER_LABEL_PREFIX]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProviderConfig:
|
||||
@@ -107,3 +114,27 @@ def get_provider_resource_label(provider_type: str) -> str:
|
||||
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
|
||||
config = PROVIDER_CONFIGS.get(provider_type)
|
||||
return config.resource_label if config else "_UnknownProviderResource"
|
||||
|
||||
|
||||
# Dynamic Isolation Label Helpers
|
||||
# --------------------------------
|
||||
|
||||
|
||||
def _normalize_uuid(value: str | UUID) -> str:
|
||||
"""Strip hyphens from a UUID string for use in Neo4j labels."""
|
||||
return str(value).replace("-", "")
|
||||
|
||||
|
||||
def get_tenant_label(tenant_id: str | UUID) -> str:
    """Build the Neo4j label for a tenant.

    The result is `TENANT_LABEL_PREFIX` followed by the hyphen-free ID,
    e.g. `_Tenant_019c41ee7df37deca684d839f95619f8`.
    """
    compact_id = _normalize_uuid(tenant_id)
    return TENANT_LABEL_PREFIX + compact_id
|
||||
|
||||
|
||||
def get_provider_label(provider_id: str | UUID) -> str:
    """Build the Neo4j label for a provider.

    The result is `PROVIDER_LABEL_PREFIX` followed by the hyphen-free ID,
    e.g. `_Provider_019c41ee7df37deca684d839f95619f8`.
    """
    compact_id = _normalize_uuid(provider_id)
    return PROVIDER_LABEL_PREFIX + compact_id
|
||||
|
||||
|
||||
def is_dynamic_isolation_label(label: str) -> bool:
    """Return True when `label` starts with one of `DYNAMIC_ISOLATION_PREFIXES`."""
    # str.startswith accepts a tuple of candidates and matches if any one fits.
    return label.startswith(tuple(DYNAMIC_ISOLATION_PREFIXES))
|
||||
|
||||
@@ -237,6 +237,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
|
||||
sync.sync_graph(
|
||||
source_database=tmp_database_name,
|
||||
target_database=tenant_database_name,
|
||||
tenant_id=str(prowler_api_provider.tenant_id),
|
||||
provider_id=str(prowler_api_provider.id),
|
||||
)
|
||||
db_utils.set_graph_data_ready(attack_paths_scan, True)
|
||||
|
||||
@@ -15,6 +15,8 @@ from tasks.jobs.attack_paths.config import (
|
||||
BATCH_SIZE,
|
||||
PROVIDER_ISOLATION_PROPERTIES,
|
||||
PROVIDER_RESOURCE_LABEL,
|
||||
get_provider_label,
|
||||
get_tenant_label,
|
||||
)
|
||||
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
|
||||
from tasks.jobs.attack_paths.queries import (
|
||||
@@ -36,6 +38,7 @@ def create_sync_indexes(neo4j_session) -> None:
|
||||
def sync_graph(
|
||||
source_database: str,
|
||||
target_database: str,
|
||||
tenant_id: str,
|
||||
provider_id: str,
|
||||
) -> dict[str, int]:
|
||||
"""
|
||||
@@ -44,6 +47,7 @@ def sync_graph(
|
||||
Args:
|
||||
`source_database`: The temporary scan database
|
||||
`target_database`: The tenant database
|
||||
`tenant_id`: The tenant ID for isolation
|
||||
`provider_id`: The provider ID for isolation
|
||||
|
||||
Returns:
|
||||
@@ -52,6 +56,7 @@ def sync_graph(
|
||||
nodes_synced = sync_nodes(
|
||||
source_database,
|
||||
target_database,
|
||||
tenant_id,
|
||||
provider_id,
|
||||
)
|
||||
relationships_synced = sync_relationships(
|
||||
@@ -69,12 +74,14 @@ def sync_graph(
|
||||
def sync_nodes(
|
||||
source_database: str,
|
||||
target_database: str,
|
||||
tenant_id: str,
|
||||
provider_id: str,
|
||||
) -> int:
|
||||
"""
|
||||
Sync nodes from source to target database.
|
||||
|
||||
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
|
||||
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
|
||||
"""
|
||||
last_id = -1
|
||||
total_synced = 0
|
||||
@@ -112,6 +119,8 @@ def sync_nodes(
|
||||
for labels, batch in grouped.items():
|
||||
label_set = set(labels)
|
||||
label_set.add(PROVIDER_RESOURCE_LABEL)
|
||||
label_set.add(get_tenant_label(tenant_id))
|
||||
label_set.add(get_provider_label(provider_id))
|
||||
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
|
||||
|
||||
query = render_cypher_template(
|
||||
|
||||
@@ -151,6 +151,7 @@ class TestAttackPathsRun:
|
||||
mock_sync.assert_called_once_with(
|
||||
source_database="db-scan-id",
|
||||
target_database="tenant-db",
|
||||
tenant_id=str(provider.tenant_id),
|
||||
provider_id=str(provider.id),
|
||||
)
|
||||
mock_get_ingestion.assert_called_once_with(provider.provider)
|
||||
@@ -1118,12 +1119,15 @@ class TestSyncNodes:
|
||||
"tasks.jobs.attack_paths.sync.graph_database.get_session",
|
||||
side_effect=[source_ctx, target_ctx],
|
||||
):
|
||||
total = sync_module.sync_nodes("source-db", "target-db", "prov-1")
|
||||
total = sync_module.sync_nodes(
|
||||
"source-db", "target-db", "tenant-1", "prov-1"
|
||||
)
|
||||
|
||||
assert total == 1
|
||||
query = mock_target_session.run.call_args.args[0]
|
||||
assert "_ProviderResource" in query
|
||||
assert "ProviderResource" not in query.replace("_ProviderResource", "")
|
||||
assert "_Tenant_tenant1" in query
|
||||
assert "_Provider_prov1" in query
|
||||
|
||||
|
||||
class TestInternetAnalysis:
|
||||
|
||||
@@ -252,13 +252,13 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0
|
||||
|
||||
**IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. Using master/main may reference node labels or properties that don't exist in the deployed version.
|
||||
|
||||
**Additional Prowler Labels**: The Attack Paths sync task adds extra labels:
|
||||
**Additional Prowler Labels**: The Attack Paths sync task adds labels that queries can reference:
|
||||
|
||||
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
|
||||
- `ProviderResource` - Generic resource marker
|
||||
- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`)
|
||||
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
|
||||
|
||||
Other internal labels (`_ProviderResource`, `_AWSResource`, `_Tenant_*`, `_Provider_*`) exist for isolation but should never be used in queries.
|
||||
|
||||
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
|
||||
|
||||
### 3. Consult the Schema for Available Data
|
||||
|
||||
Reference in New Issue
Block a user