mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-03-21 18:58:04 +00:00
feat(attack-paths): scans add tenant and provider related labels to nodes (#10308)
This commit is contained in:
@@ -9,6 +9,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
|||||||
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
|
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
|
||||||
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
|
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
|
||||||
- `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
|
- `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
|
||||||
|
- Attack Paths: Add tenant- and provider-related labels to nodes so they can be easily filtered in custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
|
||||||
|
|
||||||
### 🐞 Fixed
|
### 🐞 Fixed
|
||||||
|
|
||||||
|
|||||||
2
api/poetry.lock
generated
2
api/poetry.lock
generated
@@ -6730,7 +6730,7 @@ tzlocal = "5.3.1"
|
|||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/prowler-cloud/prowler.git"
|
url = "https://github.com/prowler-cloud/prowler.git"
|
||||||
reference = "master"
|
reference = "master"
|
||||||
resolved_reference = "6962622fd21401886371add25463f77228cd9c1f"
|
resolved_reference = "b31145616064bd6727139777dca1cea9b977346a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "psutil"
|
name = "psutil"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from tasks.jobs.attack_paths.config import (
|
|||||||
INTERNAL_LABELS,
|
INTERNAL_LABELS,
|
||||||
INTERNAL_PROPERTIES,
|
INTERNAL_PROPERTIES,
|
||||||
PROVIDER_ID_PROPERTY,
|
PROVIDER_ID_PROPERTY,
|
||||||
|
is_dynamic_isolation_label,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(BackendLogger.API)
|
logger = logging.getLogger(BackendLogger.API)
|
||||||
@@ -305,7 +306,11 @@ def _serialize_graph(graph, provider_id: str) -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def _filter_labels(labels: Iterable[str]) -> list[str]:
    """Return `labels` without internal or dynamic isolation labels.

    A label is dropped when it is listed in `INTERNAL_LABELS` or when
    `is_dynamic_isolation_label` reports it as a per-tenant / per-provider
    isolation label. The relative order of the remaining labels is kept.
    """
    return [
        label
        for label in labels
        if label not in INTERNAL_LABELS and not is_dynamic_isolation_label(label)
    ]
|
||||||
|
|
||||||
|
|
||||||
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:
|
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
|||||||
@@ -363,6 +363,14 @@ def test_serialize_properties_filters_internal_fields():
|
|||||||
assert result == {"name": "prod"}
|
assert result == {"name": "prod"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_labels_strips_dynamic_isolation_labels():
    """`_filter_labels` keeps only the regular label from a mixed label list."""
    labels = ["AWSRole", "_Tenant_abc123", "_Provider_def456", "_ProviderResource"]

    result = views_helpers._filter_labels(labels)

    assert result == ["AWSRole"]
|
||||||
|
|
||||||
|
|
||||||
def test_serialize_graph_as_text_node_without_properties():
|
def test_serialize_graph_as_text_node_without_properties():
|
||||||
graph = {
|
graph = {
|
||||||
"nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}],
|
"nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}],
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
from config.env import env
|
from config.env import env
|
||||||
|
|
||||||
@@ -17,6 +18,12 @@ INTERNET_NODE_LABEL = "Internet"
|
|||||||
PROWLER_FINDING_LABEL = "ProwlerFinding"
|
PROWLER_FINDING_LABEL = "ProwlerFinding"
|
||||||
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
|
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
|
||||||
|
|
||||||
|
# Dynamic isolation labels embed an entity UUID and are added to every node during sync
# Format: _Tenant_{uuid_no_hyphens}, _Provider_{uuid_no_hyphens}
TENANT_LABEL_PREFIX = "_Tenant_"
PROVIDER_LABEL_PREFIX = "_Provider_"
DYNAMIC_ISOLATION_PREFIXES = [TENANT_LABEL_PREFIX, PROVIDER_LABEL_PREFIX]
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class ProviderConfig:
|
class ProviderConfig:
|
||||||
@@ -107,3 +114,27 @@ def get_provider_resource_label(provider_type: str) -> str:
|
|||||||
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
|
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
|
||||||
config = PROVIDER_CONFIGS.get(provider_type)
|
config = PROVIDER_CONFIGS.get(provider_type)
|
||||||
return config.resource_label if config else "_UnknownProviderResource"
|
return config.resource_label if config else "_UnknownProviderResource"
|
||||||
|
|
||||||
|
|
||||||
|
# Dynamic Isolation Label Helpers
|
||||||
|
# --------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_uuid(value: str | UUID) -> str:
|
||||||
|
"""Strip hyphens from a UUID string for use in Neo4j labels."""
|
||||||
|
return str(value).replace("-", "")
|
||||||
|
|
||||||
|
|
||||||
|
def get_tenant_label(tenant_id: str | UUID) -> str:
    """Get the Neo4j label for a tenant (e.g., `_Tenant_019c41ee7df37deca684d839f95619f8`).

    The label is `TENANT_LABEL_PREFIX` followed by the tenant ID with its
    hyphens removed.
    """
    return f"{TENANT_LABEL_PREFIX}{_normalize_uuid(tenant_id)}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_provider_label(provider_id: str | UUID) -> str:
    """Get the Neo4j label for a provider (e.g., `_Provider_019c41ee7df37deca684d839f95619f8`).

    The label is `PROVIDER_LABEL_PREFIX` followed by the provider ID with its
    hyphens removed.
    """
    return f"{PROVIDER_LABEL_PREFIX}{_normalize_uuid(provider_id)}"
|
||||||
|
|
||||||
|
|
||||||
|
def is_dynamic_isolation_label(label: str) -> bool:
    """Check if a label is a dynamic tenant/provider isolation label.

    Returns `True` when `label` starts with any prefix listed in
    `DYNAMIC_ISOLATION_PREFIXES`, `False` otherwise.
    """
    # `str.startswith` only accepts a tuple of candidates, so convert here;
    # this keeps the check working whether the constant is a list or a tuple.
    return label.startswith(tuple(DYNAMIC_ISOLATION_PREFIXES))
|
||||||
|
|||||||
@@ -237,6 +237,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
|
|||||||
sync.sync_graph(
|
sync.sync_graph(
|
||||||
source_database=tmp_database_name,
|
source_database=tmp_database_name,
|
||||||
target_database=tenant_database_name,
|
target_database=tenant_database_name,
|
||||||
|
tenant_id=str(prowler_api_provider.tenant_id),
|
||||||
provider_id=str(prowler_api_provider.id),
|
provider_id=str(prowler_api_provider.id),
|
||||||
)
|
)
|
||||||
db_utils.set_graph_data_ready(attack_paths_scan, True)
|
db_utils.set_graph_data_ready(attack_paths_scan, True)
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ from tasks.jobs.attack_paths.config import (
|
|||||||
BATCH_SIZE,
|
BATCH_SIZE,
|
||||||
PROVIDER_ISOLATION_PROPERTIES,
|
PROVIDER_ISOLATION_PROPERTIES,
|
||||||
PROVIDER_RESOURCE_LABEL,
|
PROVIDER_RESOURCE_LABEL,
|
||||||
|
get_provider_label,
|
||||||
|
get_tenant_label,
|
||||||
)
|
)
|
||||||
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
|
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
|
||||||
from tasks.jobs.attack_paths.queries import (
|
from tasks.jobs.attack_paths.queries import (
|
||||||
@@ -36,6 +38,7 @@ def create_sync_indexes(neo4j_session) -> None:
|
|||||||
def sync_graph(
|
def sync_graph(
|
||||||
source_database: str,
|
source_database: str,
|
||||||
target_database: str,
|
target_database: str,
|
||||||
|
tenant_id: str,
|
||||||
provider_id: str,
|
provider_id: str,
|
||||||
) -> dict[str, int]:
|
) -> dict[str, int]:
|
||||||
"""
|
"""
|
||||||
@@ -44,6 +47,7 @@ def sync_graph(
|
|||||||
Args:
|
Args:
|
||||||
`source_database`: The temporary scan database
|
`source_database`: The temporary scan database
|
||||||
`target_database`: The tenant database
|
`target_database`: The tenant database
|
||||||
|
`tenant_id`: The tenant ID for isolation
|
||||||
`provider_id`: The provider ID for isolation
|
`provider_id`: The provider ID for isolation
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@@ -52,6 +56,7 @@ def sync_graph(
|
|||||||
nodes_synced = sync_nodes(
|
nodes_synced = sync_nodes(
|
||||||
source_database,
|
source_database,
|
||||||
target_database,
|
target_database,
|
||||||
|
tenant_id,
|
||||||
provider_id,
|
provider_id,
|
||||||
)
|
)
|
||||||
relationships_synced = sync_relationships(
|
relationships_synced = sync_relationships(
|
||||||
@@ -69,12 +74,14 @@ def sync_graph(
|
|||||||
def sync_nodes(
|
def sync_nodes(
|
||||||
source_database: str,
|
source_database: str,
|
||||||
target_database: str,
|
target_database: str,
|
||||||
|
tenant_id: str,
|
||||||
provider_id: str,
|
provider_id: str,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Sync nodes from source to target database.
|
Sync nodes from source to target database.
|
||||||
|
|
||||||
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
|
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
|
||||||
|
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
|
||||||
"""
|
"""
|
||||||
last_id = -1
|
last_id = -1
|
||||||
total_synced = 0
|
total_synced = 0
|
||||||
@@ -112,6 +119,8 @@ def sync_nodes(
|
|||||||
for labels, batch in grouped.items():
|
for labels, batch in grouped.items():
|
||||||
label_set = set(labels)
|
label_set = set(labels)
|
||||||
label_set.add(PROVIDER_RESOURCE_LABEL)
|
label_set.add(PROVIDER_RESOURCE_LABEL)
|
||||||
|
label_set.add(get_tenant_label(tenant_id))
|
||||||
|
label_set.add(get_provider_label(provider_id))
|
||||||
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
|
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
|
||||||
|
|
||||||
query = render_cypher_template(
|
query = render_cypher_template(
|
||||||
|
|||||||
@@ -151,6 +151,7 @@ class TestAttackPathsRun:
|
|||||||
mock_sync.assert_called_once_with(
|
mock_sync.assert_called_once_with(
|
||||||
source_database="db-scan-id",
|
source_database="db-scan-id",
|
||||||
target_database="tenant-db",
|
target_database="tenant-db",
|
||||||
|
tenant_id=str(provider.tenant_id),
|
||||||
provider_id=str(provider.id),
|
provider_id=str(provider.id),
|
||||||
)
|
)
|
||||||
mock_get_ingestion.assert_called_once_with(provider.provider)
|
mock_get_ingestion.assert_called_once_with(provider.provider)
|
||||||
@@ -1118,12 +1119,15 @@ class TestSyncNodes:
|
|||||||
"tasks.jobs.attack_paths.sync.graph_database.get_session",
|
"tasks.jobs.attack_paths.sync.graph_database.get_session",
|
||||||
side_effect=[source_ctx, target_ctx],
|
side_effect=[source_ctx, target_ctx],
|
||||||
):
|
):
|
||||||
total = sync_module.sync_nodes("source-db", "target-db", "prov-1")
|
total = sync_module.sync_nodes(
|
||||||
|
"source-db", "target-db", "tenant-1", "prov-1"
|
||||||
|
)
|
||||||
|
|
||||||
assert total == 1
|
assert total == 1
|
||||||
query = mock_target_session.run.call_args.args[0]
|
query = mock_target_session.run.call_args.args[0]
|
||||||
assert "_ProviderResource" in query
|
assert "_ProviderResource" in query
|
||||||
assert "ProviderResource" not in query.replace("_ProviderResource", "")
|
assert "_Tenant_tenant1" in query
|
||||||
|
assert "_Provider_prov1" in query
|
||||||
|
|
||||||
|
|
||||||
class TestInternetAnalysis:
|
class TestInternetAnalysis:
|
||||||
|
|||||||
@@ -252,13 +252,13 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0
|
|||||||
|
|
||||||
**IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. Using master/main may reference node labels or properties that don't exist in the deployed version.
|
**IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. Using master/main may reference node labels or properties that don't exist in the deployed version.
|
||||||
|
|
||||||
**Additional Prowler Labels**: The Attack Paths sync task adds extra labels:
|
**Additional Prowler Labels**: The Attack Paths sync task adds labels that queries can reference:
|
||||||
|
|
||||||
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
|
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
|
||||||
- `ProviderResource` - Generic resource marker
|
|
||||||
- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`)
|
|
||||||
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
|
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
|
||||||
|
|
||||||
|
Other internal labels (`_ProviderResource`, `_AWSResource`, `_Tenant_*`, `_Provider_*`) exist for isolation but should never be used in queries.
|
||||||
|
|
||||||
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
|
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
|
||||||
|
|
||||||
### 3. Consult the Schema for Available Data
|
### 3. Consult the Schema for Available Data
|
||||||
|
|||||||
Reference in New Issue
Block a user