mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-07-04 19:21:51 +00:00
fix(api): drop_subgraph deletes relationships then nodes to cut Neo4j memory (#11557)
This commit is contained in:
@@ -4,9 +4,11 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
## [1.31.1] (Prowler UNRELEASED)
|
||||
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- `compliance-overviews/attributes` now resolves the provider from the scan, so multi-provider universal frameworks (e.g. CSA CCM) return the check IDs of the scan's provider and Azure/GCP requirement details show their findings instead of appearing empty [(#11546)](https://github.com/prowler-cloud/prowler/pull/11546)
|
||||
- Attack Paths: `drop_subgraph` now deletes relationships first and then nodes in batches, using less memory on Neo4j when clearing a dense provider graph [(#11557)](https://github.com/prowler-cloud/prowler/pull/11557)
|
||||
- OCI scans now use API key credentials with the configured region instead of falling back to `/home/prowler/.oci/config` [(#11558)](https://github.com/prowler-cloud/prowler/pull/11558)
|
||||
|
||||
---
|
||||
|
||||
@@ -175,7 +175,8 @@ def drop_subgraph(database: str, provider_id: str) -> int:
|
||||
"""
|
||||
Delete all nodes for a provider from the tenant database.
|
||||
|
||||
Uses batched deletion to avoid memory issues with large graphs.
|
||||
Deletes relationships then nodes in batches (not `DETACH DELETE`) so a dense
|
||||
provider's graph cannot exceed Neo4j's transaction memory limit.
|
||||
Silently returns 0 if the database doesn't exist.
|
||||
"""
|
||||
provider_label = get_provider_label(provider_id)
|
||||
@@ -183,13 +184,28 @@ def drop_subgraph(database: str, provider_id: str) -> int:
|
||||
|
||||
try:
|
||||
with get_session(database) as session:
|
||||
# Phase 1: delete relationships incident to provider nodes in batches.
|
||||
deleted_count = 1
|
||||
while deleted_count > 0:
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (:`{provider_label}`)-[r]-()
|
||||
WITH DISTINCT r LIMIT $batch_size
|
||||
DELETE r
|
||||
RETURN COUNT(r) AS deleted_rels_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
)
|
||||
deleted_count = result.single().get("deleted_rels_count", 0)
|
||||
|
||||
# Phase 2: delete the now relationship-free nodes in batches.
|
||||
deleted_count = 1
|
||||
while deleted_count > 0:
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
|
||||
WITH n LIMIT $batch_size
|
||||
DETACH DELETE n
|
||||
DELETE n
|
||||
RETURN COUNT(n) AS deleted_nodes_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
|
||||
@@ -542,3 +542,84 @@ class TestHasProviderData:
|
||||
):
|
||||
with pytest.raises(db_module.GraphDatabaseQueryException):
|
||||
db_module.has_provider_data("db-tenant-abc", "provider-123")
|
||||
|
||||
|
||||
class TestDropSubgraph:
|
||||
"""Test drop_subgraph two-phase batched deletion of a provider's graph."""
|
||||
|
||||
@staticmethod
|
||||
def _result(count):
|
||||
result = MagicMock()
|
||||
result.single.return_value.get.return_value = count
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _session_ctx(session):
|
||||
ctx = MagicMock()
|
||||
ctx.__enter__.return_value = session
|
||||
ctx.__exit__.return_value = False
|
||||
return ctx
|
||||
|
||||
def test_deletes_relationships_then_nodes_in_batches(self):
|
||||
session = MagicMock()
|
||||
# Phase 1 (relationships): one full batch then empty.
|
||||
# Phase 2 (nodes): one full batch then empty.
|
||||
session.run.side_effect = [
|
||||
self._result(1000),
|
||||
self._result(0),
|
||||
self._result(1000),
|
||||
self._result(0),
|
||||
]
|
||||
|
||||
with patch(
|
||||
"api.attack_paths.database.get_session",
|
||||
return_value=self._session_ctx(session),
|
||||
):
|
||||
deleted = db_module.drop_subgraph("db-tenant-abc", "provider-123")
|
||||
|
||||
# Only phase-2 node counts contribute to the return value.
|
||||
assert deleted == 1000
|
||||
assert session.run.call_count == 4
|
||||
|
||||
queries = [call.args[0] for call in session.run.call_args_list]
|
||||
|
||||
# Regression guard: the memory blow-up was caused by DETACH DELETE.
|
||||
assert all("DETACH DELETE" not in query for query in queries)
|
||||
|
||||
rel_queries = [query for query in queries if "DELETE r" in query]
|
||||
node_queries = [query for query in queries if "DELETE n" in query]
|
||||
assert rel_queries and node_queries
|
||||
# DISTINCT avoids double-counting relationships matched from both ends.
|
||||
assert all("DISTINCT r" in query for query in rel_queries)
|
||||
|
||||
# Relationships must be fully drained before nodes are deleted.
|
||||
first_node = next(i for i, q in enumerate(queries) if "DELETE n" in q)
|
||||
last_rel = max(i for i, q in enumerate(queries) if "DELETE r" in q)
|
||||
assert last_rel < first_node
|
||||
|
||||
def test_returns_zero_when_database_not_found(self):
|
||||
session_ctx = MagicMock()
|
||||
session_ctx.__enter__.side_effect = db_module.GraphDatabaseQueryException(
|
||||
message="Database does not exist",
|
||||
code="Neo.ClientError.Database.DatabaseNotFound",
|
||||
)
|
||||
|
||||
with patch(
|
||||
"api.attack_paths.database.get_session",
|
||||
return_value=session_ctx,
|
||||
):
|
||||
assert db_module.drop_subgraph("db-tenant-gone", "provider-123") == 0
|
||||
|
||||
def test_raises_on_other_errors(self):
|
||||
session_ctx = MagicMock()
|
||||
session_ctx.__enter__.side_effect = db_module.GraphDatabaseQueryException(
|
||||
message="Connection refused",
|
||||
code="Neo.TransientError.General.UnknownError",
|
||||
)
|
||||
|
||||
with patch(
|
||||
"api.attack_paths.database.get_session",
|
||||
return_value=session_ctx,
|
||||
):
|
||||
with pytest.raises(db_module.GraphDatabaseQueryException):
|
||||
db_module.drop_subgraph("db-tenant-abc", "provider-123")
|
||||
|
||||
Reference in New Issue
Block a user