Compare commits

...

11 Commits

Author SHA1 Message Date
Prowler Bot
1b3ed72f0d chore(api): Update prowler dependency to v5.21 for release 5.21.0 (#10385)
Co-authored-by: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
2026-03-19 11:30:42 +01:00
Pepe Fagoaga
8317eff67b chore(changelog): prepare for v5.21.0 (#10380) 2026-03-19 11:09:51 +01:00
Rubén De la Torre Vico
5c4ee0bc48 chore(mcp): bump MCP server version to 0.5.0 (#10383) 2026-03-19 10:47:46 +01:00
rchotacode
0f2fdcfb3f chore(oraclecloud): Add Oracle Defense Cloud Support (#10376)
Co-authored-by: Ronan Chota <ronan.chota@saic.com>
Co-authored-by: Hugo Pereira Brito <101209179+HugoPBrito@users.noreply.github.com>
Co-authored-by: Hugo P.Brito <hugopbrit@gmail.com>
2026-03-19 09:41:58 +00:00
Rubén De la Torre Vico
11a8873155 feat(ui): add attack path custom query skill for Lighthouse AI (#10323)
Co-authored-by: alejandrobailo <alejandrobailo94@gmail.com>
2026-03-18 19:35:50 +01:00
Pedro Martín
5a3475bed3 feat(compliance): add SecNumCloud 3.2 for Oracle Cloud (#10371) 2026-03-18 12:28:38 +01:00
Pedro Martín
bc43eed736 feat(compliance): add SecNumCloud 3.2 for AlibabaCloud (#10370) 2026-03-18 10:40:58 +01:00
Rubén De la Torre Vico
8c1e69b542 feat(mcp): add cartography schema tool for attack paths (#10321) 2026-03-18 10:39:04 +01:00
Rubén De la Torre Vico
75c4f11475 feat(ui): add skills system infrastructure to Lighthouse AI (#10322)
Co-authored-by: alejandrobailo <alejandrobailo94@gmail.com>
2026-03-18 10:28:46 +01:00
Josema Camacho
1da10611e7 perf(attack-paths): reduce sync and findings memory usage with smaller batches and cursor iteration (#10359) 2026-03-18 10:08:30 +01:00
Andoni Alonso
e8aaf5266a chore(sdk): bump pygithub from 2.5.0 to 2.8.0 (#10353) 2026-03-18 09:58:40 +01:00
38 changed files with 4394 additions and 295 deletions

View File

@@ -72,12 +72,13 @@ jobs:
This PR updates the `OCI_COMMERCIAL_REGIONS` dictionary in `prowler/providers/oraclecloud/config.py` with the latest regions fetched from the OCI Identity API (`list_regions()`).
- Government regions (`OCI_GOVERNMENT_REGIONS`) are preserved unchanged
- DOD regions (`OCI_US_DOD_REGIONS`) are preserved unchanged
- Region display names are mapped from Oracle's official documentation
### Checklist
- [x] This is an automated update from OCI official sources
- [x] Government regions (us-langley-1, us-luke-1) preserved
- [x] Government regions (us-langley-1, us-luke-1) and DOD regions (us-gov-ashburn-1, us-gov-phoenix-1, us-gov-chicago-1) are preserved
- [x] No manual review of region data required
### License

View File

@@ -2,16 +2,17 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.22.0] (Prowler UNRELEASED)
## [1.22.0] (Prowler v5.21.0)
### 🚀 Added
- `CORS_ALLOWED_ORIGINS` configurable via environment variable [(#10355)](https://github.com/prowler-cloud/prowler/pull/10355)
- Attack Paths: Tenant and provider related labels to the nodes so they can be easily filtered on custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
### 🔄 Changed
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
- Attack Paths: Added tenant and provider related labels to the nodes so they can be easily filtered on custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
- Attack Paths: Reduce sync and findings memory usage with smaller batches, cursor iteration, and sequential sessions [(#10359)](https://github.com/prowler-cloud/prowler/pull/10359)
### 🐞 Fixed

414
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,7 @@ dependencies = [
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.21",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",

View File

@@ -3,12 +3,14 @@ from typing import Callable
from uuid import UUID
from config.env import env
from tasks.jobs.attack_paths import aws
# Batch size for Neo4j operations
# Batch size for Neo4j write operations (resource labeling, cleanup)
BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000)
# Batch size for Postgres findings fetch (keyset pagination page size)
FINDINGS_BATCH_SIZE = env.int("ATTACK_PATHS_FINDINGS_BATCH_SIZE", 500)
# Batch size for temp-to-tenant graph sync (nodes and relationships per cursor page)
SYNC_BATCH_SIZE = env.int("ATTACK_PATHS_SYNC_BATCH_SIZE", 250)
# Neo4j internal labels (Prowler-specific, not provider-specific)
# - `Internet`: Singleton node representing external internet access for exposed-resource queries

View File

@@ -9,22 +9,15 @@ This module handles:
"""
from collections import defaultdict
from dataclasses import asdict, dataclass, fields
from typing import Any, Generator
from uuid import UUID
import neo4j
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding as FindingModel
from api.models import Provider, ResourceFindingMapping
from prowler.config import config as ProwlerConfig
from tasks.jobs.attack_paths.config import (
BATCH_SIZE,
FINDINGS_BATCH_SIZE,
get_node_uid_field,
get_provider_resource_label,
get_root_node_label,
@@ -37,75 +30,54 @@ from tasks.jobs.attack_paths.queries import (
render_cypher_template,
)
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding as FindingModel
from api.models import Provider, ResourceFindingMapping
from prowler.config import config as ProwlerConfig
logger = get_task_logger(__name__)
# Type Definitions
# -----------------
# Maps dataclass field names to Django ORM query field names
_DB_FIELD_MAP: dict[str, str] = {
"check_title": "check_metadata__checktitle",
}
# Django ORM field names for `.values()` queries
# Most map 1:1 to Neo4j property names, exceptions are remapped in `_to_neo4j_dict`
_DB_QUERY_FIELDS = [
"id",
"uid",
"inserted_at",
"updated_at",
"first_seen_at",
"scan_id",
"delta",
"status",
"status_extended",
"severity",
"check_id",
"check_metadata__checktitle",
"muted",
"muted_reason",
]
@dataclass(slots=True)
class Finding:
"""
Finding data for Neo4j ingestion.
Can be created from a Django .values() query result using from_db_record().
"""
id: str
uid: str
inserted_at: str
updated_at: str
first_seen_at: str
scan_id: str
delta: str
status: str
status_extended: str
severity: str
check_id: str
check_title: str
muted: bool
muted_reason: str | None
resource_uid: str | None = None
@classmethod
def get_db_query_fields(cls) -> tuple[str, ...]:
"""Get field names for Django .values() query."""
return tuple(
_DB_FIELD_MAP.get(f.name, f.name)
for f in fields(cls)
if f.name != "resource_uid"
)
@classmethod
def from_db_record(cls, record: dict[str, Any], resource_uid: str) -> "Finding":
"""Create a Finding from a Django .values() query result."""
return cls(
id=str(record["id"]),
uid=record["uid"],
inserted_at=record["inserted_at"],
updated_at=record["updated_at"],
first_seen_at=record["first_seen_at"],
scan_id=str(record["scan_id"]),
delta=record["delta"],
status=record["status"],
status_extended=record["status_extended"],
severity=record["severity"],
check_id=str(record["check_id"]),
check_title=record["check_metadata__checktitle"],
muted=record["muted"],
muted_reason=record["muted_reason"],
resource_uid=resource_uid,
)
def to_dict(self) -> dict[str, Any]:
"""Convert to dict for Neo4j ingestion."""
return asdict(self)
def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]:
"""Transform a Django `.values()` record into a `dict` ready for Neo4j ingestion."""
return {
"id": str(record["id"]),
"uid": record["uid"],
"inserted_at": record["inserted_at"],
"updated_at": record["updated_at"],
"first_seen_at": record["first_seen_at"],
"scan_id": str(record["scan_id"]),
"delta": record["delta"],
"status": record["status"],
"status_extended": record["status_extended"],
"severity": record["severity"],
"check_id": str(record["check_id"]),
"check_title": record["check_metadata__checktitle"],
"muted": record["muted"],
"muted_reason": record["muted_reason"],
"resource_uid": resource_uid,
}
# Public API
@@ -180,7 +152,7 @@ def add_resource_label(
def load_findings(
neo4j_session: neo4j.Session,
findings_batches: Generator[list[Finding], None, None],
findings_batches: Generator[list[dict[str, Any]], None, None],
prowler_api_provider: Provider,
config: CartographyConfig,
) -> None:
@@ -209,7 +181,7 @@ def load_findings(
batch_size = len(batch)
total_records += batch_size
parameters["findings_data"] = [f.to_dict() for f in batch]
parameters["findings_data"] = batch
logger.info(f"Loading findings batch {batch_num} ({batch_size} records)")
neo4j_session.run(query, parameters)
@@ -247,16 +219,17 @@ def cleanup_findings(
def stream_findings_with_resources(
prowler_api_provider: Provider,
scan_id: str,
) -> Generator[list[Finding], None, None]:
) -> Generator[list[dict[str, Any]], None, None]:
"""
Stream findings with their associated resources in batches.
Uses keyset pagination for efficient traversal of large datasets.
Memory efficient: yields one batch at a time, never holds all findings in memory.
Memory efficient: yields one batch at a time as dicts ready for Neo4j ingestion,
never holds all findings in memory.
"""
logger.info(
f"Starting findings stream for scan {scan_id} "
f"(tenant {prowler_api_provider.tenant_id}) with batch size {BATCH_SIZE}"
f"(tenant {prowler_api_provider.tenant_id}) with batch size {FINDINGS_BATCH_SIZE}"
)
tenant_id = prowler_api_provider.tenant_id
@@ -305,15 +278,14 @@ def _fetch_findings_batch(
Uses read replica and RLS-scoped transaction.
"""
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
# Use all_objects to avoid the ActiveProviderManager's implicit JOIN
# through Scan -> Provider (to check is_deleted=False).
# The provider is already validated as active in this context.
# Use `all_objects` so `Findings` are returned even for soft-deleted `Providers`.
# Note: the provider has already been validated as active in this context.
qs = FindingModel.all_objects.filter(scan_id=scan_id).order_by("id")
if after_id is not None:
qs = qs.filter(id__gt=after_id)
return list(qs.values(*Finding.get_db_query_fields())[:BATCH_SIZE])
return list(qs.values(*_DB_QUERY_FIELDS)[:FINDINGS_BATCH_SIZE])
# Batch Enrichment
@@ -323,7 +295,7 @@ def _fetch_findings_batch(
def _enrich_batch_with_resources(
findings_batch: list[dict[str, Any]],
tenant_id: str,
) -> list[Finding]:
) -> list[dict[str, Any]]:
"""
Enrich findings with their resource UIDs.
@@ -334,7 +306,7 @@ def _enrich_batch_with_resources(
resource_map = _build_finding_resource_map(finding_ids, tenant_id)
return [
Finding.from_db_record(finding, resource_uid)
_to_neo4j_dict(finding, resource_uid)
for finding in findings_batch
for resource_uid in resource_map.get(finding["id"], [])
]

View File

@@ -8,13 +8,14 @@ to the tenant database, adding provider isolation labels and properties.
from collections import defaultdict
from typing import Any
import neo4j
from celery.utils.log import get_task_logger
from api.attack_paths import database as graph_database
from tasks.jobs.attack_paths.config import (
BATCH_SIZE,
PROVIDER_ISOLATION_PROPERTIES,
PROVIDER_RESOURCE_LABEL,
SYNC_BATCH_SIZE,
get_provider_label,
get_tenant_label,
)
@@ -82,40 +83,32 @@ def sync_nodes(
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
"""
last_id = -1
total_synced = 0
with (
graph_database.get_session(source_database) as source_session,
graph_database.get_session(target_database) as target_session,
):
while True:
rows = list(
source_session.run(
NODE_FETCH_QUERY,
{"last_id": last_id, "batch_size": BATCH_SIZE},
)
while True:
grouped: dict[tuple[str, ...], list[dict[str, Any]]] = defaultdict(list)
batch_count = 0
with graph_database.get_session(source_database) as source_session:
result = source_session.run(
NODE_FETCH_QUERY,
{"last_id": last_id, "batch_size": SYNC_BATCH_SIZE},
)
for record in result:
batch_count += 1
last_id = record["internal_id"]
key, value = _node_to_sync_dict(record, provider_id)
grouped[key].append(value)
if not rows:
break
last_id = rows[-1]["internal_id"]
grouped: dict[tuple[str, ...], list[dict[str, Any]]] = defaultdict(list)
for row in rows:
labels = tuple(sorted(set(row["labels"] or [])))
props = dict(row["props"] or {})
_strip_internal_properties(props)
provider_element_id = f"{provider_id}:{row['element_id']}"
grouped[labels].append(
{
"provider_element_id": provider_element_id,
"props": props,
}
)
if batch_count == 0:
break
with graph_database.get_session(target_database) as target_session:
for labels, batch in grouped.items():
label_set = set(labels)
label_set.add(PROVIDER_RESOURCE_LABEL)
@@ -134,10 +127,10 @@ def sync_nodes(
},
)
total_synced += len(rows)
logger.info(
f"Synced {total_synced} nodes from {source_database} to {target_database}"
)
total_synced += batch_count
logger.info(
f"Synced {total_synced} nodes from {source_database} to {target_database}"
)
return total_synced
@@ -151,41 +144,32 @@ def sync_relationships(
Sync relationships from source to target database.
Adds `_provider_id` property to all relationships.
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
"""
last_id = -1
total_synced = 0
with (
graph_database.get_session(source_database) as source_session,
graph_database.get_session(target_database) as target_session,
):
while True:
rows = list(
source_session.run(
RELATIONSHIPS_FETCH_QUERY,
{"last_id": last_id, "batch_size": BATCH_SIZE},
)
while True:
grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
batch_count = 0
with graph_database.get_session(source_database) as source_session:
result = source_session.run(
RELATIONSHIPS_FETCH_QUERY,
{"last_id": last_id, "batch_size": SYNC_BATCH_SIZE},
)
for record in result:
batch_count += 1
last_id = record["internal_id"]
key, value = _rel_to_sync_dict(record, provider_id)
grouped[key].append(value)
if not rows:
break
last_id = rows[-1]["internal_id"]
grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
for row in rows:
props = dict(row["props"] or {})
_strip_internal_properties(props)
rel_type = row["rel_type"]
grouped[rel_type].append(
{
"start_element_id": f"{provider_id}:{row['start_element_id']}",
"end_element_id": f"{provider_id}:{row['end_element_id']}",
"provider_element_id": f"{provider_id}:{rel_type}:{row['internal_id']}",
"props": props,
}
)
if batch_count == 0:
break
with graph_database.get_session(target_database) as target_session:
for rel_type, batch in grouped.items():
query = render_cypher_template(
RELATIONSHIP_SYNC_TEMPLATE, {"__REL_TYPE__": rel_type}
@@ -198,14 +182,42 @@ def sync_relationships(
},
)
total_synced += len(rows)
logger.info(
f"Synced {total_synced} relationships from {source_database} to {target_database}"
)
total_synced += batch_count
logger.info(
f"Synced {total_synced} relationships from {source_database} to {target_database}"
)
return total_synced
def _node_to_sync_dict(
    record: neo4j.Record, provider_id: str
) -> tuple[tuple[str, ...], dict[str, Any]]:
    """Transform a source node record into a (grouping_key, sync_dict) pair."""
    # Copy properties before stripping so the source record stays untouched.
    cleaned_props = dict(record["props"] or {})
    _strip_internal_properties(cleaned_props)
    # Nodes are grouped by their (sorted, de-duplicated) label set.
    grouping_key = tuple(sorted(set(record["labels"] or [])))
    sync_dict = {
        "provider_element_id": f"{provider_id}:{record['element_id']}",
        "props": cleaned_props,
    }
    return grouping_key, sync_dict
def _rel_to_sync_dict(
    record: neo4j.Record, provider_id: str
) -> tuple[str, dict[str, Any]]:
    """Transform a source relationship record into a (grouping_key, sync_dict) pair."""
    # Copy properties before stripping so the source record stays untouched.
    cleaned_props = dict(record["props"] or {})
    _strip_internal_properties(cleaned_props)
    # Relationships are grouped by type; the type is also part of the
    # synthetic element id so ids stay unique across relationship types.
    relationship_type = record["rel_type"]
    sync_dict = {
        "start_element_id": f"{provider_id}:{record['start_element_id']}",
        "end_element_id": f"{provider_id}:{record['end_element_id']}",
        "provider_element_id": (
            f"{provider_id}:{relationship_type}:{record['internal_id']}"
        ),
        "props": cleaned_props,
    }
    return relationship_type, sync_dict
def _strip_internal_properties(props: dict[str, Any]) -> None:
"""Remove provider isolation properties before the += spread in sync templates."""
for key in PROVIDER_ISOLATION_PROPERTIES:

View File

@@ -1279,16 +1279,10 @@ class TestAttackPathsFindingsHelpers:
provider.provider = Provider.ProviderChoices.AWS
provider.save()
# Create mock Finding objects with to_dict() method
mock_finding_1 = MagicMock()
mock_finding_1.to_dict.return_value = {"id": "1", "resource_uid": "r-1"}
mock_finding_2 = MagicMock()
mock_finding_2.to_dict.return_value = {"id": "2", "resource_uid": "r-2"}
# Create a generator that yields two batches of Finding instances
# Create a generator that yields two batches of dicts (pre-converted)
def findings_generator():
yield [mock_finding_1]
yield [mock_finding_2]
yield [{"id": "1", "resource_uid": "r-1"}]
yield [{"id": "2", "resource_uid": "r-2"}]
config = SimpleNamespace(update_tag=12345)
mock_session = MagicMock()
@@ -1435,17 +1429,17 @@ class TestAttackPathsFindingsHelpers:
assert len(findings_data) == 1
finding_result = findings_data[0]
assert finding_result.id == str(finding.id)
assert finding_result.resource_uid == resource.uid
assert finding_result.check_title == "Check title"
assert finding_result.scan_id == str(latest_scan.id)
assert finding_result["id"] == str(finding.id)
assert finding_result["resource_uid"] == resource.uid
assert finding_result["check_title"] == "Check title"
assert finding_result["scan_id"] == str(latest_scan.id)
def test_enrich_batch_with_resources_single_resource(
self,
tenants_fixture,
providers_fixture,
):
"""One finding + one resource = one output Finding instance"""
"""One finding + one resource = one output dict"""
tenant = tenants_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
@@ -1519,16 +1513,16 @@ class TestAttackPathsFindingsHelpers:
)
assert len(result) == 1
assert result[0].resource_uid == resource.uid
assert result[0].id == str(finding.id)
assert result[0].status == "FAIL"
assert result[0]["resource_uid"] == resource.uid
assert result[0]["id"] == str(finding.id)
assert result[0]["status"] == "FAIL"
def test_enrich_batch_with_resources_multiple_resources(
self,
tenants_fixture,
providers_fixture,
):
"""One finding + three resources = three output Finding instances"""
"""One finding + three resources = three output dicts"""
tenant = tenants_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
@@ -1607,13 +1601,13 @@ class TestAttackPathsFindingsHelpers:
)
assert len(result) == 3
result_resource_uids = {r.resource_uid for r in result}
result_resource_uids = {r["resource_uid"] for r in result}
assert result_resource_uids == {r.uid for r in resources}
# All should have same finding data
for r in result:
assert r.id == str(finding.id)
assert r.status == "FAIL"
assert r["id"] == str(finding.id)
assert r["status"] == "FAIL"
def test_enrich_batch_with_resources_no_resources_skips(
self,
@@ -1690,16 +1684,12 @@ class TestAttackPathsFindingsHelpers:
provider.save()
scan_id = "some-scan-id"
with (
patch("tasks.jobs.attack_paths.findings.rls_transaction") as mock_rls,
patch("tasks.jobs.attack_paths.findings.Finding") as mock_finding,
):
with patch("tasks.jobs.attack_paths.findings.rls_transaction") as mock_rls:
# Create generator but don't iterate
findings_module.stream_findings_with_resources(provider, scan_id)
# Nothing should be called yet
mock_rls.assert_not_called()
mock_finding.objects.filter.assert_not_called()
def test_load_findings_empty_generator(self, providers_fixture):
"""Empty generator should not call neo4j"""
@@ -1752,41 +1742,226 @@ class TestAddResourceLabel:
assert "AWSResource" not in query.replace("_AWSResource", "")
def _make_session_ctx(session, call_order=None, name=None):
"""Create a mock context manager wrapping a mock session."""
ctx = MagicMock()
if call_order is not None and name is not None:
ctx.__enter__ = MagicMock(
side_effect=lambda: (call_order.append(f"{name}:enter"), session)[1]
)
ctx.__exit__ = MagicMock(
side_effect=lambda *a: (call_order.append(f"{name}:exit"), False)[1]
)
else:
ctx.__enter__ = MagicMock(return_value=session)
ctx.__exit__ = MagicMock(return_value=False)
return ctx
class TestSyncNodes:
def test_sync_nodes_adds_private_label(self):
mock_source_session = MagicMock()
mock_target_session = MagicMock()
row = {
"internal_id": 1,
"element_id": "elem-1",
"labels": ["SomeLabel"],
"props": {"key": "value"},
}
mock_source_session.run.side_effect = [[row], []]
source_ctx = MagicMock()
source_ctx.__enter__ = MagicMock(return_value=mock_source_session)
source_ctx.__exit__ = MagicMock(return_value=False)
target_ctx = MagicMock()
target_ctx.__enter__ = MagicMock(return_value=mock_target_session)
target_ctx.__exit__ = MagicMock(return_value=False)
mock_source_1 = MagicMock()
mock_source_1.run.return_value = [row]
mock_target = MagicMock()
mock_source_2 = MagicMock()
mock_source_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[source_ctx, target_ctx],
side_effect=[
_make_session_ctx(mock_source_1),
_make_session_ctx(mock_target),
_make_session_ctx(mock_source_2),
],
):
total = sync_module.sync_nodes(
"source-db", "target-db", "tenant-1", "prov-1"
)
assert total == 1
query = mock_target_session.run.call_args.args[0]
query = mock_target.run.call_args.args[0]
assert "_ProviderResource" in query
assert "_Tenant_tenant1" in query
assert "_Provider_prov1" in query
def test_sync_nodes_source_closes_before_target_opens(self):
row = {
"internal_id": 1,
"element_id": "elem-1",
"labels": ["SomeLabel"],
"props": {"key": "value"},
}
call_order = []
src_1 = MagicMock()
src_1.run.return_value = [row]
tgt = MagicMock()
src_2 = MagicMock()
src_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(tgt, call_order, "target"),
_make_session_ctx(src_2, call_order, "source2"),
],
):
sync_module.sync_nodes("src-db", "tgt-db", "t-1", "p-1")
assert call_order.index("source1:exit") < call_order.index("target:enter")
def test_sync_nodes_pagination_with_batch_size_1(self):
row_a = {
"internal_id": 1,
"element_id": "elem-1",
"labels": ["LabelA"],
"props": {"a": 1},
}
row_b = {
"internal_id": 2,
"element_id": "elem-2",
"labels": ["LabelB"],
"props": {"b": 2},
}
src_1 = MagicMock()
src_1.run.return_value = [row_a]
src_2 = MagicMock()
src_2.run.return_value = [row_b]
src_3 = MagicMock()
src_3.run.return_value = []
tgt_1 = MagicMock()
tgt_2 = MagicMock()
with (
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1),
_make_session_ctx(tgt_1),
_make_session_ctx(src_2),
_make_session_ctx(tgt_2),
_make_session_ctx(src_3),
],
),
patch("tasks.jobs.attack_paths.sync.SYNC_BATCH_SIZE", 1),
):
total = sync_module.sync_nodes("src", "tgt", "t-1", "p-1")
assert total == 2
assert src_1.run.call_args.args[1]["last_id"] == -1
assert src_2.run.call_args.args[1]["last_id"] == 1
def test_sync_nodes_empty_source_returns_zero(self):
src = MagicMock()
src.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[_make_session_ctx(src)],
) as mock_get_session:
total = sync_module.sync_nodes("src", "tgt", "t-1", "p-1")
assert total == 0
assert mock_get_session.call_count == 1
class TestSyncRelationships:
def test_sync_relationships_source_closes_before_target_opens(self):
row = {
"internal_id": 1,
"rel_type": "HAS",
"start_element_id": "s-1",
"end_element_id": "e-1",
"props": {},
}
call_order = []
src_1 = MagicMock()
src_1.run.return_value = [row]
tgt = MagicMock()
src_2 = MagicMock()
src_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(tgt, call_order, "target"),
_make_session_ctx(src_2, call_order, "source2"),
],
):
sync_module.sync_relationships("src", "tgt", "p-1")
assert call_order.index("source1:exit") < call_order.index("target:enter")
def test_sync_relationships_pagination_with_batch_size_1(self):
row_a = {
"internal_id": 1,
"rel_type": "HAS",
"start_element_id": "s-1",
"end_element_id": "e-1",
"props": {"a": 1},
}
row_b = {
"internal_id": 2,
"rel_type": "CONNECTS",
"start_element_id": "s-2",
"end_element_id": "e-2",
"props": {"b": 2},
}
src_1 = MagicMock()
src_1.run.return_value = [row_a]
src_2 = MagicMock()
src_2.run.return_value = [row_b]
src_3 = MagicMock()
src_3.run.return_value = []
tgt_1 = MagicMock()
tgt_2 = MagicMock()
with (
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1),
_make_session_ctx(tgt_1),
_make_session_ctx(src_2),
_make_session_ctx(tgt_2),
_make_session_ctx(src_3),
],
),
patch("tasks.jobs.attack_paths.sync.SYNC_BATCH_SIZE", 1),
):
total = sync_module.sync_relationships("src", "tgt", "p-1")
assert total == 2
assert src_1.run.call_args.args[1]["last_id"] == -1
assert src_2.run.call_args.args[1]["last_id"] == 1
def test_sync_relationships_empty_source_returns_zero(self):
src = MagicMock()
src.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[_make_session_ctx(src)],
) as mock_get_session:
total = sync_module.sync_relationships("src", "tgt", "p-1")
assert total == 0
assert mock_get_session.call_count == 1
class TestInternetAnalysis:
def _make_provider_and_config(self):

View File

@@ -0,0 +1,24 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """Build the section containers for the compliance dashboard table.

    Selects the requirement/check columns needed by the format-3 layout and
    groups rows by section and requirement id.
    """
    required_columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_DESCRIPTION",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    # Copy to avoid mutating the caller's DataFrame downstream.
    subset = data[required_columns].copy()
    return get_section_containers_format3(
        subset, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -0,0 +1,24 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """Build the section containers for the compliance dashboard table.

    Selects the requirement/check columns needed by the format-3 layout and
    groups rows by section and requirement id.
    """
    required_columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_DESCRIPTION",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    # Copy to avoid mutating the caller's DataFrame downstream.
    subset = data[required_columns].copy()
    return get_section_containers_format3(
        subset, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -80,6 +80,8 @@ def load_csv_files(csv_files):
result = result.replace("_M65", " - M65")
if "ALIBABACLOUD" in result:
result = result.replace("_ALIBABACLOUD", " - ALIBABACLOUD")
if "ORACLECLOUD" in result:
result = result.replace("_ORACLECLOUD", " - ORACLECLOUD")
results.append(result)
unique_results = set(results)

View File

@@ -2,6 +2,12 @@
All notable changes to the **Prowler MCP Server** are documented in this file.
## [0.5.0] (Prowler v5.21.0)
### 🚀 Added
- Attack Path tool to get Neo4j DB schema [(#10321)](https://github.com/prowler-cloud/prowler/pull/10321)
## [0.4.0] (Prowler v5.19.0)
### 🚀 Added

View File

@@ -5,7 +5,7 @@ This package provides MCP tools for accessing:
- Prowler Hub: All security artifacts (detections, remediations and frameworks) supported by Prowler
"""
__version__ = "0.4.0"
__version__ = "0.5.0"
__author__ = "Prowler Team"
__email__ = "engineering@prowler.com"

View File

@@ -118,6 +118,51 @@ class AttackPathScansListResponse(BaseModel):
)
class AttackPathCartographySchema(MinimalSerializerMixin, BaseModel):
"""Cartography graph schema metadata for a completed attack paths scan.
Contains the schema URL and provider info needed to fetch the full
Cartography schema markdown for openCypher query generation.
"""
model_config = ConfigDict(frozen=True)
id: str = Field(description="Unique identifier for the schema resource")
provider: str = Field(description="Cloud provider type (aws, azure, gcp, etc.)")
cartography_version: str = Field(description="Version of the Cartography schema")
schema_url: str = Field(description="URL to the Cartography schema page on GitHub")
raw_schema_url: str = Field(
description="Raw URL to fetch the Cartography schema markdown content"
)
schema_content: str | None = Field(
default=None,
description="Full Cartography schema markdown content (populated after fetch)",
)
@classmethod
def from_api_response(
cls, response: dict[str, Any]
) -> "AttackPathCartographySchema":
"""Transform JSON:API schema response to model.
Args:
response: Full API response with data and attributes
Returns:
AttackPathCartographySchema instance
"""
data = response.get("data", {})
attributes = data.get("attributes", {})
return cls(
id=data["id"],
provider=attributes["provider"],
cartography_version=attributes["cartography_version"],
schema_url=attributes["schema_url"],
raw_schema_url=attributes["raw_schema_url"],
)
class AttackPathQueryParameter(MinimalSerializerMixin, BaseModel):
"""Parameter definition for an attack paths query.

View File

@@ -8,6 +8,7 @@ through cloud infrastructure relationships.
from typing import Any, Literal
from prowler_mcp_server.prowler_app.models.attack_paths import (
AttackPathCartographySchema,
AttackPathQuery,
AttackPathQueryResult,
AttackPathScansListResponse,
@@ -225,3 +226,53 @@ class AttackPathsTools(BaseTool):
f"Failed to run attack paths query '{query_id}' on scan {scan_id}: {e}"
)
return {"error": f"Failed to run attack paths query '{query_id}': {str(e)}"}
async def get_attack_paths_cartography_schema(
self,
scan_id: str = Field(
description="UUID of a COMPLETED attack paths scan. Use `prowler_app_list_attack_paths_scans` with state=['completed'] to find scan IDs"
),
) -> dict[str, Any]:
"""Retrieve the Cartography graph schema for a completed attack paths scan.
This tool fetches the full Cartography schema (node labels, relationships,
and properties) so the LLM can write accurate custom openCypher queries
for attack paths analysis.
Two-step flow:
1. Calls the Prowler API to get schema metadata (provider, version, URLs)
2. Fetches the raw Cartography schema markdown from GitHub
Returns:
- id: Schema resource identifier
- provider: Cloud provider type
- cartography_version: Schema version
- schema_url: GitHub page URL for reference
- raw_schema_url: Raw markdown URL
- schema_content: Full Cartography schema markdown with node/relationship definitions
Workflow:
1. Use prowler_app_list_attack_paths_scans to find a completed scan
2. Use this tool to get the schema for the scan's provider
3. Use the schema to craft custom openCypher queries
4. Execute queries with prowler_app_run_attack_paths_query
"""
try:
api_response = await self.api_client.get(
f"/attack-paths-scans/{scan_id}/schema"
)
schema = AttackPathCartographySchema.from_api_response(api_response)
schema_content = await self.api_client.fetch_external_url(
schema.raw_schema_url
)
return schema.model_copy(
update={"schema_content": schema_content}
).model_dump()
except Exception as e:
self.logger.error(
f"Failed to get cartography schema for scan {scan_id}: {e}"
)
return {"error": f"Failed to get cartography schema: {str(e)}"}

View File

@@ -4,11 +4,15 @@ import asyncio
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict
from urllib.parse import urlparse
import httpx
from prowler_mcp_server import __version__
from prowler_mcp_server.lib.logger import logger
from prowler_mcp_server.prowler_app.utils.auth import ProwlerAppAuth
ALLOWED_EXTERNAL_DOMAINS: frozenset[str] = frozenset({"raw.githubusercontent.com"})
class HTTPMethod(str, Enum):
"""HTTP methods enum."""
@@ -187,6 +191,47 @@ class ProwlerAPIClient(metaclass=SingletonMeta):
"""
return await self._make_request(HTTPMethod.DELETE, path, params=params)
async def fetch_external_url(self, url: str) -> str:
    """Fetch the raw text body of an allowlisted external HTTPS URL.

    The request is unauthenticated and goes through the shared singleton
    httpx client. The scheme and hostname are both validated before any
    network call is made, mitigating SSRF.

    Args:
        url: The external URL to fetch content from.

    Returns:
        The response body as text.

    Raises:
        ValueError: If the URL is not HTTPS or its domain is not allowlisted.
        Exception: If the server returns an HTTP error status, or any other
            transport failure occurs.
    """
    components = urlparse(url)

    # Reject non-HTTPS URLs before touching the network.
    if components.scheme != "https":
        raise ValueError(f"Only HTTPS URLs are allowed, got '{components.scheme}'")

    # Enforce the SSRF domain allowlist.
    if components.hostname not in ALLOWED_EXTERNAL_DOMAINS:
        raise ValueError(
            f"Domain '{components.hostname}' is not allowed. "
            f"Allowed domains: {', '.join(sorted(ALLOWED_EXTERNAL_DOMAINS))}"
        )

    try:
        resp = await self.client.get(
            url,
            headers={"User-Agent": f"prowler-mcp-server/{__version__}"},
        )
        resp.raise_for_status()
        return resp.text
    except httpx.HTTPStatusError as exc:
        # Map HTTP-level failures to a generic error carrying the status code.
        logger.error(f"HTTP error fetching external URL {url}: {exc}")
        raise Exception(
            f"Failed to fetch external URL: {exc.response.status_code}"
        ) from exc
    except Exception as exc:
        # Transport or unexpected failures: log and propagate unchanged.
        logger.error(f"Error fetching external URL {url}: {exc}")
        raise
async def poll_task_until_complete(
self,
task_id: str,

View File

@@ -11,7 +11,7 @@ description = "MCP server for Prowler ecosystem"
name = "prowler-mcp"
readme = "README.md"
requires-python = ">=3.12"
version = "0.4.0"
version = "0.5.0"
[project.scripts]
prowler-mcp = "prowler_mcp_server.main:main"

2
mcp_server/uv.lock generated
View File

@@ -717,7 +717,7 @@ wheels = [
[[package]]
name = "prowler-mcp"
version = "0.3.0"
version = "0.5.0"
source = { editable = "." }
dependencies = [
{ name = "fastmcp" },

39
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -2151,24 +2151,6 @@ files = [
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
]
[[package]]
name = "deprecated"
version = "1.2.18"
description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
groups = ["main"]
files = [
{file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"},
{file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"},
]
[package.dependencies]
wrapt = ">=1.10,<2"
[package.extras]
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools ; python_version >= \"3.12\"", "tox"]
[[package]]
name = "detect-secrets"
version = "1.5.0"
@@ -3972,14 +3954,14 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""}
[[package]]
name = "multipart"
version = "1.3.1"
version = "1.3.0"
description = "Parser for multipart/form-data"
optional = false
python-versions = ">=3.8"
groups = ["dev"]
files = [
{file = "multipart-1.3.1-py3-none-any.whl", hash = "sha256:a82b59e1befe74d3d30b3d3f70efd5a2eba4d938f845dcff9faace968888ff29"},
{file = "multipart-1.3.1.tar.gz", hash = "sha256:211d7cfc1a7a43e75c4d24ee0e8e0f4f61d522f1a21575303ae85333dea687bf"},
{file = "multipart-1.3.0-py3-none-any.whl", hash = "sha256:439bf4b00fd7cb2dbff08ae13f49f4f49798931ecd8d496372c63537fa19f304"},
{file = "multipart-1.3.0.tar.gz", hash = "sha256:a46bd6b0eb4c1ba865beb88ddd886012a3da709b6e7b86084fc37e99087e5cf1"},
]
[package.extras]
@@ -5050,22 +5032,21 @@ files = [
[[package]]
name = "pygithub"
version = "2.5.0"
version = "2.8.0"
description = "Use the full Github API v3"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "PyGithub-2.5.0-py3-none-any.whl", hash = "sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2"},
{file = "pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf"},
{file = "pygithub-2.8.0-py3-none-any.whl", hash = "sha256:11a3473c1c2f1c39c525d0ee8c559f369c6d46c272cb7321c9b0cabc7aa1ce7d"},
{file = "pygithub-2.8.0.tar.gz", hash = "sha256:72f5f2677d86bc3a8843aa720c6ce4c1c42fb7500243b136e3d5e14ddb5c3386"},
]
[package.dependencies]
Deprecated = "*"
pyjwt = {version = ">=2.4.0", extras = ["crypto"]}
pynacl = ">=1.4.0"
requests = ">=2.14.0"
typing-extensions = ">=4.0.0"
typing-extensions = ">=4.5.0"
urllib3 = ">=1.26.0"
[[package]]
@@ -6519,7 +6500,7 @@ version = "1.17.2"
description = "Module for decorators, wrappers and monkey patching."
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
groups = ["dev"]
files = [
{file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"},
{file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"},
@@ -6905,4 +6886,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">3.9.1,<3.13"
content-hash = "386f6cf2bed49290cc4661aa2093ceb018aa6cdaf6864bdfab36f6c2c50e241e"
content-hash = "fa67f98ae1b75ec5a54d1d6a1c33c5412d888ec60cf35fc407606dc48329c0bf"

View File

@@ -2,7 +2,7 @@
All notable changes to the **Prowler SDK** are documented in this file.
## [5.21.0] (Prowler UNRELEASED)
## [5.21.0] (Prowler v5.21.0)
### 🚀 Added
@@ -10,22 +10,22 @@ All notable changes to the **Prowler SDK** are documented in this file.
- `entra_conditional_access_policy_device_code_flow_blocked` check for M365 provider [(#10218)](https://github.com/prowler-cloud/prowler/pull/10218)
- RBI compliance for the Azure provider [(#10339)](https://github.com/prowler-cloud/prowler/pull/10339)
- `entra_conditional_access_policy_require_mfa_for_admin_portals` check for Azure provider and update CIS compliance [(#10330)](https://github.com/prowler-cloud/prowler/pull/10330)
- CheckMetadata Pydantic validators [(#8584)](https://github.com/prowler-cloud/prowler/pull/8583)
- CheckMetadata Pydantic validators [(#8583)](https://github.com/prowler-cloud/prowler/pull/8583)
- `organization_repository_deletion_limited` check for GitHub provider [(#10185)](https://github.com/prowler-cloud/prowler/pull/10185)
- SecNumCloud 3.2 compliance framework for the GCP provider [(#10364)](https://github.com/prowler-cloud/prowler/pull/10364)
- SecNumCloud 3.2 for the GCP provider [(#10364)](https://github.com/prowler-cloud/prowler/pull/10364)
- SecNumCloud 3.2 for the Azure provider [(#10358)](https://github.com/prowler-cloud/prowler/pull/10358)
- SecNumCloud 3.2 for the Alibaba Cloud provider [(#10370)](https://github.com/prowler-cloud/prowler/pull/10370)
- SecNumCloud 3.2 for the Oracle Cloud provider [(#10371)](https://github.com/prowler-cloud/prowler/pull/10371)
### 🔄 Changed
- Bump `pygithub` from 2.5.0 to 2.8.0 to use native Organization properties
- Update M365 SharePoint service metadata to new format [(#9684)](https://github.com/prowler-cloud/prowler/pull/9684)
- Update M365 Exchange service metadata to new format [(#9683)](https://github.com/prowler-cloud/prowler/pull/9683)
- Update M365 Teams service metadata to new format [(#9685)](https://github.com/prowler-cloud/prowler/pull/9685)
- Update M365 Entra ID service metadata to new format [(#9682)](https://github.com/prowler-cloud/prowler/pull/9682)
- Update ResourceType and Categories for Azure Entra ID service metadata [(#10334)](https://github.com/prowler-cloud/prowler/pull/10334)
### 🔐 Security
- Bump `multipart` to 1.3.1 to fix [GHSA-p2m9-wcp5-6qw3](https://github.com/defnull/multipart/security/advisories/GHSA-p2m9-wcp5-6qw3) [(#10331)](https://github.com/prowler-cloud/prowler/pull/10331)
- Update OCI Regions to include US DoD regions [(#10376)](https://github.com/prowler-cloud/prowler/pull/10376)
### 🐞 Fixed
@@ -33,6 +33,10 @@ All notable changes to the **Prowler SDK** are documented in this file.
- RBI compliance framework support on Prowler Dashboard for the Azure provider [(#10360)](https://github.com/prowler-cloud/prowler/pull/10360)
- CheckMetadata strict validators rejecting valid external tool provider data (image, iac, llm) [(#10363)](https://github.com/prowler-cloud/prowler/pull/10363)
### 🔐 Security
- Bump `multipart` to 1.3.1 to fix [GHSA-p2m9-wcp5-6qw3](https://github.com/defnull/multipart/security/advisories/GHSA-p2m9-wcp5-6qw3) [(#10331)](https://github.com/prowler-cloud/prowler/pull/10331)
---
## [5.20.0] (Prowler v5.20.0)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -224,6 +224,7 @@ class IacProvider(Provider):
},
},
"Categories": [],
"AdditionalURLs": [],
"DependsOn": [],
"RelatedTo": [],
"Notes": "",

View File

@@ -65,5 +65,16 @@ OCI_GOVERNMENT_REGIONS = {
"us-luke-1": "US Gov East",
}
# OCI Defense Regions
# US Department of Defense regions: region identifier -> display name.
# NOTE(review): key prefix "us-gov-" matches OCI's DoD region identifiers;
# confirm against the OCI Identity list_regions() output when updating.
OCI_US_DOD_REGIONS: dict[str, str] = {
    "us-gov-ashburn-1": "US DoD East (Ashburn)",
    "us-gov-chicago-1": "US DoD North (Chicago)",
    "us-gov-phoenix-1": "US DoD West (Phoenix)",
}
# All OCI Regions
OCI_REGIONS = {**OCI_COMMERCIAL_REGIONS, **OCI_GOVERNMENT_REGIONS}
# Merged lookup across commercial, government, and DoD realms
# (region identifier -> display name). Later mappings win on key collision;
# presumably the realms use disjoint region identifiers — verify when adding realms.
OCI_REGIONS: dict[str, str] = {
    **OCI_COMMERCIAL_REGIONS,
    **OCI_GOVERNMENT_REGIONS,
    **OCI_US_DOD_REGIONS,
}

View File

@@ -62,7 +62,7 @@ dependencies = [
"pandas==2.2.3",
"py-ocsf-models==0.8.1",
"pydantic (>=2.0,<3.0)",
"pygithub==2.5.0",
"pygithub==2.8.0",
"python-dateutil (>=2.9.0.post0,<3.0.0)",
"pytz==2025.1",
"schema==0.7.5",

View File

@@ -61,7 +61,9 @@ class TestIacProvider:
assert report.check_metadata.CheckID == SAMPLE_FAILED_CHECK["ID"]
assert report.check_metadata.CheckTitle == SAMPLE_FAILED_CHECK["Title"]
assert report.check_metadata.Severity == "low"
assert report.check_metadata.RelatedUrl == SAMPLE_FAILED_CHECK["PrimaryURL"]
assert report.check_metadata.RelatedUrl == SAMPLE_FAILED_CHECK.get(
"PrimaryURL", ""
)
def test_iac_provider_process_finding_passed(self):
"""Test processing a passed finding"""

View File

@@ -2,14 +2,19 @@
All notable changes to the **Prowler UI** are documented in this file.
## [1.21.0] (Prowler v5.21.0 UNRELEASED)
## [1.21.0] (Prowler v5.21.0)
### 🚀 Added
- Skill system to Lighthouse AI [(#10322)](https://github.com/prowler-cloud/prowler/pull/10322)
- Skill for creating custom queries on Attack Paths [(#10323)](https://github.com/prowler-cloud/prowler/pull/10323)
### 🔄 Changed
- Google Workspace provider support [(#10333)](https://github.com/prowler-cloud/prowler/pull/10333)
- Image (Container Registry) provider support in UI: badge icon, credentials form, and provider-type filtering [(#10167)](https://github.com/prowler-cloud/prowler/pull/10167)
- Organization and organizational unit row actions (Edit Name, Update Credentials, Test Connections, Delete) in providers table dropdown [(#10317)](https://github.com/prowler-cloud/prowler/pull/10317)
- Events tab in Findings and Resource detail cards showing an AWS CloudTrail timeline with expandable event rows, actor info, request/response JSON payloads, and error details [(#10320)](https://github.com/prowler-cloud/prowler/pull/10320)
- AWS Organization and organizational unit row actions (Edit Name, Update Credentials, Test Connections, Delete) in providers table dropdown [(#10317)](https://github.com/prowler-cloud/prowler/pull/10317)
---

View File

@@ -9,6 +9,7 @@ import {
MESSAGE_ROLES,
MESSAGE_STATUS,
META_TOOLS,
SKILL_PREFIX,
} from "@/lib/lighthouse/constants";
import type { ChainOfThoughtData, Message } from "@/lib/lighthouse/types";
@@ -70,17 +71,28 @@ export function getChainOfThoughtStepLabel(
return `Executing ${tool}`;
}
if (metaTool === META_TOOLS.LOAD_SKILL && tool) {
const skillId = tool.startsWith(SKILL_PREFIX)
? tool.slice(SKILL_PREFIX.length)
: tool;
return `Loading ${skillId} skill`;
}
return tool || "Completed";
}
/**
 * Checks whether a tool name refers to one of the Lighthouse meta-tools
 * (describe_tool, execute_tool, or load_skill).
 *
 * @param metaTool - The tool name to check
 * @returns True when the name is a meta-tool, false otherwise
 */
export function isMetaTool(metaTool: string): boolean {
  switch (metaTool) {
    case META_TOOLS.DESCRIBE:
    case META_TOOLS.EXECUTE:
    case META_TOOLS.LOAD_SKILL:
      return true;
    default:
      return false;
  }
}
/**

View File

@@ -9,6 +9,7 @@ import {
ERROR_PREFIX,
LIGHTHOUSE_AGENT_TAG,
META_TOOLS,
SKILL_PREFIX,
STREAM_MESSAGE_ID,
} from "@/lib/lighthouse/constants";
import type { ChainOfThoughtData, StreamEvent } from "@/lib/lighthouse/types";
@@ -16,10 +17,35 @@ import type { ChainOfThoughtData, StreamEvent } from "@/lib/lighthouse/types";
// Re-export for convenience
export { CHAIN_OF_THOUGHT_ACTIONS, ERROR_PREFIX, STREAM_MESSAGE_ID };
/**
 * Safely parses the JSON string nested inside a meta-tool's input wrapper.
 * In tool stream events, meta-tools receive their arguments as
 * `{ input: "<JSON string>" }`.
 * Note: In chat_model_end events, args are pre-parsed by LangChain
 * (see handleChatModelEndEvent).
 *
 * @returns The parsed object, or null if the wrapper shape or JSON is invalid
 */
function parseMetaToolInput(
  toolInput: unknown,
): Record<string, unknown> | null {
  // The wrapper must be an object carrying a string `input` payload.
  if (!toolInput || typeof toolInput !== "object" || !("input" in toolInput)) {
    return null;
  }
  const payload = (toolInput as { input: unknown }).input;
  if (typeof payload !== "string") {
    return null;
  }
  try {
    return JSON.parse(payload) as Record<string, unknown>;
  } catch {
    // Malformed JSON payload
    return null;
  }
}
/**
* Extracts the actual tool name from meta-tool input.
*
* Meta-tools (describe_tool, execute_tool) wrap actual tool calls.
* Meta-tools (describe_tool, execute_tool, load_skill) wrap actual tool calls.
* This function parses the input to extract the real tool name.
*
* @param metaToolName - The name of the meta-tool or actual tool
@@ -30,26 +56,19 @@ export function extractActualToolName(
metaToolName: string,
toolInput: unknown,
): string | null {
// Check if this is a meta-tool
if (
metaToolName === META_TOOLS.DESCRIBE ||
metaToolName === META_TOOLS.EXECUTE
) {
// Meta-tool: Parse the JSON string in input.input
try {
if (
toolInput &&
typeof toolInput === "object" &&
"input" in toolInput &&
typeof toolInput.input === "string"
) {
const parsedInput = JSON.parse(toolInput.input);
return parsedInput.toolName || null;
}
} catch {
// Failed to parse, return null
return null;
}
const parsed = parseMetaToolInput(toolInput);
return (parsed?.toolName as string) || null;
}
if (metaToolName === META_TOOLS.LOAD_SKILL) {
const parsed = parseMetaToolInput(toolInput);
return parsed?.skillId
? `${SKILL_PREFIX}${parsed.skillId as string}`
: null;
}
// Actual tool execution: use the name directly
@@ -172,11 +191,18 @@ export function handleChatModelEndEvent(
const metaToolName = toolCall.name;
const toolArgs = toolCall.args;
// Extract actual tool name from toolArgs.toolName (camelCase)
const actualToolName =
toolArgs && typeof toolArgs === "object" && "toolName" in toolArgs
? (toolArgs.toolName as string)
: null;
// Extract actual tool name from toolArgs
let actualToolName: string | null = null;
if (toolArgs && typeof toolArgs === "object") {
if ("toolName" in toolArgs) {
actualToolName = toolArgs.toolName as string;
} else if (
metaToolName === META_TOOLS.LOAD_SKILL &&
"skillId" in toolArgs
) {
actualToolName = `${SKILL_PREFIX}${toolArgs.skillId as string}`;
}
}
controller.enqueue(
createChainOfThoughtEvent({

View File

@@ -6,6 +6,7 @@
/**
 * Names of the Lighthouse meta-tools: the wrappers through which the agent
 * inspects tools, executes tools, and loads skills.
 */
export const META_TOOLS = {
  DESCRIBE: "describe_tool",
  EXECUTE: "execute_tool",
  LOAD_SKILL: "load_skill",
} as const;
export type MetaTool = (typeof META_TOOLS)[keyof typeof META_TOOLS];
@@ -68,5 +69,7 @@ export const STREAM_MESSAGE_ID = "msg-1";
export const ERROR_PREFIX = "[LIGHTHOUSE_ANALYST_ERROR]:";
export const SKILL_PREFIX = "skill:";
export const TOOLS_UNAVAILABLE_MESSAGE =
"\nProwler tools are unavailable. You cannot access cloud accounts or security scan data. If asked about security status or scan results, inform the user that this data is currently inaccessible.\n";

View File

@@ -0,0 +1,311 @@
import type { SkillDefinition } from "../types";
/**
 * Skill teaching the Lighthouse agent how to write custom openCypher queries
 * against Cartography-ingested attack-paths data. The `instructions` template
 * literal is runtime data sent to the LLM verbatim — do not reformat it.
 */
export const customAttackPathQuerySkill: SkillDefinition = {
  metadata: {
    id: "attack-path-custom-query",
    name: "Attack Paths Custom Query",
    description:
      "Write an openCypher graph query against Cartography-ingested cloud infrastructure to analyze attack paths, privilege escalation, and network exposure.",
  },
  instructions: `# Attack Paths Custom Query Skill
This skill provides openCypher syntax guidance and Cartography schema knowledge for writing graph queries against Prowler's cloud infrastructure data.
## Workflow
Follow these steps when the user asks you to write a custom openCypher query:
1. **Find a completed scan**: Use \`prowler_app_list_attack_paths_scans\` (filter by \`state=['completed']\`) to find a scan for the user's provider. You need the \`scan_id\` for the next step.
2. **Fetch the Cartography schema**: Use \`prowler_app_get_attack_paths_cartography_schema\` with the \`scan_id\`. This returns the full schema markdown with all node labels, relationships, and properties for the scan's provider and Cartography version. If this tool fails, use the Cartography Schema Reference section below as a fallback (AWS only).
3. **Analyze the schema**: From \`schema_content\`, identify the node labels, properties, and relationships relevant to the user's request. Cross-reference with the Common openCypher Patterns section below.
4. **Write the query**: Compose the openCypher query following all rules in this skill:
- Scope every MATCH to the root account node (see Provider Isolation)
- Use \`$provider_uid\` and \`$provider_id\` parameters (see Query Parameters)
- Include \`ProwlerFinding\` OPTIONAL MATCH (see Include Prowler Findings)
- Use openCypher v9 compatible syntax only (see openCypher Version 9 Compatibility)
5. **Present the query**: Show the complete query in a \`cypher\` code block with:
- A brief explanation of what the query finds
- The node types and relationships it traverses
- What results to expect
**Note**: Custom queries cannot be executed through the available tools yet. Present the query to the user for review and manual execution.
## Query Parameters
All queries receive these built-in parameters (do NOT hardcode these values):
| Parameter | Matches property | Used on | Purpose |
|-----------|-----------------|---------|---------|
| \`$provider_uid\` | \`id\` | \`AWSAccount\` | Scopes to a specific cloud account |
| \`$provider_id\` | \`_provider_id\` | Any non-account node | Scopes nodes to the provider context |
Use \`$provider_uid\` on account root nodes. Use \`$provider_id\` on other nodes that need provider scoping (e.g., \`Internet\`).
## openCypher Query Guidelines
### Provider Isolation (CRITICAL)
Every query MUST chain from the root account node to prevent cross-provider data leakage.
The tenant database contains data from multiple providers.
\`\`\`cypher
// CORRECT: scoped to the specific account's subgraph
MATCH (aws:AWSAccount {id: $provider_uid})--(role:AWSRole)
WHERE role.name = 'admin'
// WRONG: matches ALL AWSRoles across all providers
MATCH (role:AWSRole) WHERE role.name = 'admin'
\`\`\`
Every \`MATCH\` clause must connect to the \`aws\` variable (or another variable already bound to the account's subgraph). An unanchored \`MATCH\` returns nodes from all providers.
**Exception**: The \`Internet\` sentinel node uses \`OPTIONAL MATCH\` with \`_provider_id\` for scoping instead of chaining from \`aws\`.
### Include Prowler Findings
Always include Prowler findings to enrich results with security context:
\`\`\`cypher
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
\`\`\`
For network exposure queries, also return the internet node and relationship:
\`\`\`cypher
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr,
internet, can_access
\`\`\`
### openCypher Version 9 Compatibility
Queries must use openCypher Version 9 (compatible with both Neo4j and Amazon Neptune).
| Avoid | Reason | Use instead |
|-------|--------|-------------|
| APOC procedures (\`apoc.*\`) | Neo4j-specific plugin | Real nodes and relationships in the graph |
| Neptune extensions | Not available in Neo4j | Standard openCypher |
| \`reduce()\` function | Not in openCypher spec | \`UNWIND\` + \`collect()\` |
| \`FOREACH\` clause | Not in openCypher spec | \`WITH\` + \`UNWIND\` + \`SET\` |
| Regex operator (\`=~\`) | Not supported in Neptune | \`toLower()\` + exact match, or \`CONTAINS\`/\`STARTS WITH\` |
| \`CALL () { UNION }\` | Complex, hard to maintain | Multi-label OR in WHERE (see patterns below) |
**Supported with limitations:**
- \`CALL\` subqueries require \`WITH\` clause to import variables
## Cartography Schema Reference (Quick Reference / Fallback)
### AWS Node Labels
| Label | Description |
|-------|-------------|
| \`AWSAccount\` | AWS account root node |
| \`AWSPrincipal\` | IAM principal (user, role, service) |
| \`AWSRole\` | IAM role |
| \`AWSUser\` | IAM user |
| \`AWSPolicy\` | IAM policy |
| \`AWSPolicyStatement\` | Policy statement with effect, action, resource |
| \`EC2Instance\` | EC2 instance |
| \`EC2SecurityGroup\` | Security group |
| \`EC2PrivateIp\` | EC2 private IP (has \`public_ip\`) |
| \`IpPermissionInbound\` | Inbound security group rule |
| \`IpRange\` | IP range (e.g., \`0.0.0.0/0\`) |
| \`NetworkInterface\` | ENI (has \`public_ip\`) |
| \`ElasticIPAddress\` | Elastic IP (has \`public_ip\`) |
| \`S3Bucket\` | S3 bucket |
| \`RDSInstance\` | RDS database instance |
| \`LoadBalancer\` | Classic ELB |
| \`LoadBalancerV2\` | ALB/NLB |
| \`ELBListener\` | Classic ELB listener |
| \`ELBV2Listener\` | ALB/NLB listener |
| \`LaunchTemplate\` | EC2 launch template |
| \`AWSTag\` | Resource tag with key/value properties |
### Prowler-Specific Labels
| Label | Description |
|-------|-------------|
| \`ProwlerFinding\` | Prowler finding node with \`status\`, \`provider_uid\`, \`severity\` properties |
| \`Internet\` | Internet sentinel node, scoped by \`_provider_id\` (used in network exposure queries) |
### Common Relationships
| Relationship | Description |
|-------------|-------------|
| \`TRUSTS_AWS_PRINCIPAL\` | Role trust relationship |
| \`STS_ASSUMEROLE_ALLOW\` | Can assume role (variable-length for chains) |
| \`CAN_ACCESS\` | Internet-to-resource exposure link |
| \`POLICY\` | Has policy attached |
| \`STATEMENT\` | Policy has statement |
### Key Properties
- \`AWSAccount\`: \`id\` (account ID used with \`$provider_uid\`)
- \`AWSPolicyStatement\`: \`effect\` ('Allow'/'Deny'), \`action\` (list), \`resource\` (list)
- \`EC2Instance\`: \`exposed_internet\` (boolean), \`publicipaddress\`
- \`EC2PrivateIp\`: \`public_ip\`
- \`NetworkInterface\`: \`public_ip\`
- \`ElasticIPAddress\`: \`public_ip\`
- \`EC2SecurityGroup\`: \`name\`, \`id\`
- \`IpPermissionInbound\`: \`toport\`, \`fromport\`, \`protocol\`
- \`S3Bucket\`: \`name\`, \`anonymous_access\` (boolean)
- \`RDSInstance\`: \`storage_encrypted\` (boolean)
- \`ProwlerFinding\`: \`status\` ('FAIL'/'PASS'/'MANUAL'), \`severity\`, \`provider_uid\`
- \`Internet\`: \`_provider_id\` (provider UUID used with \`$provider_id\`)
## Common openCypher Patterns
### Match Account and Principal
\`\`\`cypher
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(policy:AWSPolicy)--(stmt:AWSPolicyStatement)
\`\`\`
### Check IAM Action Permissions
\`\`\`cypher
WHERE stmt.effect = 'Allow'
AND any(action IN stmt.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
\`\`\`
### Find Roles Trusting a Service
\`\`\`cypher
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {arn: 'ec2.amazonaws.com'})
\`\`\`
### Check Resource Scope
\`\`\`cypher
WHERE any(resource IN stmt.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
\`\`\`
### Match Internet Sentinel Node
Used in network exposure queries. The Internet node is a real graph node, scoped by \`_provider_id\`:
\`\`\`cypher
OPTIONAL MATCH (internet:Internet {_provider_id: $provider_id})
\`\`\`
### Link Internet to Exposed Resource
The \`CAN_ACCESS\` relationship links the Internet node to exposed resources:
\`\`\`cypher
OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(resource)
\`\`\`
### Multi-label OR (match multiple resource types)
When a query needs to match different resource types in the same position, use label checks in WHERE:
\`\`\`cypher
MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x)-[q]-(y)
WHERE (x:EC2PrivateIp AND x.public_ip = $ip)
OR (x:EC2Instance AND x.publicipaddress = $ip)
OR (x:NetworkInterface AND x.public_ip = $ip)
OR (x:ElasticIPAddress AND x.public_ip = $ip)
\`\`\`
## Example Query Patterns
### Resource Inventory
\`\`\`cypher
MATCH path = (aws:AWSAccount {id: $provider_uid})--(rds:RDSInstance)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
\`\`\`
### Network Exposure
\`\`\`cypher
// Match the Internet sentinel node
OPTIONAL MATCH (internet:Internet {_provider_id: $provider_id})
// Match exposed resources (MUST chain from aws)
MATCH path = (aws:AWSAccount {id: $provider_uid})--(resource:EC2Instance)
WHERE resource.exposed_internet = true
// Link Internet to resource
OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(resource)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr,
internet, can_access
\`\`\`
### IAM Permission Check
\`\`\`cypher
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(policy:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(action IN stmt.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
UNWIND nodes(path_principal) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path_principal, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
\`\`\`
### Privilege Escalation (Role Assumption Chain)
\`\`\`cypher
// Find principals with iam:PassRole
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(policy:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(action IN stmt.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find target roles trusting a service
MATCH path_target = (aws)--(target_role:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(:AWSPrincipal {arn: 'ec2.amazonaws.com'})
WHERE any(resource IN stmt.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
UNWIND nodes(path_principal) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path_principal, path_target,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
\`\`\`
## Best Practices
1. **Always scope by provider**: Use \`{id: $provider_uid}\` on \`AWSAccount\` nodes. Use \`{_provider_id: $provider_id}\` on non-account nodes that need provider scoping (e.g., \`Internet\`).
2. **Chain all MATCHes from the root account node**: Every \`MATCH\` must connect to the \`aws\` variable. The \`Internet\` node is the only exception (uses \`OPTIONAL MATCH\` with \`_provider_id\`).
3. **Include Prowler findings**: Always add the \`OPTIONAL MATCH\` for \`ProwlerFinding\` nodes.
4. **Return distinct findings**: Use \`collect(DISTINCT pf)\` to avoid duplicates.
5. **Comment the query purpose**: Add inline comments explaining each \`MATCH\` clause.
6. **Use alternatives for unsupported features**: Replace \`=~\` with \`toLower()\` + exact match or \`CONTAINS\`/\`STARTS WITH\`. Replace \`reduce()\` with \`UNWIND\` + \`collect()\`.
`,
};

View File

@@ -0,0 +1,14 @@
import { customAttackPathQuerySkill } from "./definitions/attack-path-custom-query";
import { registerSkill } from "./registry";
// Explicit registration — tree-shake-proof
registerSkill(customAttackPathQuerySkill);
// Re-export registry functions and types
export {
getAllSkillMetadata,
getRegisteredSkillIds,
getSkillById,
registerSkill,
} from "./registry";
export type { SkillDefinition, SkillMetadata } from "./types";

View File

@@ -0,0 +1,21 @@
import "server-only";
import type { SkillDefinition, SkillMetadata } from "./types";
// Module-level store mapping skill id -> full skill definition.
const skillRegistry = new Map<string, SkillDefinition>();

/** Registers (or replaces) a skill under its metadata id. */
export function registerSkill(definition: SkillDefinition): void {
  skillRegistry.set(definition.metadata.id, definition);
}

/** Returns the metadata of every registered skill. */
export function getAllSkillMetadata(): SkillMetadata[] {
  const allMetadata: SkillMetadata[] = [];
  for (const definition of skillRegistry.values()) {
    allMetadata.push(definition.metadata);
  }
  return allMetadata;
}

/** Looks up a full skill definition by id, if registered. */
export function getSkillById(id: string): SkillDefinition | undefined {
  return skillRegistry.get(id);
}

/** Returns the ids of every registered skill. */
export function getRegisteredSkillIds(): string[] {
  return [...skillRegistry.keys()];
}

View File

@@ -0,0 +1,10 @@
/** Catalog information describing a skill to the agent. */
export interface SkillMetadata {
  /** Unique identifier, passed to load_skill. */
  id: string;
  /** Human-readable skill name. */
  name: string;
  /** Short description used to match user requests to the skill. */
  description: string;
}

/** A registered skill: its catalog metadata plus full instruction text. */
export interface SkillDefinition {
  metadata: SkillMetadata;
  /** Detailed instructions returned when the skill is loaded. */
  instructions: string;
}

View File

@@ -3,6 +3,8 @@
*
* {{TOOL_LISTING}} placeholder will be replaced with dynamically generated tool list
*/
import type { SkillMetadata } from "@/lib/lighthouse/skills/types";
export const LIGHTHOUSE_SYSTEM_PROMPT_TEMPLATE = `
## Introduction
@@ -45,7 +47,7 @@ You have access to tools from multiple sources:
## Tool Usage
You have access to TWO meta-tools to interact with the available tools:
You have access to THREE meta-tools to interact with the available tools and skills:
1. **describe_tool** - Get detailed schema for a specific tool
- Use exact tool name from the list above
@@ -59,6 +61,13 @@ You have access to TWO meta-tools to interact with the available tools:
- Example: execute_tool({ "toolName": "prowler_hub_list_providers", "toolInput": {} })
- Example: execute_tool({ "toolName": "prowler_app_search_security_findings", "toolInput": { "severity": ["critical", "high"], "status": ["FAIL"] } })
3. **load_skill** - Load specialized instructions for a complex task
- Use when you identify a matching skill from the skill catalog below
- Returns detailed workflows, schema knowledge, and examples
- Example: load_skill({ "skillId": "<skill-id-from-catalog-below>" })
{{SKILL_CATALOG}}
## General Instructions
- **DON'T ASSUME**. Base your answers on the system prompt or tool outputs before responding to the user.
@@ -229,6 +238,26 @@ When providing proactive recommendations to secure users' cloud accounts, follow
- Prowler Documentation: https://docs.prowler.com/
`;
/**
 * Builds the "Skill Catalog" section of the system prompt from the registered
 * skills' metadata, so the LLM can match user requests against them.
 * Returns an empty string when no skills are registered.
 */
export function generateSkillCatalog(skills: SkillMetadata[]): string {
  if (skills.length === 0) {
    return "";
  }

  const entries = skills
    .map((skill) => `- **${skill.id}**: ${skill.name} - ${skill.description}\n`)
    .join("");

  return (
    "## Skill Catalog\n\n" +
    "When a user request matches a skill below, use load_skill to get detailed instructions before proceeding.\n\n" +
    entries
  );
}
/**
* Generates the user-provided data section with security boundary
*/

View File

@@ -0,0 +1,82 @@
import "server-only";
import { tool } from "@langchain/core/tools";
import { addBreadcrumb } from "@sentry/nextjs";
import { z } from "zod";
import {
getRegisteredSkillIds,
getSkillById,
} from "@/lib/lighthouse/skills/index";
/** Successful load_skill result: the skill's identity plus full instructions. */
interface SkillLoadedResult {
  found: true;
  skillId: string;
  name: string;
  instructions: string;
}

/** Failed load_skill result: echoes the requested id and lists valid ids. */
interface SkillNotFoundResult {
  found: false;
  skillId: string;
  message: string;
  availableSkills: string[];
}

/** Discriminated union returned by load_skill (discriminant: `found`). */
type LoadSkillResult = SkillLoadedResult | SkillNotFoundResult;
/**
 * LangChain tool that returns the full instructions for a registered skill.
 * The agent calls this after matching a user request against the skill
 * catalog in its system prompt. Unknown ids produce a not-found result that
 * includes the list of valid skill ids.
 */
export const loadSkill = tool(
  async ({ skillId }: { skillId: string }): Promise<LoadSkillResult> => {
    // Record every invocation for observability.
    addBreadcrumb({
      category: "skill",
      message: `load_skill called for: ${skillId}`,
      level: "info",
      data: { skillId },
    });

    const entry = getSkillById(skillId);

    if (entry) {
      // Success path: hand the full instruction text back to the agent.
      return {
        found: true,
        skillId: entry.metadata.id,
        name: entry.metadata.name,
        instructions: entry.instructions,
      };
    }

    // Unknown id: log it and echo the valid ids so the agent can retry.
    const knownIds = getRegisteredSkillIds();
    addBreadcrumb({
      category: "skill",
      message: `Skill not found: ${skillId}`,
      level: "warning",
      data: { skillId, availableSkills: knownIds },
    });
    return {
      found: false,
      skillId,
      message: `Skill '${skillId}' not found.`,
      availableSkills: knownIds,
    };
  },
  {
    name: "load_skill",
    description: `Load detailed instructions for a specialized skill.
Skills provide domain-specific guidance, workflows, and schema knowledge for complex tasks.
Use this when you identify a relevant skill from the skill catalog in your system prompt.
Returns:
- Skill metadata (id, name)
- Full skill instructions with workflows and examples`,
    schema: z.object({
      skillId: z
        .string()
        .describe(
          "The ID of the skill to load (from the skill catalog in your system prompt)",
        ),
    }),
  },
);

View File

@@ -12,10 +12,13 @@ import {
initializeMCPClient,
isMCPAvailable,
} from "@/lib/lighthouse/mcp-client";
import { getAllSkillMetadata } from "@/lib/lighthouse/skills/index";
import {
generateSkillCatalog,
generateUserDataSection,
LIGHTHOUSE_SYSTEM_PROMPT_TEMPLATE,
} from "@/lib/lighthouse/system-prompt";
import { loadSkill } from "@/lib/lighthouse/tools/load-skill";
import { describeTool, executeTool } from "@/lib/lighthouse/tools/meta-tool";
import { getModelParams } from "@/lib/lighthouse/utils";
@@ -84,6 +87,7 @@ const ALLOWED_TOOLS = new Set([
"prowler_app_list_attack_paths_queries",
"prowler_app_list_attack_paths_scans",
"prowler_app_run_attack_paths_query",
"prowler_app_get_attack_paths_cartography_schema",
]);
/**
@@ -136,6 +140,10 @@ export async function initLighthouseWorkflow(runtimeConfig?: RuntimeConfig) {
toolListing,
);
// Generate and inject skill catalog
const skillCatalog = generateSkillCatalog(getAllSkillMetadata());
systemPrompt = systemPrompt.replace("{{SKILL_CATALOG}}", skillCatalog);
// Add user-provided data section if available
const userDataSection = generateUserDataSection(
runtimeConfig?.businessContext,
@@ -177,7 +185,7 @@ export async function initLighthouseWorkflow(runtimeConfig?: RuntimeConfig) {
const agent = createAgent({
model: llm,
tools: [describeTool, executeTool],
tools: [describeTool, executeTool, loadSkill],
systemPrompt,
});

View File

@@ -156,6 +156,10 @@ def update_config_file(regions, config_file_path):
raise Exception(
"Validation failed: OCI_GOVERNMENT_REGIONS section missing after update. Aborting to prevent data loss."
)
if "OCI_US_DOD_REGIONS" not in updated_content:
raise Exception(
"Validation failed: OCI_US_DOD_REGIONS section missing after update. Aborting to prevent data loss."
)
# Verify the replacement was successful
if updated_content == config_content: