Compare commits

..

9 Commits

Author SHA1 Message Date
Prowler Bot
e1f70321c8 chore(api): Update prowler dependency to v5.18 for release 5.18.0 (#9963)
Co-authored-by: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
2026-02-05 14:06:12 +01:00
Josema Camacho
d016039b18 chore(ui): prepare changelog for v5.18.0 release (#9962) 2026-02-05 13:07:51 +01:00
Daniel Barranquero
ac013ec6fc feat(docs): permission error while deploying docker (#9954) 2026-02-05 11:44:22 +01:00
Josema Camacho
4ebded6ab1 chore(attack-paths): A Neo4j database per tenant (#9955) 2026-02-05 10:29:37 +01:00
Alan Buscaglia
770269772a test(ui): stabilize auth and provider e2e flows (#9945) 2026-02-05 09:56:49 +01:00
Josema Camacho
ab18ddb81a chore(api): prepare changelog for 5.18.0 release (#9960) 2026-02-05 09:34:54 +01:00
Pedro Martín
cda7f89091 feat(azure): add HIPAA compliance framework (#9957) 2026-02-05 08:45:52 +01:00
Josema Camacho
658ae755ae chore(attack-paths): pin cartography to 0.126.1 (#9893)
Co-authored-by: César Arroba <cesar@prowler.com>
2026-02-04 19:20:15 +01:00
Daniel Barranquero
486719737b chore(sdk): prepare changelog for v5.18.0 (#9958) 2026-02-04 19:16:19 +01:00
42 changed files with 3128 additions and 1409 deletions

2
.env
View File

@@ -66,7 +66,7 @@ NEO4J_DBMS_SECURITY_PROCEDURES_ALLOWLIST=apoc.*
NEO4J_DBMS_SECURITY_PROCEDURES_UNRESTRICTED=apoc.*
NEO4J_DBMS_CONNECTOR_BOLT_LISTEN_ADDRESS=0.0.0.0:7687
# Neo4j Prowler settings
ATTACK_PATHS_FINDINGS_BATCH_SIZE=1000
ATTACK_PATHS_BATCH_SIZE=1000
# Celery-Prowler task settings
TASK_RETRY_DELAY_SECONDS=0.1

View File

@@ -2,7 +2,7 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.19.0] (Prowler UNRELEASED)
## [1.19.0] (Prowler v5.18.0)
### 🚀 Added
@@ -16,11 +16,9 @@ All notable changes to the **Prowler API** are documented in this file.
### 🔄 Changed
- Lazy-load providers and compliance data to reduce API/worker startup memory and time [(#9857)](https://github.com/prowler-cloud/prowler/pull/9857)
- Attack Paths: Pinned Cartography to version `0.126.1`, adding AWS scans for SageMaker, CloudFront and Bedrock [(#9893)](https://github.com/prowler-cloud/prowler/issues/9893)
- Remove unused indexes [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)
---
## [1.18.2] (Prowler UNRELEASED)
- Attack Paths: Modified the behaviour of the Cartography scans to use the same Neo4j database per tenant, instead of individual databases per scan [(#9955)](https://github.com/prowler-cloud/prowler/pull/9955)
### 🐞 Fixed

109
api/poetry.lock generated
View File

@@ -1144,6 +1144,24 @@ msal = ">=1.30.0"
msal-extensions = ">=1.2.0"
typing-extensions = ">=4.0.0"
[[package]]
name = "azure-keyvault-certificates"
version = "4.10.0"
description = "Microsoft Corporation Key Vault Certificates Client Library for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\" or platform_system != \"Windows\" and platform_system != \"Darwin\" and platform_system != \"Linux\" or sys_platform != \"win32\""
files = [
{file = "azure_keyvault_certificates-4.10.0-py3-none-any.whl", hash = "sha256:fa76cbc329274cb5f4ab61b0ed7d209d44377df4b4d6be2fd01e741c2fbb83a9"},
{file = "azure_keyvault_certificates-4.10.0.tar.gz", hash = "sha256:004ff47a73152f9f40f678e5a07719b753a3ca86f0460bfeaaf6a23304872e05"},
]
[package.dependencies]
azure-core = ">=1.31.0"
isodate = ">=0.6.1"
typing-extensions = ">=4.6.0"
[[package]]
name = "azure-keyvault-keys"
version = "4.10.0"
@@ -1163,6 +1181,24 @@ cryptography = ">=2.1.4"
isodate = ">=0.6.1"
typing-extensions = ">=4.0.1"
[[package]]
name = "azure-keyvault-secrets"
version = "4.10.0"
description = "Microsoft Corporation Key Vault Secrets Client Library for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\" or platform_system != \"Windows\" and platform_system != \"Darwin\" and platform_system != \"Linux\" or sys_platform != \"win32\""
files = [
{file = "azure_keyvault_secrets-4.10.0-py3-none-any.whl", hash = "sha256:9dbde256077a4ee1a847646671580692e3f9bea36bcfc189c3cf2b9a94eb38b9"},
{file = "azure_keyvault_secrets-4.10.0.tar.gz", hash = "sha256:666fa42892f9cee749563e551a90f060435ab878977c95265173a8246d546a36"},
]
[package.dependencies]
azure-core = ">=1.31.0"
isodate = ">=0.6.1"
typing-extensions = ">=4.6.0"
[[package]]
name = "azure-mgmt-apimanagement"
version = "5.0.0"
@@ -1385,6 +1421,25 @@ azure-mgmt-core = ">=1.3.2"
isodate = ">=0.6.1"
typing-extensions = ">=4.6.0"
[[package]]
name = "azure-mgmt-eventhub"
version = "11.2.0"
description = "Microsoft Azure Event Hub Management Client Library for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\" or platform_system != \"Windows\" and platform_system != \"Darwin\" and platform_system != \"Linux\" or sys_platform != \"win32\""
files = [
{file = "azure_mgmt_eventhub-11.2.0-py3-none-any.whl", hash = "sha256:a7e2618eca58d8e52c7ff7d4a04a4fae12685351746e6d01b933b43e7ea3b906"},
{file = "azure_mgmt_eventhub-11.2.0.tar.gz", hash = "sha256:31c47f18f73d2d83345cde5909568e28858c2548a35b10e23194b4767a9ce7e3"},
]
[package.dependencies]
azure-common = ">=1.1"
azure-mgmt-core = ">=1.3.2"
isodate = ">=0.6.1"
typing-extensions = ">=4.6.0"
[[package]]
name = "azure-mgmt-keyvault"
version = "10.3.1"
@@ -1662,6 +1717,24 @@ azure-common = ">=1.1,<2.0"
azure-mgmt-core = ">=1.3.2,<2.0.0"
msrest = ">=0.7.1"
[[package]]
name = "azure-mgmt-synapse"
version = "2.0.0"
description = "Microsoft Azure Synapse Management Client Library for Python"
optional = false
python-versions = "*"
groups = ["main"]
markers = "platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\" or platform_system != \"Windows\" and platform_system != \"Darwin\" and platform_system != \"Linux\" or sys_platform != \"win32\""
files = [
{file = "azure-mgmt-synapse-2.0.0.zip", hash = "sha256:bec6bdfaeb55b4fdd159f2055e8875bf50a720bb0fce80a816e92a2359b898c8"},
{file = "azure_mgmt_synapse-2.0.0-py2.py3-none-any.whl", hash = "sha256:e901274009be843a7bf2eedeab32c0941fabb2addea9a1ad1560395073965f0f"},
]
[package.dependencies]
azure-common = ">=1.1,<2.0"
azure-mgmt-core = ">=1.2.0,<2.0.0"
msrest = ">=0.6.21"
[[package]]
name = "azure-mgmt-web"
version = "8.0.0"
@@ -1721,6 +1794,25 @@ typing-extensions = ">=4.6.0"
[package.extras]
aio = ["azure-core[aio] (>=1.30.0)"]
[[package]]
name = "azure-synapse-artifacts"
version = "0.21.0"
description = "Microsoft Azure Synapse Artifacts Client Library for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\" or platform_system != \"Windows\" and platform_system != \"Darwin\" and platform_system != \"Linux\" or sys_platform != \"win32\""
files = [
{file = "azure_synapse_artifacts-0.21.0-py3-none-any.whl", hash = "sha256:3311919df13a2b42f1fb9debf5d512080c35d64d02b9f84ff944848835289a8d"},
{file = "azure_synapse_artifacts-0.21.0.tar.gz", hash = "sha256:d7e37516cf8569e03c604d921e3407d7140cf7523b67b67f757caf999e3c8ee7"},
]
[package.dependencies]
azure-common = ">=1.1"
azure-mgmt-core = ">=1.6.0"
isodate = ">=0.6.1"
typing-extensions = ">=4.6.0"
[[package]]
name = "backoff"
version = "2.2.1"
@@ -1830,7 +1922,7 @@ crt = ["awscrt (==0.27.6)"]
[[package]]
name = "cartography"
version = "0.0.1.dev1268+gc134846c0"
version = "0.126.1"
description = "Explore assets and their relationships across your technical infrastructure."
optional = false
python-versions = ">=3.10"
@@ -1844,6 +1936,9 @@ adal = ">=1.2.4"
aioboto3 = ">=13.0.0"
azure-cli-core = ">=2.26.0"
azure-identity = ">=1.5.0"
azure-keyvault-certificates = ">=4.0.0"
azure-keyvault-keys = ">=4.0.0"
azure-keyvault-secrets = ">=4.0.0"
azure-mgmt-authorization = ">=0.60.0"
azure-mgmt-compute = ">=5.0.0"
azure-mgmt-containerinstance = ">=10.0.0"
@@ -1851,6 +1946,8 @@ azure-mgmt-containerservice = ">=30.0.0"
azure-mgmt-cosmosdb = ">=6.0.0"
azure-mgmt-datafactory = ">=8.0.0"
azure-mgmt-eventgrid = ">=10.0.0"
azure-mgmt-eventhub = ">=10.1.0"
azure-mgmt-keyvault = ">=10.0.0"
azure-mgmt-logic = ">=10.0.0"
azure-mgmt-monitor = ">=3.0.0"
azure-mgmt-network = ">=25.0.0"
@@ -1858,7 +1955,9 @@ azure-mgmt-resource = ">=10.2.0"
azure-mgmt-security = ">=5.0.0"
azure-mgmt-sql = ">=3.0.1,<4"
azure-mgmt-storage = ">=16.0.0"
azure-mgmt-synapse = ">=2.0.0"
azure-mgmt-web = ">=7.0.0"
azure-synapse-artifacts = ">=0.17.0"
backoff = ">=2.1.2"
boto3 = ">=1.15.1"
botocore = ">=1.18.1"
@@ -1895,7 +1994,7 @@ xmltodict = "*"
[package.source]
type = "git"
url = "https://github.com/prowler-cloud/cartography"
reference = "master"
reference = "0.126.1"
resolved_reference = "9e3dd6459bec027461e1fe998c034a0f3fb83e3d"
[[package]]
@@ -6913,8 +7012,8 @@ tzlocal = "5.3.1"
[package.source]
type = "git"
url = "https://github.com/prowler-cloud/prowler.git"
reference = "master"
resolved_reference = "b1f99716171856bf787a7695a588ffad6bf8d596"
reference = "v5.18"
resolved_reference = "d016039b18448db8bcc709c70aa1c6ebf5586dfb"
[[package]]
name = "psutil"
@@ -9656,4 +9755,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.13"
content-hash = "ecdb317bb551f0683353c2270da856c2e42fd2d8f7f8a590f1c56b257306a2a1"
content-hash = "d825300bf51d2c76453097e00d15070a20573941dcd3182392ed0bf156f69e00"

View File

@@ -24,7 +24,7 @@ dependencies = [
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.18",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
@@ -37,7 +37,7 @@ dependencies = [
"matplotlib (>=3.10.6,<4.0.0)",
"reportlab (>=4.4.4,<5.0.0)",
"neo4j (<6.0.0)",
"cartography @ git+https://github.com/prowler-cloud/cartography@master",
"cartography @ git+https://github.com/prowler-cloud/cartography@0.126.1",
"gevent (>=25.9.1,<26.0.0)",
"werkzeug (>=3.1.4)",
"sqlparse (>=0.5.4)",

View File

@@ -1,10 +1,11 @@
from api.attack_paths.query_definitions import (
from api.attack_paths.queries import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
get_queries_for_provider,
get_query_by_id,
)
__all__ = [
"AttackPathsQueryDefinition",
"AttackPathsQueryParameterDefinition",

View File

@@ -1,15 +1,18 @@
import atexit
import logging
import threading
from contextlib import contextmanager
from typing import Iterator
from uuid import UUID
import neo4j
import neo4j.exceptions
from django.conf import settings
from api.attack_paths.retryable_session import RetryableSession
from tasks.jobs.attack_paths.config import BATCH_SIZE, PROVIDER_RESOURCE_LABEL
# Without this Celery goes crazy with Neo4j logging
logging.getLogger("neo4j").setLevel(logging.ERROR)
@@ -83,7 +86,8 @@ def get_session(database: str | None = None) -> Iterator[RetryableSession]:
yield session_wrapper
except neo4j.exceptions.Neo4jError as exc:
raise GraphDatabaseQueryException(message=exc.message, code=exc.code)
message = exc.message if exc.message is not None else str(exc)
raise GraphDatabaseQueryException(message=message, code=exc.code)
finally:
if session_wrapper is not None:
@@ -105,24 +109,41 @@ def drop_database(database: str) -> None:
session.run(query)
def drop_subgraph(database: str, root_node_label: str, root_node_id: str) -> int:
query = """
MATCH (a:__ROOT_NODE_LABEL__ {id: $root_node_id})
CALL apoc.path.subgraphNodes(a, {})
YIELD node
DETACH DELETE node
RETURN COUNT(node) AS deleted_nodes_count
""".replace("__ROOT_NODE_LABEL__", root_node_label)
parameters = {"root_node_id": root_node_id}
def drop_subgraph(database: str, provider_id: str) -> int:
"""
Delete all nodes for a provider from the tenant database.
with get_session(database) as session:
result = session.run(query, parameters)
Uses batched deletion to avoid memory issues with large graphs.
Silently returns 0 if the database doesn't exist.
"""
deleted_nodes = 0
parameters = {
"provider_id": provider_id,
"batch_size": BATCH_SIZE,
}
try:
return result.single()["deleted_nodes_count"]
try:
with get_session(database) as session:
deleted_count = 1
while deleted_count > 0:
result = session.run(
f"""
MATCH (n:{PROVIDER_RESOURCE_LABEL} {{provider_id: $provider_id}})
WITH n LIMIT $batch_size
DETACH DELETE n
RETURN COUNT(n) AS deleted_nodes_count
""",
parameters,
)
deleted_count = result.single().get("deleted_nodes_count", 0)
deleted_nodes += deleted_count
except neo4j.exceptions.ResultConsumedError:
return 0 # As there are no nodes to delete, the result is empty
except GraphDatabaseQueryException as exc:
if exc.code == "Neo.ClientError.Database.DatabaseNotFound":
return 0
raise
return deleted_nodes
def clear_cache(database: str) -> None:
@@ -137,12 +158,11 @@ def clear_cache(database: str) -> None:
# Neo4j functions related to Prowler + Cartography
DATABASE_NAME_TEMPLATE = "db-{attack_paths_scan_id}"
def get_database_name(attack_paths_scan_id: UUID) -> str:
attack_paths_scan_id_str = str(attack_paths_scan_id).lower()
return DATABASE_NAME_TEMPLATE.format(attack_paths_scan_id=attack_paths_scan_id_str)
def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
prefix = "tmp-scan" if temporary else "tenant"
return f"db-{prefix}-{str(entity_id).lower()}"
# Exceptions

View File

@@ -0,0 +1,16 @@
from api.attack_paths.queries.types import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
)
from api.attack_paths.queries.registry import (
get_queries_for_provider,
get_query_by_id,
)
__all__ = [
"AttackPathsQueryDefinition",
"AttackPathsQueryParameterDefinition",
"get_queries_for_provider",
"get_query_by_id",
]

View File

@@ -0,0 +1,695 @@
from api.attack_paths.queries.types import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
)
from tasks.jobs.attack_paths.config import PROWLER_FINDING_LABEL
# Privilege Escalation Queries (based on pathfinding.cloud research)
# https://github.com/DataDog/pathfinding.cloud
# -------------------------------------------------------------------
AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS = AttackPathsQueryDefinition(
id="aws-internet-exposed-ec2-sensitive-s3-access",
name="Identify internet-exposed EC2 instances with sensitive S3 access",
description="Detect EC2 instances with SSH exposed to the internet that can assume higher-privileged roles to read tagged sensitive S3 buckets despite bucket-level public access blocks.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
MATCH path_s3 = (aws:AWSAccount {{id: $provider_uid}})--(s3:S3Bucket)--(t:AWSTag)
WHERE toLower(t.key) = toLower($tag_key) AND toLower(t.value) = toLower($tag_value)
MATCH path_ec2 = (aws)--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)
WHERE ec2.exposed_internet = true
AND ipi.toport = 22
MATCH path_role = (r:AWSRole)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE ANY(x IN stmt.resource WHERE x CONTAINS s3.name)
AND ANY(x IN stmt.action WHERE toLower(x) =~ 's3:(listbucket|getobject).*')
MATCH path_assume_role = (ec2)-[p:STS_ASSUMEROLE_ALLOW*1..9]-(r:AWSRole)
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, ec2)
YIELD rel AS can_access
UNWIND nodes(path_s3) + nodes(path_ec2) + nodes(path_role) + nodes(path_assume_role) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path_s3, path_ec2, path_role, path_assume_role, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="tag_key",
label="Tag key",
description="Tag key to filter the S3 bucket, e.g. DataClassification.",
placeholder="DataClassification",
),
AttackPathsQueryParameterDefinition(
name="tag_value",
label="Tag value",
description="Tag value to filter the S3 bucket, e.g. Sensitive.",
placeholder="Sensitive",
),
],
)
# Basic Resource Queries
# ----------------------
AWS_RDS_INSTANCES = AttackPathsQueryDefinition(
id="aws-rds-instances",
name="Identify provisioned RDS instances",
description="List the selected AWS account alongside the RDS instances it owns.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(rds:RDSInstance)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
AWS_RDS_UNENCRYPTED_STORAGE = AttackPathsQueryDefinition(
id="aws-rds-unencrypted-storage",
name="Identify RDS instances without storage encryption",
description="Find RDS instances with storage encryption disabled within the selected account.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(rds:RDSInstance)
WHERE rds.storage_encrypted = false
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
AWS_S3_ANONYMOUS_ACCESS_BUCKETS = AttackPathsQueryDefinition(
id="aws-s3-anonymous-access-buckets",
name="Identify S3 buckets with anonymous access",
description="Find S3 buckets that allow anonymous access within the selected account.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(s3:S3Bucket)
WHERE s3.anonymous_access = true
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
AWS_IAM_STATEMENTS_ALLOW_ALL_ACTIONS = AttackPathsQueryDefinition(
id="aws-iam-statements-allow-all-actions",
name="Identify IAM statements that allow all actions",
description="Find IAM policy statements that allow all actions via '*' within the selected account.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = '*')
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
AWS_IAM_STATEMENTS_ALLOW_DELETE_POLICY = AttackPathsQueryDefinition(
id="aws-iam-statements-allow-delete-policy",
name="Identify IAM statements that allow iam:DeletePolicy",
description="Find IAM policy statements that allow the iam:DeletePolicy action within the selected account.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = "iam:DeletePolicy")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
AWS_IAM_STATEMENTS_ALLOW_CREATE_ACTIONS = AttackPathsQueryDefinition(
id="aws-iam-statements-allow-create-actions",
name="Identify IAM statements that allow create actions",
description="Find IAM policy statements that allow actions containing 'create' within the selected account.",
provider="aws",
cypher=f"""
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = "Allow"
AND any(x IN stmt.action WHERE toLower(x) CONTAINS "create")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
# Network Exposure Queries
# ------------------------
AWS_EC2_INSTANCES_INTERNET_EXPOSED = AttackPathsQueryDefinition(
id="aws-ec2-instances-internet-exposed",
name="Identify internet-exposed EC2 instances",
description="Find EC2 instances flagged as exposed to the internet within the selected account.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance)
WHERE ec2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, ec2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
)
AWS_SECURITY_GROUPS_OPEN_INTERNET_FACING = AttackPathsQueryDefinition(
id="aws-security-groups-open-internet-facing",
name="Identify internet-facing resources with open security groups",
description="Find internet-facing resources associated with security groups that allow inbound access from '0.0.0.0/0'.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
// Match EC2 instances that are internet-exposed with open security groups (0.0.0.0/0)
MATCH path_ec2 = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)--(ir:IpRange)
WHERE ec2.exposed_internet = true
AND ir.range = "0.0.0.0/0"
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, ec2)
YIELD rel AS can_access
UNWIND nodes(path_ec2) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path_ec2, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
)
AWS_CLASSIC_ELB_INTERNET_EXPOSED = AttackPathsQueryDefinition(
id="aws-classic-elb-internet-exposed",
name="Identify internet-exposed Classic Load Balancers",
description="Find Classic Load Balancers exposed to the internet along with their listeners.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elb:LoadBalancer)--(listener:ELBListener)
WHERE elb.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, elb)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
)
AWS_ELBV2_INTERNET_EXPOSED = AttackPathsQueryDefinition(
id="aws-elbv2-internet-exposed",
name="Identify internet-exposed ELBv2 load balancers",
description="Find ELBv2 load balancers exposed to the internet along with their listeners.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elbv2:LoadBalancerV2)--(listener:ELBV2Listener)
WHERE elbv2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, elbv2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
)
AWS_PUBLIC_IP_RESOURCE_LOOKUP = AttackPathsQueryDefinition(
id="aws-public-ip-resource-lookup",
name="Identify resources by public IP address",
description="Given a public IP address, find the related AWS resource and its adjacent node within the selected account.",
provider="aws",
cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet'}})
YIELD node AS internet
CALL () {{
MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:EC2PrivateIp)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:EC2Instance)-[q]-(y)
WHERE x.publicipaddress = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:NetworkInterface)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:ElasticIPAddress)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
}}
WITH path, x, internet
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{}}, x)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="ip",
label="IP address",
description="Public IP address, e.g. 192.0.2.0.",
placeholder="192.0.2.0",
),
],
)
AWS_IAM_PRIVESC_PASSROLE_EC2 = AttackPathsQueryDefinition(
id="aws-iam-privesc-passrole-ec2",
name="Privilege Escalation: iam:PassRole + ec2:RunInstances",
description="Detect principals who can launch EC2 instances with privileged IAM roles attached. This allows gaining the permissions of the passed role by accessing the EC2 instance metadata service. This is a new-passrole escalation path (pathfinding.cloud: ec2-001).",
provider="aws",
cypher=f"""
// Create a single shared virtual EC2 instance node
CALL apoc.create.vNode(['EC2Instance'], {{
id: 'potential-ec2-passrole',
name: 'New EC2 Instance',
description: 'Attacker-controlled EC2 with privileged role'
}})
YIELD node AS ec2_node
// Create a single shared virtual escalation outcome node (styled like a finding)
CALL apoc.create.vNode(['PrivilegeEscalation'], {{
id: 'effective-administrator-passrole-ec2',
check_title: 'Privilege Escalation',
name: 'Effective Administrator',
status: 'FAIL',
severity: 'critical'
}})
YIELD node AS escalation_outcome
WITH ec2_node, escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {{id: $provider_uid}})--(principal:AWSPrincipal)
// Find statements granting iam:PassRole
MATCH path_passrole = (principal)--(passrole_policy:AWSPolicy)--(stmt_passrole:AWSPolicyStatement)
WHERE stmt_passrole.effect = 'Allow'
AND any(action IN stmt_passrole.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find statements granting ec2:RunInstances
MATCH path_ec2 = (principal)--(ec2_policy:AWSPolicy)--(stmt_ec2:AWSPolicyStatement)
WHERE stmt_ec2.effect = 'Allow'
AND any(action IN stmt_ec2.action WHERE
toLower(action) = 'ec2:runinstances'
OR toLower(action) = 'ec2:*'
OR action = '*'
)
// Find roles that trust EC2 service (can be passed to EC2)
MATCH path_target = (aws)--(target_role:AWSRole)
WHERE target_role.arn CONTAINS $provider_uid
// Check if principal can pass this role
AND any(resource IN stmt_passrole.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Check if target role has elevated permissions (optional, for severity assessment)
OPTIONAL MATCH (target_role)--(role_policy:AWSPolicy)--(role_stmt:AWSPolicyStatement)
WHERE role_stmt.effect = 'Allow'
AND (
any(action IN role_stmt.action WHERE action = '*')
OR any(action IN role_stmt.action WHERE toLower(action) = 'iam:*')
)
CALL apoc.create.vRelationship(principal, 'CAN_LAUNCH', {{
via: 'ec2:RunInstances + iam:PassRole'
}}, ec2_node)
YIELD rel AS launch_rel
CALL apoc.create.vRelationship(ec2_node, 'ASSUMES_ROLE', {{}}, target_role)
YIELD rel AS assumes_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {{
reference: 'https://pathfinding.cloud/paths/ec2-001'
}}, escalation_outcome)
YIELD rel AS grants_rel
UNWIND nodes(path_principal) + nodes(path_passrole) + nodes(path_ec2) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path_principal, path_passrole, path_ec2, path_target,
ec2_node, escalation_outcome, launch_rel, assumes_rel, grants_rel,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
# TODO: Add ProwlerFinding nodes
AWS_GLUE_PRIVESC_PASSROLE_DEV_ENDPOINT = AttackPathsQueryDefinition(
id="aws-glue-privesc-passrole-dev-endpoint",
name="Privilege Escalation: Glue Dev Endpoint with PassRole",
description="Detect principals that can escalate privileges by passing a role to a Glue development endpoint. The attacker creates a dev endpoint with an arbitrary role attached, then accesses those credentials through the endpoint.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-glue',
check_title: 'Privilege Escalation',
name: 'Effective Administrator (Glue)',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Principal can assume roles (up to 2 hops)
OPTIONAL MATCH path_assume = (principal)-[:STS_ASSUMEROLE_ALLOW*0..2]->(acting_as:AWSRole)
WITH escalation_outcome, principal, path_principal, path_assume,
CASE WHEN path_assume IS NULL THEN principal ELSE acting_as END AS effective_principal
// Find iam:PassRole permission
MATCH path_passrole = (effective_principal)--(passrole_policy:AWSPolicy)--(passrole_stmt:AWSPolicyStatement)
WHERE passrole_stmt.effect = 'Allow'
AND any(action IN passrole_stmt.action WHERE toLower(action) = 'iam:passrole' OR action = '*')
// Find Glue CreateDevEndpoint permission
MATCH (effective_principal)--(glue_policy:AWSPolicy)--(glue_stmt:AWSPolicyStatement)
WHERE glue_stmt.effect = 'Allow'
AND any(action IN glue_stmt.action WHERE toLower(action) = 'glue:createdevendpoint' OR action = '*' OR toLower(action) = 'glue:*')
// Find target role with elevated permissions
MATCH (aws)--(target_role:AWSRole)--(target_policy:AWSPolicy)--(target_stmt:AWSPolicyStatement)
WHERE target_stmt.effect = 'Allow'
AND (
any(action IN target_stmt.action WHERE action = '*')
OR any(action IN target_stmt.action WHERE toLower(action) = 'iam:*')
)
// Deduplicate before creating virtual nodes
WITH DISTINCT escalation_outcome, aws, principal, effective_principal, target_role
// Create virtual Glue endpoint node (one per unique principal->target pair)
CALL apoc.create.vNode(['GlueDevEndpoint'], {
name: 'New Dev Endpoint',
description: 'Glue endpoint with target role attached',
id: effective_principal.arn + '->' + target_role.arn
})
YIELD node AS glue_endpoint
CALL apoc.create.vRelationship(effective_principal, 'CREATES_ENDPOINT', {
permissions: ['iam:PassRole', 'glue:CreateDevEndpoint'],
technique: 'new-passrole'
}, glue_endpoint)
YIELD rel AS create_rel
CALL apoc.create.vRelationship(glue_endpoint, 'RUNS_AS', {}, target_role)
YIELD rel AS runs_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/glue-001'
}, escalation_outcome)
YIELD rel AS grants_rel
// Re-match paths for visualization
MATCH path_principal = (aws)--(principal)
MATCH path_target = (aws)--(target_role)
RETURN path_principal, path_target,
glue_endpoint, escalation_outcome, create_rel, runs_rel, grants_rel
""",
parameters=[],
)
# Privilege escalation path iam-014 (principal-access): a principal that can both
# iam:AttachRolePolicy on a role AND sts:AssumeRole into that same role can grant
# the role elevated policies and then assume it to become an effective admin.
# The Cypher is an f-string so {PROWLER_FINDING_LABEL} is interpolated at import
# time; literal Cypher map braces are therefore escaped as {{ }}.
# apoc.create.vNode / apoc.create.vRelationship build *virtual* (non-persisted)
# graph elements that render the hypothetical escalation in the visualization;
# failed Prowler findings attached to the matched nodes are returned alongside.
AWS_IAM_PRIVESC_ATTACH_ROLE_POLICY_ASSUME_ROLE = AttackPathsQueryDefinition(
id="aws-iam-privesc-attach-role-policy-assume-role",
name="Privilege Escalation: iam:AttachRolePolicy + sts:AssumeRole",
description="Detect principals who can both attach policies to roles AND assume those roles. This two-step attack allows modifying a role's permissions then assuming it to gain elevated access. This is a principal-access escalation path (pathfinding.cloud: iam-014).",
provider="aws",
cypher=f"""
// Create a virtual escalation outcome node (styled like a finding)
CALL apoc.create.vNode(['PrivilegeEscalation'], {{
id: 'effective-administrator',
check_title: 'Privilege Escalation',
name: 'Effective Administrator',
status: 'FAIL',
severity: 'critical'
}})
YIELD node AS admin_outcome
WITH admin_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {{id: $provider_uid}})--(principal:AWSPrincipal)
// Find statements granting iam:AttachRolePolicy
MATCH path_attach = (principal)--(attach_policy:AWSPolicy)--(stmt_attach:AWSPolicyStatement)
WHERE stmt_attach.effect = 'Allow'
AND any(action IN stmt_attach.action WHERE
toLower(action) = 'iam:attachrolepolicy'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find statements granting sts:AssumeRole
MATCH path_assume = (principal)--(assume_policy:AWSPolicy)--(stmt_assume:AWSPolicyStatement)
WHERE stmt_assume.effect = 'Allow'
AND any(action IN stmt_assume.action WHERE
toLower(action) = 'sts:assumerole'
OR toLower(action) = 'sts:*'
OR action = '*'
)
// Find target roles that the principal can both modify AND assume
MATCH path_target = (aws)--(target_role:AWSRole)
WHERE target_role.arn CONTAINS $provider_uid
// Can attach policy to this role
AND any(resource IN stmt_attach.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Can assume this role
AND any(resource IN stmt_assume.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Deduplicate before creating virtual relationships
WITH DISTINCT admin_outcome, aws, principal, target_role
// Create virtual relationships showing the attack path
CALL apoc.create.vRelationship(principal, 'CAN_MODIFY', {{
via: 'iam:AttachRolePolicy'
}}, target_role)
YIELD rel AS modify_rel
CALL apoc.create.vRelationship(target_role, 'LEADS_TO', {{
technique: 'iam:AttachRolePolicy + sts:AssumeRole',
via: 'sts:AssumeRole',
reference: 'https://pathfinding.cloud/paths/iam-014'
}}, admin_outcome)
YIELD rel AS escalation_rel
// Re-match paths for visualization
MATCH path_principal = (aws)--(principal)
MATCH path_target = (aws)--(target_role)
UNWIND nodes(path_principal) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL})
WHERE pf.status = 'FAIL'
RETURN path_principal, path_target,
admin_outcome, modify_rel, escalation_rel,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
)
# TODO: attach failed ProwlerFinding nodes to the returned paths (use the
# UNWIND nodes(...) + OPTIONAL MATCH (n)-[pfr]-(pf) pattern from the iam-014 query above)
# Privilege escalation via Bedrock AgentCore Code Interpreter (new-passrole):
# a principal (or a role it can reach via up to two STS_ASSUMEROLE_ALLOW hops)
# holding iam:PassRole plus bedrock-agentcore CreateCodeInterpreter,
# StartSession and Invoke can pass an elevated role to a new code interpreter
# and execute code with that role's credentials. Target roles are considered
# "elevated" when a policy statement allows '*' or 'iam:*'.
# One virtual interpreter node (apoc.create.vNode) is created per principal,
# then fanned out via UNWIND to every passable target role.
AWS_BEDROCK_PRIVESC_PASSROLE_CODE_INTERPRETER = AttackPathsQueryDefinition(
id="aws-bedrock-privesc-passrole-code-interpreter",
name="Privilege Escalation: Bedrock Code Interpreter with PassRole",
description="Detect principals that can escalate privileges by passing a role to a Bedrock AgentCore Code Interpreter. The attacker creates a code interpreter with an arbitrary role, then invokes it to execute code with those credentials.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-bedrock',
check_title: 'Privilege Escalation',
name: 'Effective Administrator (Bedrock)',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Principal can assume roles (up to 2 hops)
OPTIONAL MATCH path_assume = (principal)-[:STS_ASSUMEROLE_ALLOW*0..2]->(acting_as:AWSRole)
WITH escalation_outcome, aws, principal, path_principal, path_assume,
CASE WHEN path_assume IS NULL THEN principal ELSE acting_as END AS effective_principal
// Find iam:PassRole permission
MATCH path_passrole = (effective_principal)--(passrole_policy:AWSPolicy)--(passrole_stmt:AWSPolicyStatement)
WHERE passrole_stmt.effect = 'Allow'
AND any(action IN passrole_stmt.action WHERE toLower(action) = 'iam:passrole' OR action = '*')
// Find Bedrock AgentCore permissions
MATCH (effective_principal)--(bedrock_policy:AWSPolicy)--(bedrock_stmt:AWSPolicyStatement)
WHERE bedrock_stmt.effect = 'Allow'
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:createcodeinterpreter' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:startsession' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:invoke' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
// Find target roles with elevated permissions that could be passed
MATCH (aws)--(target_role:AWSRole)--(target_policy:AWSPolicy)--(target_stmt:AWSPolicyStatement)
WHERE target_stmt.effect = 'Allow'
AND (
any(action IN target_stmt.action WHERE action = '*')
OR any(action IN target_stmt.action WHERE toLower(action) = 'iam:*')
)
// Deduplicate per (principal, target_role) pair
WITH DISTINCT escalation_outcome, aws, principal, target_role
// Group by principal, collect target_roles
WITH escalation_outcome, aws, principal,
collect(DISTINCT target_role) AS target_roles,
count(DISTINCT target_role) AS target_count
// Create single virtual Bedrock node per principal
CALL apoc.create.vNode(['BedrockCodeInterpreter'], {
name: 'New Code Interpreter',
description: toString(target_count) + ' admin role(s) can be passed',
id: principal.arn,
target_role_count: target_count
})
YIELD node AS bedrock_agent
// Connect from principal (not effective_principal) to keep graph connected
CALL apoc.create.vRelationship(principal, 'CREATES_INTERPRETER', {
permissions: ['iam:PassRole', 'bedrock-agentcore:CreateCodeInterpreter', 'bedrock-agentcore:StartSession', 'bedrock-agentcore:Invoke'],
technique: 'new-passrole'
}, bedrock_agent)
YIELD rel AS create_rel
// UNWIND target_roles to show which roles can be passed
UNWIND target_roles AS target_role
CALL apoc.create.vRelationship(bedrock_agent, 'PASSES_ROLE', {}, target_role)
YIELD rel AS pass_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/bedrock-001'
}, escalation_outcome)
YIELD rel AS grants_rel
// Re-match path for visualization
MATCH path_principal = (aws)--(principal)
RETURN path_principal,
bedrock_agent, target_role, escalation_outcome, create_rel, pass_rel, grants_rel, target_count
""",
parameters=[],
)
# AWS Queries List
# ----------------
# Registry of every AWS Attack Paths query definition defined in this module.
# Order here is the order queries are exposed to clients for the "aws" provider.
AWS_QUERIES: list[AttackPathsQueryDefinition] = [
    # Combined exposure query (internet-exposed EC2 + sensitive S3 access)
    AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS,
    # Inventory / misconfiguration queries
    AWS_RDS_INSTANCES,
    AWS_RDS_UNENCRYPTED_STORAGE,
    AWS_S3_ANONYMOUS_ACCESS_BUCKETS,
    AWS_IAM_STATEMENTS_ALLOW_ALL_ACTIONS,
    AWS_IAM_STATEMENTS_ALLOW_DELETE_POLICY,
    AWS_IAM_STATEMENTS_ALLOW_CREATE_ACTIONS,
    # Internet-exposure queries
    AWS_EC2_INSTANCES_INTERNET_EXPOSED,
    AWS_SECURITY_GROUPS_OPEN_INTERNET_FACING,
    AWS_CLASSIC_ELB_INTERNET_EXPOSED,
    AWS_ELBV2_INTERNET_EXPOSED,
    AWS_PUBLIC_IP_RESOURCE_LOOKUP,
    # Privilege-escalation queries (pathfinding.cloud research)
    AWS_IAM_PRIVESC_PASSROLE_EC2,
    AWS_GLUE_PRIVESC_PASSROLE_DEV_ENDPOINT,
    AWS_IAM_PRIVESC_ATTACH_ROLE_POLICY_ASSUME_ROLE,
    AWS_BEDROCK_PRIVESC_PASSROLE_CODE_INTERPRETER,
]

View File

@@ -0,0 +1,25 @@
from api.attack_paths.queries.types import AttackPathsQueryDefinition
from api.attack_paths.queries.aws import AWS_QUERIES
# Query definitions organized by provider key (e.g. "aws" -> AWS_QUERIES).
_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
    "aws": AWS_QUERIES,
}

# Flat lookup by query ID for O(1) access.
# Assumes query IDs are globally unique across all providers; if two
# definitions ever shared an ID, the later one would silently win here.
_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
    definition.id: definition
    for definitions in _QUERY_DEFINITIONS.values()
    for definition in definitions
}
def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
    """Return the catalog of Attack Paths queries registered for ``provider``.

    An unknown provider key yields an empty list rather than raising.
    """
    if provider in _QUERY_DEFINITIONS:
        return _QUERY_DEFINITIONS[provider]
    return []
def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
    """Look up a single Attack Paths query definition by its unique ID.

    Returns ``None`` when no query with the given ID is registered.
    """
    try:
        return _QUERIES_BY_ID[query_id]
    except KeyError:
        return None

View File

@@ -0,0 +1,29 @@
from dataclasses import dataclass, field
@dataclass
class AttackPathsQueryParameterDefinition:
    """
    Metadata describing a parameter that must be provided to an Attack Paths query.

    Purely declarative: the query's Cypher text references the parameter as a
    ``$``-placeholder matching ``name`` (e.g. ``name="ip"`` pairs with ``$ip``).
    """

    # Parameter name; matches the $-placeholder used in the query's Cypher text.
    name: str
    # Human-readable label for the parameter (e.g. "IP address").
    label: str
    # Declared data type exposed to API clients; defaults to "string".
    data_type: str = "string"
    # Python type for the parameter value — presumably used to coerce raw
    # client input before binding; confirm against the query-execution code.
    cast: type = str
    # Optional help text, e.g. "Public IP address, e.g. 192.0.2.0.".
    description: str | None = None
    # Optional example value for input fields, e.g. "192.0.2.0".
    placeholder: str | None = None
@dataclass
class AttackPathsQueryDefinition:
    """
    Immutable representation of an Attack Path query.

    NOTE(review): described as immutable but not declared ``frozen=True``, so
    instances are technically mutable — confirm whether ``frozen=True`` can be
    applied without breaking callers.
    """

    # Unique query identifier, e.g. "aws-rds-instances"; used for direct lookup.
    id: str
    # Short human-readable title of the query.
    name: str
    # Longer explanation of what the query detects.
    description: str
    # Provider key this query belongs to (e.g. "aws").
    provider: str
    # Cypher text executed against the graph; may reference $-placeholders
    # such as $provider_uid plus any parameters declared below.
    cypher: str
    # Metadata for user-supplied $-parameters (empty when the query only
    # needs the implicit $provider_uid).
    parameters: list[AttackPathsQueryParameterDefinition] = field(default_factory=list)

View File

@@ -1,690 +0,0 @@
from dataclasses import dataclass, field
# Dataclases for handling API's Attack Path query definitions and their parameters
@dataclass
class AttackPathsQueryParameterDefinition:
"""
Metadata describing a parameter that must be provided to an Attack Paths query.
"""
name: str
label: str
data_type: str = "string"
cast: type = str
description: str | None = None
placeholder: str | None = None
@dataclass
class AttackPathsQueryDefinition:
"""
Immutable representation of an Attack Path query.
"""
id: str
name: str
description: str
provider: str
cypher: str
parameters: list[AttackPathsQueryParameterDefinition] = field(default_factory=list)
# Accessor functions for API's Attack Paths query definitions
def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
return _QUERY_DEFINITIONS.get(provider, [])
def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
return _QUERIES_BY_ID.get(query_id)
# API's Attack Paths query definitions
_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
"aws": [
# Custom query for detecting internet-exposed EC2 instances with sensitive S3 access
AttackPathsQueryDefinition(
id="aws-internet-exposed-ec2-sensitive-s3-access",
name="Identify internet-exposed EC2 instances with sensitive S3 access",
description="Detect EC2 instances with SSH exposed to the internet that can assume higher-privileged roles to read tagged sensitive S3 buckets despite bucket-level public access blocks.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path_s3 = (aws:AWSAccount {id: $provider_uid})--(s3:S3Bucket)--(t:AWSTag)
WHERE toLower(t.key) = toLower($tag_key) AND toLower(t.value) = toLower($tag_value)
MATCH path_ec2 = (aws)--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)
WHERE ec2.exposed_internet = true
AND ipi.toport = 22
MATCH path_role = (r:AWSRole)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE ANY(x IN stmt.resource WHERE x CONTAINS s3.name)
AND ANY(x IN stmt.action WHERE toLower(x) =~ 's3:(listbucket|getobject).*')
MATCH path_assume_role = (ec2)-[p:STS_ASSUMEROLE_ALLOW*1..9]-(r:AWSRole)
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, ec2)
YIELD rel AS can_access
UNWIND nodes(path_s3) + nodes(path_ec2) + nodes(path_role) + nodes(path_assume_role) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_s3, path_ec2, path_role, path_assume_role, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="tag_key",
label="Tag key",
description="Tag key to filter the S3 bucket, e.g. DataClassification.",
placeholder="DataClassification",
),
AttackPathsQueryParameterDefinition(
name="tag_value",
label="Tag value",
description="Tag value to filter the S3 bucket, e.g. Sensitive.",
placeholder="Sensitive",
),
],
),
# Regular Cartography Attack Paths queries
AttackPathsQueryDefinition(
id="aws-rds-instances",
name="Identify provisioned RDS instances",
description="List the selected AWS account alongside the RDS instances it owns.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(rds:RDSInstance)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-rds-unencrypted-storage",
name="Identify RDS instances without storage encryption",
description="Find RDS instances with storage encryption disabled within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(rds:RDSInstance)
WHERE rds.storage_encrypted = false
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-s3-anonymous-access-buckets",
name="Identify S3 buckets with anonymous access",
description="Find S3 buckets that allow anonymous access within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(s3:S3Bucket)
WHERE s3.anonymous_access = true
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-all-actions",
name="Identify IAM statements that allow all actions",
description="Find IAM policy statements that allow all actions via '*' within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = '*')
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-delete-policy",
name="Identify IAM statements that allow iam:DeletePolicy",
description="Find IAM policy statements that allow the iam:DeletePolicy action within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = "iam:DeletePolicy")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-create-actions",
name="Identify IAM statements that allow create actions",
description="Find IAM policy statements that allow actions containing 'create' within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = "Allow"
AND any(x IN stmt.action WHERE toLower(x) CONTAINS "create")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-ec2-instances-internet-exposed",
name="Identify internet-exposed EC2 instances",
description="Find EC2 instances flagged as exposed to the internet within the selected account.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(ec2:EC2Instance)
WHERE ec2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, ec2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-security-groups-open-internet-facing",
name="Identify internet-facing resources with open security groups",
description="Find internet-facing resources associated with security groups that allow inbound access from '0.0.0.0/0'.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
// Match EC2 instances that are internet-exposed with open security groups (0.0.0.0/0)
MATCH path_ec2 = (aws:AWSAccount {id: $provider_uid})--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)--(ir:IpRange)
WHERE ec2.exposed_internet = true
AND ir.range = "0.0.0.0/0"
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, ec2)
YIELD rel AS can_access
UNWIND nodes(path_ec2) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_ec2, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-classic-elb-internet-exposed",
name="Identify internet-exposed Classic Load Balancers",
description="Find Classic Load Balancers exposed to the internet along with their listeners.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(elb:LoadBalancer)--(listener:ELBListener)
WHERE elb.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, elb)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-elbv2-internet-exposed",
name="Identify internet-exposed ELBv2 load balancers",
description="Find ELBv2 load balancers exposed to the internet along with their listeners.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(elbv2:LoadBalancerV2)--(listener:ELBV2Listener)
WHERE elbv2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, elbv2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-public-ip-resource-lookup",
name="Identify resources by public IP address",
description="Given a public IP address, find the related AWS resource and its adjacent node within the selected account.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
CALL () {
MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:EC2PrivateIp)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:EC2Instance)-[q]-(y)
WHERE x.publicipaddress = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:NetworkInterface)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:ElasticIPAddress)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
}
WITH path, x, internet
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, x)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="ip",
label="IP address",
description="Public IP address, e.g. 192.0.2.0.",
placeholder="192.0.2.0",
),
],
),
# Privilege Escalation Queries (based on pathfinding.cloud research): https://github.com/DataDog/pathfinding.cloud
AttackPathsQueryDefinition(
id="aws-iam-privesc-passrole-ec2",
name="Privilege Escalation: iam:PassRole + ec2:RunInstances",
description="Detect principals who can launch EC2 instances with privileged IAM roles attached. This allows gaining the permissions of the passed role by accessing the EC2 instance metadata service. This is a new-passrole escalation path (pathfinding.cloud: ec2-001).",
provider="aws",
cypher="""
// Create a single shared virtual EC2 instance node
CALL apoc.create.vNode(['EC2Instance'], {
id: 'potential-ec2-passrole',
name: 'New EC2 Instance',
description: 'Attacker-controlled EC2 with privileged role'
})
YIELD node AS ec2_node
// Create a single shared virtual escalation outcome node (styled like a finding)
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-passrole-ec2',
check_title: 'Privilege Escalation',
name: 'Effective Administrator',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH ec2_node, escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Find statements granting iam:PassRole
MATCH path_passrole = (principal)--(passrole_policy:AWSPolicy)--(stmt_passrole:AWSPolicyStatement)
WHERE stmt_passrole.effect = 'Allow'
AND any(action IN stmt_passrole.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find statements granting ec2:RunInstances
MATCH path_ec2 = (principal)--(ec2_policy:AWSPolicy)--(stmt_ec2:AWSPolicyStatement)
WHERE stmt_ec2.effect = 'Allow'
AND any(action IN stmt_ec2.action WHERE
toLower(action) = 'ec2:runinstances'
OR toLower(action) = 'ec2:*'
OR action = '*'
)
// Find roles that trust EC2 service (can be passed to EC2)
MATCH path_target = (aws)--(target_role:AWSRole)
WHERE target_role.arn CONTAINS $provider_uid
// Check if principal can pass this role
AND any(resource IN stmt_passrole.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Check if target role has elevated permissions (optional, for severity assessment)
OPTIONAL MATCH (target_role)--(role_policy:AWSPolicy)--(role_stmt:AWSPolicyStatement)
WHERE role_stmt.effect = 'Allow'
AND (
any(action IN role_stmt.action WHERE action = '*')
OR any(action IN role_stmt.action WHERE toLower(action) = 'iam:*')
)
CALL apoc.create.vRelationship(principal, 'CAN_LAUNCH', {
via: 'ec2:RunInstances + iam:PassRole'
}, ec2_node)
YIELD rel AS launch_rel
CALL apoc.create.vRelationship(ec2_node, 'ASSUMES_ROLE', {}, target_role)
YIELD rel AS assumes_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/ec2-001'
}, escalation_outcome)
YIELD rel AS grants_rel
UNWIND nodes(path_principal) + nodes(path_passrole) + nodes(path_ec2) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_principal, path_passrole, path_ec2, path_target,
ec2_node, escalation_outcome, launch_rel, assumes_rel, grants_rel,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-glue-privesc-passrole-dev-endpoint",
name="Privilege Escalation: Glue Dev Endpoint with PassRole",
description="Detect principals that can escalate privileges by passing a role to a Glue development endpoint. The attacker creates a dev endpoint with an arbitrary role attached, then accesses those credentials through the endpoint.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-glue',
check_title: 'Privilege Escalation',
name: 'Effective Administrator (Glue)',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Principal can assume roles (up to 2 hops)
OPTIONAL MATCH path_assume = (principal)-[:STS_ASSUMEROLE_ALLOW*0..2]->(acting_as:AWSRole)
WITH escalation_outcome, principal, path_principal, path_assume,
CASE WHEN path_assume IS NULL THEN principal ELSE acting_as END AS effective_principal
// Find iam:PassRole permission
MATCH path_passrole = (effective_principal)--(passrole_policy:AWSPolicy)--(passrole_stmt:AWSPolicyStatement)
WHERE passrole_stmt.effect = 'Allow'
AND any(action IN passrole_stmt.action WHERE toLower(action) = 'iam:passrole' OR action = '*')
// Find Glue CreateDevEndpoint permission
MATCH (effective_principal)--(glue_policy:AWSPolicy)--(glue_stmt:AWSPolicyStatement)
WHERE glue_stmt.effect = 'Allow'
AND any(action IN glue_stmt.action WHERE toLower(action) = 'glue:createdevendpoint' OR action = '*' OR toLower(action) = 'glue:*')
// Find target role with elevated permissions
MATCH (aws)--(target_role:AWSRole)--(target_policy:AWSPolicy)--(target_stmt:AWSPolicyStatement)
WHERE target_stmt.effect = 'Allow'
AND (
any(action IN target_stmt.action WHERE action = '*')
OR any(action IN target_stmt.action WHERE toLower(action) = 'iam:*')
)
// Deduplicate before creating virtual nodes
WITH DISTINCT escalation_outcome, aws, principal, effective_principal, target_role
// Create virtual Glue endpoint node (one per unique principal->target pair)
CALL apoc.create.vNode(['GlueDevEndpoint'], {
name: 'New Dev Endpoint',
description: 'Glue endpoint with target role attached',
id: effective_principal.arn + '->' + target_role.arn
})
YIELD node AS glue_endpoint
CALL apoc.create.vRelationship(effective_principal, 'CREATES_ENDPOINT', {
permissions: ['iam:PassRole', 'glue:CreateDevEndpoint'],
technique: 'new-passrole'
}, glue_endpoint)
YIELD rel AS create_rel
CALL apoc.create.vRelationship(glue_endpoint, 'RUNS_AS', {}, target_role)
YIELD rel AS runs_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/glue-001'
}, escalation_outcome)
YIELD rel AS grants_rel
// Re-match paths for visualization
MATCH path_principal = (aws)--(principal)
MATCH path_target = (aws)--(target_role)
RETURN path_principal, path_target,
glue_endpoint, escalation_outcome, create_rel, runs_rel, grants_rel
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-privesc-attach-role-policy-assume-role",
name="Privilege Escalation: iam:AttachRolePolicy + sts:AssumeRole",
description="Detect principals who can both attach policies to roles AND assume those roles. This two-step attack allows modifying a role's permissions then assuming it to gain elevated access. This is a principal-access escalation path (pathfinding.cloud: iam-014).",
provider="aws",
cypher="""
// Create a virtual escalation outcome node (styled like a finding)
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator',
check_title: 'Privilege Escalation',
name: 'Effective Administrator',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS admin_outcome
WITH admin_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Find statements granting iam:AttachRolePolicy
MATCH path_attach = (principal)--(attach_policy:AWSPolicy)--(stmt_attach:AWSPolicyStatement)
WHERE stmt_attach.effect = 'Allow'
AND any(action IN stmt_attach.action WHERE
toLower(action) = 'iam:attachrolepolicy'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find statements granting sts:AssumeRole
MATCH path_assume = (principal)--(assume_policy:AWSPolicy)--(stmt_assume:AWSPolicyStatement)
WHERE stmt_assume.effect = 'Allow'
AND any(action IN stmt_assume.action WHERE
toLower(action) = 'sts:assumerole'
OR toLower(action) = 'sts:*'
OR action = '*'
)
// Find target roles that the principal can both modify AND assume
MATCH path_target = (aws)--(target_role:AWSRole)
WHERE target_role.arn CONTAINS $provider_uid
// Can attach policy to this role
AND any(resource IN stmt_attach.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Can assume this role
AND any(resource IN stmt_assume.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Deduplicate before creating virtual relationships
WITH DISTINCT admin_outcome, aws, principal, target_role
// Create virtual relationships showing the attack path
CALL apoc.create.vRelationship(principal, 'CAN_MODIFY', {
via: 'iam:AttachRolePolicy'
}, target_role)
YIELD rel AS modify_rel
CALL apoc.create.vRelationship(target_role, 'LEADS_TO', {
technique: 'iam:AttachRolePolicy + sts:AssumeRole',
via: 'sts:AssumeRole',
reference: 'https://pathfinding.cloud/paths/iam-014'
}, admin_outcome)
YIELD rel AS escalation_rel
// Re-match paths for visualization
MATCH path_principal = (aws)--(principal)
MATCH path_target = (aws)--(target_role)
UNWIND nodes(path_principal) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_principal, path_target,
admin_outcome, modify_rel, escalation_rel,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-bedrock-privesc-passrole-code-interpreter",
name="Privilege Escalation: Bedrock Code Interpreter with PassRole",
description="Detect principals that can escalate privileges by passing a role to a Bedrock AgentCore Code Interpreter. The attacker creates a code interpreter with an arbitrary role, then invokes it to execute code with those credentials.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-bedrock',
check_title: 'Privilege Escalation',
name: 'Effective Administrator (Bedrock)',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Principal can assume roles (up to 2 hops)
OPTIONAL MATCH path_assume = (principal)-[:STS_ASSUMEROLE_ALLOW*0..2]->(acting_as:AWSRole)
WITH escalation_outcome, aws, principal, path_principal, path_assume,
CASE WHEN path_assume IS NULL THEN principal ELSE acting_as END AS effective_principal
// Find iam:PassRole permission
MATCH path_passrole = (effective_principal)--(passrole_policy:AWSPolicy)--(passrole_stmt:AWSPolicyStatement)
WHERE passrole_stmt.effect = 'Allow'
AND any(action IN passrole_stmt.action WHERE toLower(action) = 'iam:passrole' OR action = '*')
// Find Bedrock AgentCore permissions
MATCH (effective_principal)--(bedrock_policy:AWSPolicy)--(bedrock_stmt:AWSPolicyStatement)
WHERE bedrock_stmt.effect = 'Allow'
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:createcodeinterpreter' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:startsession' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
AND (
any(action IN bedrock_stmt.action WHERE toLower(action) = 'bedrock-agentcore:invoke' OR action = '*' OR toLower(action) = 'bedrock-agentcore:*')
)
// Find target roles with elevated permissions that could be passed
MATCH (aws)--(target_role:AWSRole)--(target_policy:AWSPolicy)--(target_stmt:AWSPolicyStatement)
WHERE target_stmt.effect = 'Allow'
AND (
any(action IN target_stmt.action WHERE action = '*')
OR any(action IN target_stmt.action WHERE toLower(action) = 'iam:*')
)
// Deduplicate per (principal, target_role) pair
WITH DISTINCT escalation_outcome, aws, principal, target_role
// Group by principal, collect target_roles
WITH escalation_outcome, aws, principal,
collect(DISTINCT target_role) AS target_roles,
count(DISTINCT target_role) AS target_count
// Create single virtual Bedrock node per principal
CALL apoc.create.vNode(['BedrockCodeInterpreter'], {
name: 'New Code Interpreter',
description: toString(target_count) + ' admin role(s) can be passed',
id: principal.arn,
target_role_count: target_count
})
YIELD node AS bedrock_agent
// Connect from principal (not effective_principal) to keep graph connected
CALL apoc.create.vRelationship(principal, 'CREATES_INTERPRETER', {
permissions: ['iam:PassRole', 'bedrock-agentcore:CreateCodeInterpreter', 'bedrock-agentcore:StartSession', 'bedrock-agentcore:Invoke'],
technique: 'new-passrole'
}, bedrock_agent)
YIELD rel AS create_rel
// UNWIND target_roles to show which roles can be passed
UNWIND target_roles AS target_role
CALL apoc.create.vRelationship(bedrock_agent, 'PASSES_ROLE', {}, target_role)
YIELD rel AS pass_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/bedrock-001'
}, escalation_outcome)
YIELD rel AS grants_rel
// Re-match path for visualization
MATCH path_principal = (aws)--(principal)
RETURN path_principal,
bedrock_agent, target_role, escalation_outcome, create_rel, pass_rel, grants_rel, target_count
""",
parameters=[],
),
],
}
# Flat id -> definition lookup, built from the per-provider catalog above.
# NOTE(review): duplicate ids across providers would silently overwrite each
# other here — assumes query ids are globally unique; confirm.
_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
    definition.id: definition
    for definitions in _QUERY_DEFINITIONS.values()
    for definition in definitions
}

View File

@@ -1,12 +1,13 @@
import logging
from typing import Any
from typing import Any, Iterable
from rest_framework.exceptions import APIException, ValidationError
from api.attack_paths import database as graph_database, AttackPathsQueryDefinition
from api.models import AttackPathsScan
from config.custom_logging import BackendLogger
from tasks.jobs.attack_paths.config import INTERNAL_LABELS
logger = logging.getLogger(BackendLogger.API)
@@ -101,7 +102,7 @@ def _serialize_graph(graph):
nodes.append(
{
"id": node.element_id,
"labels": list(node.labels),
"labels": _filter_labels(node.labels),
"properties": _serialize_properties(node._properties),
},
)
@@ -124,6 +125,10 @@ def _serialize_graph(graph):
}
def _filter_labels(labels: Iterable[str]) -> list[str]:
    """Drop Prowler-internal labels (e.g. Tenant, resource markers) from a node's labels."""
    visible_labels = []
    for label in labels:
        if label not in INTERNAL_LABELS:
            visible_labels.append(label)
    return visible_labels
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:
"""Convert Neo4j property values into JSON-serializable primitives."""

View File

@@ -2287,7 +2287,7 @@ class TaskViewSet(BaseRLSViewSet):
),
attack_paths_queries=extend_schema(
tags=["Attack Paths"],
summary="List attack paths queries",
summary="List Attack Paths queries",
description="Retrieve the catalog of Attack Paths queries available for this Attack Paths scan.",
responses={
200: OpenApiResponse(AttackPathsQuerySerializer(many=True)),
@@ -2307,7 +2307,7 @@ class TaskViewSet(BaseRLSViewSet):
description="Bad request (e.g., Unknown Attack Paths query for the selected provider)"
),
404: OpenApiResponse(
description="No attack paths found for the given query and parameters"
description="No Attack Paths found for the given query and parameters"
),
500: OpenApiResponse(
description="Attack Paths query execution failed due to a database error"

View File

@@ -29,7 +29,7 @@ def start_aws_ingestion(
attack_paths_scan: ProwlerAPIAttackPathsScan,
) -> dict[str, dict[str, str]]:
"""
Code based on Cartography version 0.122.0, specifically on `cartography.intel.aws.__init__.py`.
Code based on Cartography, specifically on `cartography.intel.aws.__init__.py`.
For the scan progress updates:
- The caller of this function (`tasks.jobs.attack_paths.scan.run`) has set it to 2.

View File

@@ -0,0 +1,86 @@
from dataclasses import dataclass
from typing import Callable
from config.env import env
from tasks.jobs.attack_paths import aws
# Batch size for Neo4j operations
# Overridable via the ATTACK_PATHS_BATCH_SIZE environment variable; defaults to 1000.
BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000)

# Neo4j internal labels (Prowler-specific, not provider-specific)
# - `ProwlerFinding`: Label for finding nodes created by Prowler and linked to cloud resources.
# - `ProviderResource`: Added to ALL synced nodes for provider isolation and drop/query ops.
PROWLER_FINDING_LABEL = "ProwlerFinding"
PROVIDER_RESOURCE_LABEL = "ProviderResource"
@dataclass(frozen=True)
class ProviderConfig:
    """Configuration for a cloud provider's Attack Paths integration.

    Frozen so instances can be shared safely as module-level constants.
    """

    # Provider key, e.g. "aws"; used as the lookup key in the provider registry.
    name: str
    root_node_label: str  # e.g., "AWSAccount"
    uid_field: str  # e.g., "arn"
    # Label for resources connected to the account node, enabling indexed finding lookups.
    resource_label: str  # e.g., "AWSResource"
    # Cartography-style ingestion entry point, e.g. aws.start_aws_ingestion.
    ingestion_function: Callable
# Provider Configurations
# -----------------------

AWS_CONFIG = ProviderConfig(
    name="aws",
    root_node_label="AWSAccount",
    uid_field="arn",
    resource_label="AWSResource",
    ingestion_function=aws.start_aws_ingestion,
)

# Registry of all providers that support Attack Paths, keyed by provider type.
PROVIDER_CONFIGS: dict[str, ProviderConfig] = {
    "aws": AWS_CONFIG,
}

# Labels added by Prowler that should be filtered from API responses
# Derived from provider configs + common internal labels
# NOTE(review): PROWLER_FINDING_LABEL is not filtered here, so finding nodes
# keep their label in API responses — confirm this is intentional.
INTERNAL_LABELS: list[str] = [
    "Tenant",
    PROVIDER_RESOURCE_LABEL,
    # Add all provider-specific resource labels
    *[config.resource_label for config in PROVIDER_CONFIGS.values()],
]
# Provider Config Accessors
# -------------------------
def is_provider_available(provider_type: str) -> bool:
    """Check if a provider type is available for Attack Paths scans."""
    # Availability is simply registration in the provider config registry.
    return PROVIDER_CONFIGS.get(provider_type) is not None
def get_cartography_ingestion_function(provider_type: str) -> Callable | None:
    """Get the Cartography ingestion function for a provider type."""
    try:
        return PROVIDER_CONFIGS[provider_type].ingestion_function
    except KeyError:
        # Unregistered provider: signal "no ingestion available" to the caller.
        return None
def get_root_node_label(provider_type: str) -> str:
    """Get the root node label for a provider type (e.g., AWSAccount)."""
    if provider_type not in PROVIDER_CONFIGS:
        # Sentinel label for unregistered providers (matches no real node).
        return "UnknownProviderAccount"
    return PROVIDER_CONFIGS[provider_type].root_node_label
def get_node_uid_field(provider_type: str) -> str:
    """Get the UID field for a provider type (e.g., arn for AWS)."""
    if provider_type not in PROVIDER_CONFIGS:
        # Sentinel value for unregistered providers.
        return "UnknownProviderUID"
    return PROVIDER_CONFIGS[provider_type].uid_field
def get_provider_resource_label(provider_type: str) -> str:
    """Get the resource label for a provider type (e.g., `AWSResource`)."""
    if provider_type not in PROVIDER_CONFIGS:
        # Sentinel label for unregistered providers.
        return "UnknownProviderResource"
    return PROVIDER_CONFIGS[provider_type].resource_label

View File

@@ -1,7 +1,6 @@
from datetime import datetime, timezone
from typing import Any
from django.db.models import Q
from cartography.config import Config as CartographyConfig
from api.db_utils import rls_transaction
@@ -10,7 +9,7 @@ from api.models import (
Provider as ProwlerAPIProvider,
StateChoices,
)
from tasks.jobs.attack_paths.providers import is_provider_available
from tasks.jobs.attack_paths.config import is_provider_available
def can_provider_run_attack_paths_scan(tenant_id: str, provider_id: int) -> bool:
@@ -145,24 +144,3 @@ def update_old_attack_paths_scan(
with rls_transaction(old_attack_paths_scan.tenant_id):
old_attack_paths_scan.is_graph_database_deleted = True
old_attack_paths_scan.save(update_fields=["is_graph_database_deleted"])
def get_provider_graph_database_names(tenant_id: str, provider_id: str) -> list[str]:
    """
    Return existing graph database names for a tenant/provider.

    Note: For accessing the `AttackPathsScan` we need to use `all_objects` manager because the provider is soft-deleted.
    """
    with rls_transaction(tenant_id):
        graph_databases_names_qs = (
            ProwlerAPIAttackPathsScan.all_objects.filter(
                # Exclude scans whose graph database was never set (empty string or NULL)
                ~Q(graph_database=""),
                graph_database__isnull=False,
                provider_id=provider_id,
                # ... and scans whose graph database was already dropped.
                is_graph_database_deleted=False,
            )
            .values_list("graph_database", flat=True)
            .distinct()
        )
        # Materialize inside the RLS transaction so the lazy queryset is not
        # evaluated after the transaction closes.
        return list(graph_databases_names_qs)

View File

@@ -0,0 +1,355 @@
"""
Prowler findings ingestion into Neo4j graph.
This module handles:
- Adding resource labels to Cartography nodes for efficient lookups
- Loading Prowler findings into the graph
- Linking findings to resources
- Cleaning up stale findings
"""
from collections import defaultdict
from dataclasses import asdict, dataclass, fields
from typing import Any, Generator
from uuid import UUID
import neo4j
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding as FindingModel
from api.models import Provider, ResourceFindingMapping
from prowler.config import config as ProwlerConfig
from tasks.jobs.attack_paths.config import (
BATCH_SIZE,
get_node_uid_field,
get_provider_resource_label,
get_root_node_label,
)
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
from tasks.jobs.attack_paths.queries import (
ADD_RESOURCE_LABEL_TEMPLATE,
CLEANUP_FINDINGS_TEMPLATE,
INSERT_FINDING_TEMPLATE,
render_cypher_template,
)
logger = get_task_logger(__name__)
# Type Definitions
# -----------------

# Maps dataclass field names to Django ORM query field names
# (used by Finding.get_db_query_fields when the model field lives behind a
# JSON lookup rather than a plain column).
_DB_FIELD_MAP: dict[str, str] = {
    "check_title": "check_metadata__checktitle",
}
@dataclass(slots=True)
class Finding:
    """
    Finding data for Neo4j ingestion.

    Field names double as Neo4j property names (see `to_dict`).
    Can be created from a Django .values() query result using from_db_record().
    """

    id: str
    uid: str
    inserted_at: str
    updated_at: str
    first_seen_at: str
    scan_id: str
    delta: str
    status: str
    status_extended: str
    severity: str
    check_id: str
    check_title: str
    muted: bool
    muted_reason: str | None
    # UID of the cloud resource this record is attached to; populated during
    # enrichment (one Finding instance per finding/resource pair).
    resource_uid: str | None = None

    @classmethod
    def get_db_query_fields(cls) -> tuple[str, ...]:
        """Get field names for Django .values() query.

        `resource_uid` is excluded (filled in later from the resource mapping);
        other names are translated through _DB_FIELD_MAP where the ORM field
        name differs from the dataclass field name.
        """
        return tuple(
            _DB_FIELD_MAP.get(f.name, f.name)
            for f in fields(cls)
            if f.name != "resource_uid"
        )

    @classmethod
    def from_db_record(cls, record: dict[str, Any], resource_uid: str) -> "Finding":
        """Create a Finding from a Django .values() query result.

        `id`, `scan_id` and `check_id` are coerced to str so the payload is
        JSON/Neo4j friendly.
        """
        return cls(
            id=str(record["id"]),
            uid=record["uid"],
            inserted_at=record["inserted_at"],
            updated_at=record["updated_at"],
            first_seen_at=record["first_seen_at"],
            scan_id=str(record["scan_id"]),
            delta=record["delta"],
            status=record["status"],
            status_extended=record["status_extended"],
            severity=record["severity"],
            check_id=str(record["check_id"]),
            check_title=record["check_metadata__checktitle"],
            muted=record["muted"],
            muted_reason=record["muted_reason"],
            resource_uid=resource_uid,
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dict for Neo4j ingestion."""
        return asdict(self)
# Public API
# ----------
def create_findings_indexes(neo4j_session: neo4j.Session) -> None:
    """Create indexes for Prowler findings and resource lookups.

    Thin wrapper over create_indexes(IndexType.FINDINGS); see
    tasks.jobs.attack_paths.indexes for the actual statements.
    """
    create_indexes(neo4j_session, IndexType.FINDINGS)
def analysis(
    neo4j_session: neo4j.Session,
    prowler_api_provider: Provider,
    scan_id: str,
    config: CartographyConfig,
) -> None:
    """
    Main entry point for Prowler findings analysis.

    Adds resource labels, loads findings, and cleans up stale data — in that
    order: labeling runs first so the findings insert can use the
    resource-label indexes, and cleanup uses `config.update_tag` to tell fresh
    findings from stale ones.
    """
    add_resource_label(
        neo4j_session, prowler_api_provider.provider, str(prowler_api_provider.uid)
    )
    # Lazy generator: batches are fetched from the DB as load_findings consumes them.
    findings_data = stream_findings_with_resources(prowler_api_provider, scan_id)
    load_findings(neo4j_session, findings_data, prowler_api_provider, config)
    cleanup_findings(neo4j_session, prowler_api_provider, config)
def add_resource_label(
    neo4j_session: neo4j.Session, provider_type: str, provider_uid: str
) -> int:
    """
    Add a common resource label to all nodes connected to the provider account.

    This enables index usage for resource lookups in the findings query,
    since Cartography nodes don't have a common parent label.

    Returns the total number of nodes labeled.
    """
    resource_label = get_provider_resource_label(provider_type)
    query = render_cypher_template(
        ADD_RESOURCE_LABEL_TEMPLATE,
        {
            "__ROOT_LABEL__": get_root_node_label(provider_type),
            "__RESOURCE_LABEL__": resource_label,
        },
    )
    logger.info(f"Adding {resource_label} label to all resources for {provider_uid}")

    parameters = {"provider_uid": provider_uid, "batch_size": BATCH_SIZE}
    total_labeled = 0
    # The query labels at most $batch_size nodes per run; repeat until it
    # reports zero newly-labeled nodes.
    while True:
        record = neo4j_session.run(query, parameters).single()
        batch_labeled = record.get("labeled_count", 0)
        if batch_labeled == 0:
            break
        total_labeled += batch_labeled
        logger.info(f"Labeled {total_labeled} nodes with {resource_label}")
    return total_labeled
def load_findings(
    neo4j_session: neo4j.Session,
    findings_batches: Generator[list[Finding], None, None],
    prowler_api_provider: Provider,
    config: CartographyConfig,
) -> None:
    """Load Prowler findings into the graph, linking them to resources."""
    provider_type = prowler_api_provider.provider
    query = render_cypher_template(
        INSERT_FINDING_TEMPLATE,
        {
            "__ROOT_NODE_LABEL__": get_root_node_label(provider_type),
            "__NODE_UID_FIELD__": get_node_uid_field(provider_type),
            "__RESOURCE_LABEL__": get_provider_resource_label(provider_type),
        },
    )
    base_parameters = {
        "provider_uid": str(prowler_api_provider.uid),
        "last_updated": config.update_tag,
        "prowler_version": ProwlerConfig.prowler_version,
    }

    total_records = 0
    batch_num = 0
    for batch_num, batch in enumerate(findings_batches, start=1):
        total_records += len(batch)
        logger.info(f"Loading findings batch {batch_num} ({len(batch)} records)")
        neo4j_session.run(
            query,
            {**base_parameters, "findings_data": [f.to_dict() for f in batch]},
        )
    logger.info(f"Finished loading {total_records} records in {batch_num} batches")
def cleanup_findings(
    neo4j_session: neo4j.Session,
    prowler_api_provider: Provider,
    config: CartographyConfig,
) -> None:
    """Remove stale findings (classic Cartography behaviour)."""
    parameters = {
        "provider_uid": str(prowler_api_provider.uid),
        "last_updated": config.update_tag,
        "batch_size": BATCH_SIZE,
    }
    batch = 0
    # Delete in batches until a run reports zero stale findings removed.
    while True:
        batch += 1
        logger.info(f"Cleaning findings batch {batch}")
        result = neo4j_session.run(CLEANUP_FINDINGS_TEMPLATE, parameters)
        if result.single().get("deleted_findings_count", 0) == 0:
            break
# Findings Streaming (Generator-based)
# -------------------------------------
def stream_findings_with_resources(
    prowler_api_provider: Provider,
    scan_id: str,
) -> Generator[list[Finding], None, None]:
    """
    Stream findings with their associated resources in batches.

    Uses keyset pagination for efficient traversal of large datasets.
    Memory efficient: yields one batch at a time, never holds all findings in memory.
    Batches that end up empty after enrichment (no mapped resources) are skipped.
    """
    logger.info(
        f"Starting findings stream for scan {scan_id} "
        f"(tenant {prowler_api_provider.tenant_id}) with batch size {BATCH_SIZE}"
    )
    tenant_id = prowler_api_provider.tenant_id
    for batch in _paginate_findings(tenant_id, scan_id):
        # One finding may expand into several Finding records (one per resource).
        enriched = _enrich_batch_with_resources(batch, tenant_id)
        if enriched:
            yield enriched
    logger.info(f"Finished streaming findings for scan {scan_id}")
def _paginate_findings(
    tenant_id: str,
    scan_id: str,
) -> Generator[list[dict[str, Any]], None, None]:
    """
    Paginate through findings using keyset pagination.

    Each iteration fetches one batch within its own RLS transaction,
    preventing long-held database connections.
    """
    cursor = None  # id of the last finding seen; None means "start from the top"
    page = 0
    while True:
        page += 1
        rows = _fetch_findings_batch(tenant_id, scan_id, cursor)
        logger.info(f"Iteration #{page}: fetched {len(rows)} findings")
        if not rows:
            return
        cursor = rows[-1]["id"]
        yield rows
def _fetch_findings_batch(
    tenant_id: str,
    scan_id: str,
    after_id: UUID | None,
) -> list[dict[str, Any]]:
    """
    Fetch a single batch of findings from the database.
    Uses read replica and RLS-scoped transaction.
    """
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        # all_objects avoids the ActiveProviderManager's implicit JOIN through
        # Scan -> Provider (the is_deleted=False check); the provider is
        # already validated as active in this context.
        filters: dict[str, Any] = {"scan_id": scan_id}
        if after_id is not None:
            # Keyset pagination: only rows past the previous batch's last id.
            filters["id__gt"] = after_id
        queryset = (
            FindingModel.all_objects.filter(**filters)
            .order_by("id")
            .values(*Finding.get_db_query_fields())
        )
        return list(queryset[:BATCH_SIZE])
# Batch Enrichment
# -----------------
def _enrich_batch_with_resources(
    findings_batch: list[dict[str, Any]],
    tenant_id: str,
) -> list[Finding]:
    """
    Enrich findings with their resource UIDs.
    One finding with N resources becomes N output records.
    Findings without resources are skipped.
    """
    resource_map = _build_finding_resource_map(
        [record["id"] for record in findings_batch], tenant_id
    )
    enriched: list[Finding] = []
    for record in findings_batch:
        for resource_uid in resource_map.get(record["id"], []):
            enriched.append(Finding.from_db_record(record, resource_uid))
    return enriched
def _build_finding_resource_map(
    finding_ids: list[UUID], tenant_id: str
) -> dict[UUID, list[str]]:
    """Build mapping from finding_id to list of resource UIDs."""
    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        # Single join through the mapping table: (finding_id, resource uid) pairs.
        pairs = ResourceFindingMapping.objects.filter(
            finding_id__in=finding_ids
        ).values_list("finding_id", "resource__uid")
        mapping: dict[UUID, list[str]] = defaultdict(list)
        for finding_id, resource_uid in pairs:
            mapping[finding_id].append(resource_uid)
    return mapping

View File

@@ -0,0 +1,64 @@
from enum import Enum
import neo4j
from cartography.client.core.tx import run_write_query
from celery.utils.log import get_task_logger
from tasks.jobs.attack_paths.config import (
PROWLER_FINDING_LABEL,
PROVIDER_RESOURCE_LABEL,
)
logger = get_task_logger(__name__)
class IndexType(Enum):
    """Types of indexes that can be created."""

    # Indexes backing Prowler finding nodes and resource lookups
    # (see FINDINGS_INDEX_STATEMENTS).
    FINDINGS = "findings"
    # Indexes backing ProviderResource sync operations
    # (see SYNC_INDEX_STATEMENTS).
    SYNC = "sync"
# Indexes for Prowler findings and resource lookups
# NOTE(review): the AWSResource statements are AWS-specific; other providers
# would need their own entries here — confirm when adding providers.
FINDINGS_INDEX_STATEMENTS = [
    # Resources indexes for quick Prowler Finding lookups
    "CREATE INDEX aws_resource_arn IF NOT EXISTS FOR (n:AWSResource) ON (n.arn);",
    "CREATE INDEX aws_resource_id IF NOT EXISTS FOR (n:AWSResource) ON (n.id);",
    # Prowler Finding indexes
    f"CREATE INDEX prowler_finding_id IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.id);",
    f"CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.provider_uid);",
    f"CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.lastupdated);",
    f"CREATE INDEX prowler_finding_status IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.status);",
]

# Indexes for provider resource sync operations
SYNC_INDEX_STATEMENTS = [
    f"CREATE INDEX provider_element_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n.provider_element_id);",
    f"CREATE INDEX provider_resource_provider_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n.provider_id);",
]
def create_indexes(neo4j_session: neo4j.Session, index_type: IndexType) -> None:
    """
    Create indexes for the specified type.

    Args:
        `neo4j_session`: The Neo4j session to use
        `index_type`: The type of indexes to create (FINDINGS or SYNC)
    """
    if index_type is IndexType.FINDINGS:
        logger.info("Creating indexes for Prowler Findings node types")
        # Findings indexes go through Cartography's write-query helper.
        for statement in FINDINGS_INDEX_STATEMENTS:
            run_write_query(neo4j_session, statement)
        return
    if index_type is IndexType.SYNC:
        logger.info("Ensuring ProviderResource indexes exist")
        for statement in SYNC_INDEX_STATEMENTS:
            neo4j_session.run(statement)
def create_all_indexes(neo4j_session: neo4j.Session) -> None:
    """Create all indexes (both findings and sync)."""
    for index_type in (IndexType.FINDINGS, IndexType.SYNC):
        create_indexes(neo4j_session, index_type)

View File

@@ -1,23 +0,0 @@
AVAILABLE_PROVIDERS: list[str] = [
"aws",
]
ROOT_NODE_LABELS: dict[str, str] = {
"aws": "AWSAccount",
}
NODE_UID_FIELDS: dict[str, str] = {
"aws": "arn",
}
def is_provider_available(provider_type: str) -> bool:
return provider_type in AVAILABLE_PROVIDERS
def get_root_node_label(provider_type: str) -> str:
return ROOT_NODE_LABELS.get(provider_type, "UnknownProviderAccount")
def get_node_uid_field(provider_type: str) -> str:
return NODE_UID_FIELDS.get(provider_type, "UnknownProviderUID")

View File

@@ -1,290 +0,0 @@
from collections import defaultdict
from typing import Generator
import neo4j
from cartography.client.core.tx import run_write_query
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from config.env import env
from tasks.jobs.attack_paths.providers import get_node_uid_field, get_root_node_label
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding, Provider, ResourceFindingMapping
from prowler.config import config as ProwlerConfig
logger = get_task_logger(__name__)
BATCH_SIZE = env.int("ATTACK_PATHS_FINDINGS_BATCH_SIZE", 1000)
# Index statements for Prowler finding nodes.
INDEX_STATEMENTS = [
    "CREATE INDEX prowler_finding_id IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.id);",
    "CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.provider_uid);",
    "CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.lastupdated);",
    # Fixed: this index covers n.status, so name it accordingly (it was
    # misnamed "prowler_finding_check_id" while actually indexing n.status).
    "CREATE INDEX prowler_finding_status IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.status);",
]
INSERT_STATEMENT_TEMPLATE = """
MATCH (account:__ROOT_NODE_LABEL__ {id: $provider_uid})
UNWIND $findings_data AS finding_data
OPTIONAL MATCH (account)-->(resource_by_uid)
WHERE resource_by_uid.__NODE_UID_FIELD__ = finding_data.resource_uid
WITH account, finding_data, resource_by_uid
OPTIONAL MATCH (account)-->(resource_by_id)
WHERE resource_by_uid IS NULL
AND resource_by_id.id = finding_data.resource_uid
WITH account, finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource
WHERE resource IS NOT NULL
MERGE (finding:ProwlerFinding {id: finding_data.id})
ON CREATE SET
finding.id = finding_data.id,
finding.uid = finding_data.uid,
finding.inserted_at = finding_data.inserted_at,
finding.updated_at = finding_data.updated_at,
finding.first_seen_at = finding_data.first_seen_at,
finding.scan_id = finding_data.scan_id,
finding.delta = finding_data.delta,
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.severity = finding_data.severity,
finding.check_id = finding_data.check_id,
finding.check_title = finding_data.check_title,
finding.muted = finding_data.muted,
finding.muted_reason = finding_data.muted_reason,
finding.provider_uid = $provider_uid,
finding.firstseen = timestamp(),
finding.lastupdated = $last_updated,
finding._module_name = 'cartography:prowler',
finding._module_version = $prowler_version
ON MATCH SET
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.lastupdated = $last_updated
MERGE (resource)-[rel:HAS_FINDING]->(finding)
ON CREATE SET
rel.provider_uid = $provider_uid,
rel.firstseen = timestamp(),
rel.lastupdated = $last_updated,
rel._module_name = 'cartography:prowler',
rel._module_version = $prowler_version
ON MATCH SET
rel.lastupdated = $last_updated
"""
CLEANUP_STATEMENT = """
MATCH (finding:ProwlerFinding {provider_uid: $provider_uid})
WHERE finding.lastupdated < $last_updated
WITH finding LIMIT $batch_size
DETACH DELETE finding
RETURN COUNT(finding) AS deleted_findings_count
"""
def create_indexes(neo4j_session: neo4j.Session) -> None:
"""
Code based on Cartography version 0.122.0, specifically on `cartography.intel.create_indexes.run`.
"""
logger.info("Creating indexes for Prowler Findings node types")
for statement in INDEX_STATEMENTS:
run_write_query(neo4j_session, statement)
def analysis(
neo4j_session: neo4j.Session,
prowler_api_provider: Provider,
scan_id: str,
config: CartographyConfig,
) -> None:
findings_data = get_provider_last_scan_findings(prowler_api_provider, scan_id)
load_findings(neo4j_session, findings_data, prowler_api_provider, config)
cleanup_findings(neo4j_session, prowler_api_provider, config)
def get_provider_last_scan_findings(
prowler_api_provider: Provider,
scan_id: str,
) -> Generator[list[dict[str, str]], None, None]:
"""
Generator that yields batches of finding-resource pairs.
Two-step query approach per batch:
1. Paginate findings for scan (single table, indexed by scan_id)
2. Batch-fetch resource UIDs via mapping table (single join)
3. Merge and yield flat structure for Neo4j
Memory efficient: never holds more than BATCH_SIZE findings in memory.
"""
logger.info(
f"Starting findings fetch for scan {scan_id} (tenant {prowler_api_provider.tenant_id}) with batch size {BATCH_SIZE}"
)
iteration = 0
last_id = None
while True:
iteration += 1
with rls_transaction(prowler_api_provider.tenant_id, using=READ_REPLICA_ALIAS):
# Use all_objects to avoid the ActiveProviderManager's implicit JOIN
# through Scan -> Provider (to check is_deleted=False).
# The provider is already validated as active in this context.
qs = Finding.all_objects.filter(scan_id=scan_id).order_by("id")
if last_id is not None:
qs = qs.filter(id__gt=last_id)
findings_batch = list(
qs.values(
"id",
"uid",
"inserted_at",
"updated_at",
"first_seen_at",
"scan_id",
"delta",
"status",
"status_extended",
"severity",
"check_id",
"check_metadata__checktitle",
"muted",
"muted_reason",
)[:BATCH_SIZE]
)
logger.info(
f"Iteration #{iteration} fetched {len(findings_batch)} findings"
)
if not findings_batch:
logger.info(
f"No findings returned for iteration #{iteration}; stopping pagination"
)
break
last_id = findings_batch[-1]["id"]
enriched_batch = _enrich_and_flatten_batch(findings_batch)
# Yield outside the transaction
if enriched_batch:
yield enriched_batch
logger.info(f"Finished fetching findings for scan {scan_id}")
def _enrich_and_flatten_batch(
findings_batch: list[dict],
) -> list[dict[str, str]]:
"""
Fetch resource UIDs for a batch of findings and return flat structure.
One finding with 3 resources becomes 3 dicts (same output format as before).
Must be called within an RLS transaction context.
"""
finding_ids = [f["id"] for f in findings_batch]
# Single join: mapping -> resource
resource_mappings = ResourceFindingMapping.objects.filter(
finding_id__in=finding_ids
).values_list("finding_id", "resource__uid")
# Build finding_id -> [resource_uids] mapping
finding_resources = defaultdict(list)
for finding_id, resource_uid in resource_mappings:
finding_resources[finding_id].append(resource_uid)
# Flatten: one dict per (finding, resource) pair
results = []
for f in findings_batch:
resource_uids = finding_resources.get(f["id"], [])
if not resource_uids:
continue
for resource_uid in resource_uids:
results.append(
{
"resource_uid": str(resource_uid),
"id": str(f["id"]),
"uid": f["uid"],
"inserted_at": f["inserted_at"],
"updated_at": f["updated_at"],
"first_seen_at": f["first_seen_at"],
"scan_id": str(f["scan_id"]),
"delta": f["delta"],
"status": f["status"],
"status_extended": f["status_extended"],
"severity": f["severity"],
"check_id": str(f["check_id"]),
"check_title": f["check_metadata__checktitle"],
"muted": f["muted"],
"muted_reason": f["muted_reason"],
}
)
return results
def load_findings(
neo4j_session: neo4j.Session,
findings_batches: Generator[list[dict[str, str]], None, None],
prowler_api_provider: Provider,
config: CartographyConfig,
) -> None:
replacements = {
"__ROOT_NODE_LABEL__": get_root_node_label(prowler_api_provider.provider),
"__NODE_UID_FIELD__": get_node_uid_field(prowler_api_provider.provider),
}
query = INSERT_STATEMENT_TEMPLATE
for replace_key, replace_value in replacements.items():
query = query.replace(replace_key, replace_value)
parameters = {
"provider_uid": str(prowler_api_provider.uid),
"last_updated": config.update_tag,
"prowler_version": ProwlerConfig.prowler_version,
}
batch_num = 0
total_records = 0
for batch in findings_batches:
batch_num += 1
batch_size = len(batch)
total_records += batch_size
parameters["findings_data"] = batch
logger.info(f"Loading findings batch {batch_num} ({batch_size} records)")
neo4j_session.run(query, parameters)
logger.info(f"Finished loading {total_records} records in {batch_num} batches")
def cleanup_findings(
neo4j_session: neo4j.Session,
prowler_api_provider: Provider,
config: CartographyConfig,
) -> None:
parameters = {
"provider_uid": str(prowler_api_provider.uid),
"last_updated": config.update_tag,
"batch_size": BATCH_SIZE,
}
batch = 1
deleted_count = 1
while deleted_count > 0:
logger.info(f"Cleaning findings batch {batch}")
result = neo4j_session.run(CLEANUP_STATEMENT, parameters)
deleted_count = result.single().get("deleted_findings_count", 0)
batch += 1

View File

@@ -0,0 +1,134 @@
# Cypher query templates for Attack Paths operations
from tasks.jobs.attack_paths.config import (
PROWLER_FINDING_LABEL,
PROVIDER_RESOURCE_LABEL,
)
def render_cypher_template(template: str, replacements: dict[str, str]) -> str:
    """
    Render a Cypher query template by replacing placeholders.

    Placeholders use `__DOUBLE_UNDERSCORE__` format to avoid conflicts
    with Cypher syntax. Replacements are applied sequentially via simple
    string substitution.
    """
    rendered = template
    for placeholder in replacements:
        rendered = rendered.replace(placeholder, replacements[placeholder])
    return rendered
# Findings queries (used by findings.py)
# ---------------------------------------

# Labels one batch (LIMIT $batch_size) of nodes connected to the provider
# account with the provider resource label; callers re-run it until
# labeled_count comes back 0.
ADD_RESOURCE_LABEL_TEMPLATE = """
MATCH (account:__ROOT_LABEL__ {id: $provider_uid})-->(r)
WHERE NOT r:__ROOT_LABEL__ AND NOT r:__RESOURCE_LABEL__
WITH r LIMIT $batch_size
SET r:__RESOURCE_LABEL__
RETURN COUNT(r) AS labeled_count
"""
INSERT_FINDING_TEMPLATE = f"""
MATCH (account:__ROOT_NODE_LABEL__ {{id: $provider_uid}})
UNWIND $findings_data AS finding_data
OPTIONAL MATCH (account)-->(resource_by_uid:__RESOURCE_LABEL__)
WHERE resource_by_uid.__NODE_UID_FIELD__ = finding_data.resource_uid
WITH account, finding_data, resource_by_uid
OPTIONAL MATCH (account)-->(resource_by_id:__RESOURCE_LABEL__)
WHERE resource_by_uid IS NULL
AND resource_by_id.id = finding_data.resource_uid
WITH account, finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource
WHERE resource IS NOT NULL
MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}})
ON CREATE SET
finding.id = finding_data.id,
finding.uid = finding_data.uid,
finding.inserted_at = finding_data.inserted_at,
finding.updated_at = finding_data.updated_at,
finding.first_seen_at = finding_data.first_seen_at,
finding.scan_id = finding_data.scan_id,
finding.delta = finding_data.delta,
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.severity = finding_data.severity,
finding.check_id = finding_data.check_id,
finding.check_title = finding_data.check_title,
finding.muted = finding_data.muted,
finding.muted_reason = finding_data.muted_reason,
finding.provider_uid = $provider_uid,
finding.firstseen = timestamp(),
finding.lastupdated = $last_updated,
finding._module_name = 'cartography:prowler',
finding._module_version = $prowler_version
ON MATCH SET
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.lastupdated = $last_updated
MERGE (resource)-[rel:HAS_FINDING]->(finding)
ON CREATE SET
rel.provider_uid = $provider_uid,
rel.firstseen = timestamp(),
rel.lastupdated = $last_updated,
rel._module_name = 'cartography:prowler',
rel._module_version = $prowler_version
ON MATCH SET
rel.lastupdated = $last_updated
"""
# Batched stale-finding cleanup: findings whose lastupdated predates the
# current sync's $last_updated tag are detached and deleted, $batch_size at
# a time; callers re-run until deleted_findings_count comes back 0.
CLEANUP_FINDINGS_TEMPLATE = f"""
MATCH (finding:{PROWLER_FINDING_LABEL} {{provider_uid: $provider_uid}})
WHERE finding.lastupdated < $last_updated
WITH finding LIMIT $batch_size
DETACH DELETE finding
RETURN COUNT(finding) AS deleted_findings_count
"""
# Sync queries (used by sync.py)
# -------------------------------
NODE_FETCH_QUERY = """
MATCH (n)
WHERE id(n) > $last_id
RETURN id(n) AS internal_id,
elementId(n) AS element_id,
labels(n) AS labels,
properties(n) AS props
ORDER BY internal_id
LIMIT $batch_size
"""
RELATIONSHIPS_FETCH_QUERY = """
MATCH ()-[r]->()
WHERE id(r) > $last_id
RETURN id(r) AS internal_id,
type(r) AS rel_type,
elementId(startNode(r)) AS start_element_id,
elementId(endNode(r)) AS end_element_id,
properties(r) AS props
ORDER BY internal_id
LIMIT $batch_size
"""
NODE_SYNC_TEMPLATE = """
UNWIND $rows AS row
MERGE (n:__NODE_LABELS__ {provider_element_id: row.provider_element_id})
SET n += row.props
SET n.provider_id = $provider_id
"""
RELATIONSHIP_SYNC_TEMPLATE = f"""
UNWIND $rows AS row
MATCH (s:{PROVIDER_RESOURCE_LABEL} {{provider_element_id: row.start_element_id}})
MATCH (t:{PROVIDER_RESOURCE_LABEL} {{provider_element_id: row.end_element_id}})
MERGE (s)-[r:__REL_TYPE__ {{provider_element_id: row.provider_element_id}}]->(t)
SET r += row.props
SET r.provider_id = $provider_id
"""

View File

@@ -1,8 +1,7 @@
import logging
import time
import asyncio
from typing import Any, Callable
from typing import Any
from cartography.config import Config as CartographyConfig
from cartography.intel import analysis as cartography_analysis
@@ -17,7 +16,8 @@ from api.models import (
StateChoices,
)
from api.utils import initialize_prowler_provider
from tasks.jobs.attack_paths import aws, db_utils, prowler, utils
from tasks.jobs.attack_paths import db_utils, findings, sync, utils
from tasks.jobs.attack_paths.config import get_cartography_ingestion_function
# Without this Celery goes crazy with Cartography logging
logging.getLogger("cartography").setLevel(logging.ERROR)
@@ -25,18 +25,10 @@ logging.getLogger("neo4j").propagate = False
logger = get_task_logger(__name__)
CARTOGRAPHY_INGESTION_FUNCTIONS: dict[str, Callable] = {
"aws": aws.start_aws_ingestion,
}
def get_cartography_ingestion_function(provider_type: str) -> Callable | None:
return CARTOGRAPHY_INGESTION_FUNCTIONS.get(provider_type)
def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
"""
Code based on Cartography version 0.122.0, specifically on `cartography.cli.main`, `cartography.cli.CLI.main`,
Code based on Cartography, specifically on `cartography.cli.main`, `cartography.cli.CLI.main`,
`cartography.sync.run_with_config` and `cartography.sync.Sync.run`.
"""
ingestion_exceptions = {} # This will hold any exceptions raised during ingestion
@@ -76,22 +68,36 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
tenant_id, scan_id, prowler_api_provider.id
)
tmp_database_name = graph_database.get_database_name(
attack_paths_scan.id, temporary=True
)
tenant_database_name = graph_database.get_database_name(
prowler_api_provider.tenant_id
)
# While creating the Cartography configuration, attributes `neo4j_user` and `neo4j_password` are not really needed in this config object
cartography_config = CartographyConfig(
tmp_cartography_config = CartographyConfig(
neo4j_uri=graph_database.get_uri(),
neo4j_database=graph_database.get_database_name(attack_paths_scan.id),
neo4j_database=tmp_database_name,
update_tag=int(time.time()),
)
tenant_cartography_config = CartographyConfig(
neo4j_uri=tmp_cartography_config.neo4j_uri,
neo4j_database=tenant_database_name,
update_tag=tmp_cartography_config.update_tag,
)
# Starting the Attack Paths scan
db_utils.starting_attack_paths_scan(attack_paths_scan, task_id, cartography_config)
db_utils.starting_attack_paths_scan(
attack_paths_scan, task_id, tenant_cartography_config
)
try:
logger.info(
f"Creating Neo4j database {cartography_config.neo4j_database} for tenant {prowler_api_provider.tenant_id}"
f"Creating Neo4j database {tmp_cartography_config.neo4j_database} for tenant {prowler_api_provider.tenant_id}"
)
graph_database.create_database(cartography_config.neo4j_database)
graph_database.create_database(tmp_cartography_config.neo4j_database)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 1)
logger.info(
@@ -99,18 +105,18 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
with graph_database.get_session(
cartography_config.neo4j_database
) as neo4j_session:
tmp_cartography_config.neo4j_database
) as tmp_neo4j_session:
# Indexes creation
cartography_create_indexes.run(neo4j_session, cartography_config)
prowler.create_indexes(neo4j_session)
cartography_create_indexes.run(tmp_neo4j_session, tmp_cartography_config)
findings.create_findings_indexes(tmp_neo4j_session)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 2)
# The real scan, where iterates over cloud services
ingestion_exceptions = _call_within_event_loop(
ingestion_exceptions = utils.call_within_event_loop(
cartography_ingestion_function,
neo4j_session,
cartography_config,
tmp_neo4j_session,
tmp_cartography_config,
prowler_api_provider,
prowler_sdk_provider,
attack_paths_scan,
@@ -120,43 +126,92 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
logger.info(
f"Syncing Cartography ontology for AWS account {prowler_api_provider.uid}"
)
cartography_ontology.run(neo4j_session, cartography_config)
cartography_ontology.run(tmp_neo4j_session, tmp_cartography_config)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 95)
logger.info(
f"Syncing Cartography analysis for AWS account {prowler_api_provider.uid}"
)
cartography_analysis.run(neo4j_session, cartography_config)
cartography_analysis.run(tmp_neo4j_session, tmp_cartography_config)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96)
# Adding Prowler nodes and relationships
logger.info(
f"Syncing Prowler analysis for AWS account {prowler_api_provider.uid}"
)
prowler.analysis(
neo4j_session, prowler_api_provider, scan_id, cartography_config
findings.analysis(
tmp_neo4j_session, prowler_api_provider, scan_id, tmp_cartography_config
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 97)
logger.info(
f"Clearing Neo4j cache for database {cartography_config.neo4j_database}"
f"Clearing Neo4j cache for database {tmp_cartography_config.neo4j_database}"
)
graph_database.clear_cache(cartography_config.neo4j_database)
graph_database.clear_cache(tmp_cartography_config.neo4j_database)
logger.info(
f"Ensuring tenant database {tenant_database_name}, and its indexes, exists for tenant {prowler_api_provider.tenant_id}"
)
graph_database.create_database(tenant_database_name)
with graph_database.get_session(tenant_database_name) as tenant_neo4j_session:
cartography_create_indexes.run(
tenant_neo4j_session, tenant_cartography_config
)
findings.create_findings_indexes(tenant_neo4j_session)
sync.create_sync_indexes(tenant_neo4j_session)
logger.info(f"Deleting existing provider graph in {tenant_database_name}")
graph_database.drop_subgraph(
database=tenant_database_name,
provider_id=str(prowler_api_provider.id),
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 98)
logger.info(
f"Syncing graph from {tmp_database_name} into {tenant_database_name}"
)
sync.sync_graph(
source_database=tmp_database_name,
target_database=tenant_database_name,
provider_id=str(prowler_api_provider.id),
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 99)
logger.info(f"Clearing Neo4j cache for database {tenant_database_name}")
graph_database.clear_cache(tenant_database_name)
logger.info(
f"Completed Cartography ({attack_paths_scan.id}) for "
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
# Handling databases changes
# TODO
# This piece of code delete old Neo4j databases for this tenant's provider
# When we clean all of these databases we need to:
# - Delete this block
# - Delete function from `db_utils` the functions get_old_attack_paths_scans` & `update_old_attack_paths_scan`
# - Remove `graph_database` & `is_graph_database_deleted` from the AttackPathsScan model:
# - Check indexes
# - Create migration
# - The use of `attack_paths_scan.graph_database` on `views` and `views_helpers`
# - Tests
old_attack_paths_scans = db_utils.get_old_attack_paths_scans(
prowler_api_provider.tenant_id,
prowler_api_provider.id,
attack_paths_scan.id,
)
for old_attack_paths_scan in old_attack_paths_scans:
graph_database.drop_database(old_attack_paths_scan.graph_database)
old_graph_database = old_attack_paths_scan.graph_database
if old_graph_database and old_graph_database != tenant_database_name:
logger.info(
f"Dropping old Neo4j database {old_graph_database} for provider {prowler_api_provider.id}"
)
graph_database.drop_database(old_graph_database)
db_utils.update_old_attack_paths_scan(old_attack_paths_scan)
logger.info(f"Dropping temporary Neo4j database {tmp_database_name}")
graph_database.drop_database(tmp_database_name)
db_utils.finish_attack_paths_scan(
attack_paths_scan, StateChoices.COMPLETED, ingestion_exceptions
)
@@ -168,30 +223,8 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
ingestion_exceptions["global_cartography_error"] = exception_message
# Handling databases changes
graph_database.drop_database(cartography_config.neo4j_database)
graph_database.drop_database(tmp_cartography_config.neo4j_database)
db_utils.finish_attack_paths_scan(
attack_paths_scan, StateChoices.FAILED, ingestion_exceptions
)
raise
def _call_within_event_loop(fn, *args, **kwargs):
"""
Cartography needs a running event loop, so assuming there is none (Celery task or even regular DRF endpoint),
let's create a new one and set it as the current event loop for this thread.
"""
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return fn(*args, **kwargs)
finally:
try:
loop.run_until_complete(loop.shutdown_asyncgens())
except Exception as e:
logger.warning(f"Failed to shutdown async generators cleanly: {e}")
loop.close()
asyncio.set_event_loop(None)

View File

@@ -0,0 +1,202 @@
"""
Graph sync operations for Attack Paths.
This module handles syncing graph data from temporary scan databases
to the tenant database, adding provider isolation labels and properties.
"""
from collections import defaultdict
from typing import Any
from celery.utils.log import get_task_logger
from api.attack_paths import database as graph_database
from tasks.jobs.attack_paths.config import BATCH_SIZE, PROVIDER_RESOURCE_LABEL
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
from tasks.jobs.attack_paths.queries import (
NODE_FETCH_QUERY,
NODE_SYNC_TEMPLATE,
RELATIONSHIP_SYNC_TEMPLATE,
RELATIONSHIPS_FETCH_QUERY,
render_cypher_template,
)
logger = get_task_logger(__name__)
def create_sync_indexes(neo4j_session) -> None:
    """Ensure the indexes required by provider-resource sync exist in this session's database."""
    index_type = IndexType.SYNC
    create_indexes(neo4j_session, index_type)
def sync_graph(
    source_database: str,
    target_database: str,
    provider_id: str,
) -> dict[str, int]:
    """
    Copy the whole graph from the source database into the target database.

    Args:
        `source_database`: The temporary scan database
        `target_database`: The tenant database
        `provider_id`: The provider ID for isolation

    Returns:
        Dict with counts of synced nodes and relationships
    """
    # Nodes must be synced before relationships: the relationship sync
    # MATCHes its endpoints on the provider_element_id values written by
    # the node sync. Dict literals evaluate their values in order, so the
    # node sync below runs first.
    return {
        "nodes": sync_nodes(source_database, target_database, provider_id),
        "relationships": sync_relationships(
            source_database, target_database, provider_id
        ),
    }
def sync_nodes(
    source_database: str,
    target_database: str,
    provider_id: str,
) -> int:
    """
    Copy every node from the source database into the target database.

    Each node keeps its original labels, gains the `ProviderResource`
    label, and is tagged with the `provider_id` property so it can be
    isolated per provider inside the shared tenant database.

    Returns the number of nodes synced.
    """
    cursor = -1
    synced_count = 0
    with (
        graph_database.get_session(source_database) as source_session,
        graph_database.get_session(target_database) as target_session,
    ):
        while True:
            page = list(
                source_session.run(
                    NODE_FETCH_QUERY,
                    {"last_id": cursor, "batch_size": BATCH_SIZE},
                )
            )
            if not page:
                break
            # Keyset pagination: resume after the last internal id seen.
            cursor = page[-1]["internal_id"]

            # Group rows by their (sorted, deduplicated) label set so each
            # group can be written with a single rendered MERGE statement.
            batches_by_labels: dict[tuple[str, ...], list[dict[str, Any]]] = (
                defaultdict(list)
            )
            for record in page:
                label_key = tuple(sorted(set(record["labels"] or [])))
                node_props = dict(record["props"] or {})
                _strip_internal_properties(node_props)
                batches_by_labels[label_key].append(
                    {
                        # Prefix with the provider id so element ids from
                        # different providers cannot collide in the tenant DB.
                        "provider_element_id": f"{provider_id}:{record['element_id']}",
                        "props": node_props,
                    }
                )

            for label_key, node_rows in batches_by_labels.items():
                all_labels = set(label_key) | {PROVIDER_RESOURCE_LABEL}
                rendered_labels = ":".join(
                    f"`{label}`" for label in sorted(all_labels)
                )
                statement = render_cypher_template(
                    NODE_SYNC_TEMPLATE, {"__NODE_LABELS__": rendered_labels}
                )
                target_session.run(
                    statement,
                    {"rows": node_rows, "provider_id": provider_id},
                )

            synced_count += len(page)

    logger.info(
        f"Synced {synced_count} nodes from {source_database} to {target_database}"
    )
    return synced_count
def sync_relationships(
    source_database: str,
    target_database: str,
    provider_id: str,
) -> int:
    """
    Copy every relationship from the source database into the target database.

    Each relationship is tagged with the `provider_id` property and re-created
    between the provider-prefixed nodes written by `sync_nodes`.

    Returns the number of relationships synced.
    """
    cursor = -1
    synced_count = 0
    with (
        graph_database.get_session(source_database) as source_session,
        graph_database.get_session(target_database) as target_session,
    ):
        while True:
            page = list(
                source_session.run(
                    RELATIONSHIPS_FETCH_QUERY,
                    {"last_id": cursor, "batch_size": BATCH_SIZE},
                )
            )
            if not page:
                break
            # Keyset pagination: resume after the last internal id seen.
            cursor = page[-1]["internal_id"]

            # Group rows by relationship type so each group can be written
            # with a single rendered MERGE statement.
            batches_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list)
            for record in page:
                rel_props = dict(record["props"] or {})
                _strip_internal_properties(rel_props)
                relationship_type = record["rel_type"]
                batches_by_type[relationship_type].append(
                    {
                        "start_element_id": f"{provider_id}:{record['start_element_id']}",
                        "end_element_id": f"{provider_id}:{record['end_element_id']}",
                        "provider_element_id": f"{provider_id}:{relationship_type}:{record['internal_id']}",
                        "props": rel_props,
                    }
                )

            for relationship_type, rel_rows in batches_by_type.items():
                statement = render_cypher_template(
                    RELATIONSHIP_SYNC_TEMPLATE, {"__REL_TYPE__": relationship_type}
                )
                target_session.run(
                    statement,
                    {"rows": rel_rows, "provider_id": provider_id},
                )

            synced_count += len(page)

    logger.info(
        f"Synced {synced_count} relationships from {source_database} to {target_database}"
    )
    return synced_count
def _strip_internal_properties(props: dict[str, Any]) -> None:
"""Remove internal properties that shouldn't be copied during sync."""
for key in [
"provider_element_id",
"provider_id",
]:
props.pop(key, None)

View File

@@ -1,10 +1,40 @@
import asyncio
import traceback
from datetime import datetime, timezone
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
def stringify_exception(exception: Exception, context: str) -> str:
    """Format an exception as `<UTC timestamp> - <context>` followed by its full traceback."""
    occurred_at = datetime.now(tz=timezone.utc)
    formatted_traceback = "".join(
        traceback.TracebackException.from_exception(exception).format()
    )
    return f"{occurred_at} - {context}\n{formatted_traceback}"
def call_within_event_loop(fn, *args, **kwargs):
    """
    Run `fn(*args, **kwargs)` with a fresh event loop installed for this thread.

    Cartography needs a running event loop, so assuming there is none
    (Celery task or even regular DRF endpoint), a new one is created and
    set as the current event loop, then torn down again afterwards.
    """
    event_loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(event_loop)
        return fn(*args, **kwargs)
    finally:
        # Drain any async generators before closing; failures here are
        # logged but must not mask the function's result or exception.
        try:
            event_loop.run_until_complete(event_loop.shutdown_asyncgens())
        except Exception as cleanup_error:
            logger.warning(
                f"Failed to shutdown async generators cleanly: {cleanup_error}"
            )
        event_loop.close()
        asyncio.set_event_loop(None)

View File

@@ -13,7 +13,6 @@ from api.models import (
ScanSummary,
Tenant,
)
from tasks.jobs.attack_paths.db_utils import get_provider_graph_database_names
logger = get_task_logger(__name__)
@@ -33,13 +32,13 @@ def delete_provider(tenant_id: str, pk: str):
Raises:
Provider.DoesNotExist: If no instance with the provided primary key exists.
"""
# Delete the Attack Paths' graph databases related to the provider
graph_database_names = get_provider_graph_database_names(tenant_id, pk)
# Delete the Attack Paths' graph data related to the provider
tenant_database_name = graph_database.get_database_name(tenant_id)
try:
for graph_database_name in graph_database_names:
graph_database.drop_database(graph_database_name)
graph_database.drop_subgraph(tenant_database_name, str(pk))
except graph_database.GraphDatabaseQueryException as gdb_error:
logger.error(f"Error deleting Provider databases: {gdb_error}")
logger.error(f"Error deleting Provider graph data: {gdb_error}")
raise
# Get all provider related data and delete them in batches
@@ -90,6 +89,13 @@ def delete_tenant(pk: str):
summary = delete_provider(pk, provider.id)
deletion_summary.update(summary)
try:
tenant_database_name = graph_database.get_database_name(pk)
graph_database.drop_database(tenant_database_name)
except graph_database.GraphDatabaseQueryException as gdb_error:
logger.error(f"Error dropping Tenant graph database: {gdb_error}")
raise
Tenant.objects.using(MainRouter.admin_db).filter(id=pk).delete()
return deletion_summary

View File

@@ -3,7 +3,7 @@ from types import SimpleNamespace
from unittest.mock import MagicMock, call, patch
import pytest
from tasks.jobs.attack_paths import prowler as prowler_module
from tasks.jobs.attack_paths import findings as findings_module
from tasks.jobs.attack_paths.scan import run as attack_paths_run
from api.models import (
@@ -21,7 +21,65 @@ from prowler.lib.check.models import Severity
@pytest.mark.django_db
class TestAttackPathsRun:
def test_run_success_flow(self, tenants_fixture, providers_fixture, scans_fixture):
# Patching with decorators as we got a `SyntaxError: too many statically nested blocks` error if we use context managers
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch(
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch(
"tasks.jobs.attack_paths.scan.db_utils.get_old_attack_paths_scans",
return_value=[],
)
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph")
@patch("tasks.jobs.attack_paths.scan.sync.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
)
@patch(
"tasks.jobs.attack_paths.scan.initialize_prowler_provider",
return_value=MagicMock(_enabled_regions=["us-east-1"]),
)
@patch(
"tasks.jobs.attack_paths.scan.rls_transaction",
new=lambda *args, **kwargs: nullcontext(),
)
def test_run_success_flow(
self,
mock_init_provider,
mock_get_uri,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
mock_cartography_analysis,
mock_cartography_ontology,
mock_findings_indexes,
mock_findings_analysis,
mock_sync_indexes,
mock_drop_subgraph,
mock_sync,
mock_starting,
mock_update_progress,
mock_finish,
mock_get_old_scans,
mock_event_loop,
mock_drop_db,
tenants_fixture,
providers_fixture,
scans_fixture,
):
tenant = tenants_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
@@ -45,66 +103,22 @@ class TestAttackPathsRun:
ingestion_fn = MagicMock(return_value=ingestion_result)
with (
patch(
"tasks.jobs.attack_paths.scan.rls_transaction",
new=lambda *args, **kwargs: nullcontext(),
),
patch(
"tasks.jobs.attack_paths.scan.initialize_prowler_provider",
return_value=MagicMock(_enabled_regions=["us-east-1"]),
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
return_value="db-scan-id",
side_effect=["db-scan-id", "tenant-db"],
) as mock_get_db_name,
patch(
"tasks.jobs.attack_paths.scan.graph_database.create_database"
) as mock_create_db,
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
) as mock_get_session,
patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache"),
patch(
"tasks.jobs.attack_paths.scan.cartography_create_indexes.run"
) as mock_cartography_indexes,
patch(
"tasks.jobs.attack_paths.scan.cartography_analysis.run"
) as mock_cartography_analysis,
patch(
"tasks.jobs.attack_paths.scan.cartography_ontology.run"
) as mock_cartography_ontology,
patch(
"tasks.jobs.attack_paths.scan.prowler.create_indexes"
) as mock_prowler_indexes,
patch(
"tasks.jobs.attack_paths.scan.prowler.analysis"
) as mock_prowler_analysis,
patch(
"tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
return_value=attack_paths_scan,
) as mock_retrieve_scan,
patch(
"tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan"
) as mock_starting,
patch(
"tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress"
) as mock_update_progress,
patch(
"tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan"
) as mock_finish,
patch(
"tasks.jobs.attack_paths.scan.get_cartography_ingestion_function",
return_value=ingestion_fn,
) as mock_get_ingestion,
patch(
"tasks.jobs.attack_paths.scan._call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
) as mock_event_loop,
):
result = attack_paths_run(str(tenant.id), str(scan.id), "task-123")
@@ -112,29 +126,40 @@ class TestAttackPathsRun:
mock_retrieve_scan.assert_called_once_with(str(tenant.id), str(scan.id))
mock_starting.assert_called_once()
config = mock_starting.call_args[0][2]
assert config.neo4j_database == "db-scan-id"
assert config.neo4j_database == "tenant-db"
mock_get_db_name.assert_has_calls(
[call(attack_paths_scan.id, temporary=True), call(provider.tenant_id)]
)
mock_create_db.assert_called_once_with("db-scan-id")
mock_get_session.assert_called_once_with("db-scan-id")
mock_cartography_indexes.assert_called_once_with(mock_session, config)
mock_prowler_indexes.assert_called_once_with(mock_session)
mock_cartography_analysis.assert_called_once_with(mock_session, config)
mock_cartography_ontology.assert_called_once_with(mock_session, config)
mock_prowler_analysis.assert_called_once_with(
mock_session,
provider,
str(scan.id),
config,
mock_create_db.assert_has_calls([call("db-scan-id"), call("tenant-db")])
mock_get_session.assert_has_calls([call("db-scan-id"), call("tenant-db")])
assert mock_cartography_indexes.call_count == 2
mock_findings_indexes.assert_has_calls([call(mock_session), call(mock_session)])
mock_sync_indexes.assert_called_once_with(mock_session)
# These use tmp_cartography_config (neo4j_database="db-scan-id")
mock_cartography_analysis.assert_called_once()
mock_cartography_ontology.assert_called_once()
mock_findings_analysis.assert_called_once()
mock_drop_subgraph.assert_called_once_with(
database="tenant-db",
provider_id=str(provider.id),
)
mock_sync.assert_called_once_with(
source_database="db-scan-id",
target_database="tenant-db",
provider_id=str(provider.id),
)
mock_get_ingestion.assert_called_once_with(provider.provider)
mock_event_loop.assert_called_once()
mock_update_progress.assert_any_call(attack_paths_scan, 1)
mock_update_progress.assert_any_call(attack_paths_scan, 2)
mock_update_progress.assert_any_call(attack_paths_scan, 95)
mock_update_progress.assert_any_call(attack_paths_scan, 97)
mock_update_progress.assert_any_call(attack_paths_scan, 98)
mock_update_progress.assert_any_call(attack_paths_scan, 99)
mock_finish.assert_called_once_with(
attack_paths_scan, StateChoices.COMPLETED, ingestion_result
)
mock_get_db_name.assert_called_once_with(attack_paths_scan.id)
def test_run_failure_marks_scan_failed(
self, tenants_fixture, providers_fixture, scans_fixture
@@ -181,8 +206,8 @@ class TestAttackPathsRun:
),
patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run"),
patch("tasks.jobs.attack_paths.scan.cartography_analysis.run"),
patch("tasks.jobs.attack_paths.scan.prowler.create_indexes"),
patch("tasks.jobs.attack_paths.scan.prowler.analysis"),
patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes"),
patch("tasks.jobs.attack_paths.scan.findings.analysis"),
patch(
"tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
return_value=attack_paths_scan,
@@ -194,12 +219,13 @@ class TestAttackPathsRun:
patch(
"tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan"
) as mock_finish,
patch("tasks.jobs.attack_paths.scan.graph_database.drop_database"),
patch(
"tasks.jobs.attack_paths.scan.get_cartography_ingestion_function",
return_value=ingestion_fn,
),
patch(
"tasks.jobs.attack_paths.scan._call_within_event_loop",
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
),
patch(
@@ -261,15 +287,17 @@ class TestAttackPathsRun:
@pytest.mark.django_db
class TestAttackPathsProwlerHelpers:
def test_create_indexes_executes_all_statements(self):
class TestAttackPathsFindingsHelpers:
def test_create_findings_indexes_executes_all_statements(self):
mock_session = MagicMock()
with patch("tasks.jobs.attack_paths.prowler.run_write_query") as mock_run_write:
prowler_module.create_indexes(mock_session)
with patch("tasks.jobs.attack_paths.indexes.run_write_query") as mock_run_write:
findings_module.create_findings_indexes(mock_session)
assert mock_run_write.call_count == len(prowler_module.INDEX_STATEMENTS)
from tasks.jobs.attack_paths.indexes import FINDINGS_INDEX_STATEMENTS
assert mock_run_write.call_count == len(FINDINGS_INDEX_STATEMENTS)
mock_run_write.assert_has_calls(
[call(mock_session, stmt) for stmt in prowler_module.INDEX_STATEMENTS]
[call(mock_session, stmt) for stmt in FINDINGS_INDEX_STATEMENTS]
)
def test_load_findings_batches_requests(self, providers_fixture):
@@ -277,25 +305,35 @@ class TestAttackPathsProwlerHelpers:
provider.provider = Provider.ProviderChoices.AWS
provider.save()
# Create a generator that yields two batches
# Create mock Finding objects with to_dict() method
mock_finding_1 = MagicMock()
mock_finding_1.to_dict.return_value = {"id": "1", "resource_uid": "r-1"}
mock_finding_2 = MagicMock()
mock_finding_2.to_dict.return_value = {"id": "2", "resource_uid": "r-2"}
# Create a generator that yields two batches of Finding instances
def findings_generator():
yield [{"id": "1", "resource_uid": "r-1"}]
yield [{"id": "2", "resource_uid": "r-2"}]
yield [mock_finding_1]
yield [mock_finding_2]
config = SimpleNamespace(update_tag=12345)
mock_session = MagicMock()
with (
patch(
"tasks.jobs.attack_paths.prowler.get_root_node_label",
"tasks.jobs.attack_paths.findings.get_root_node_label",
return_value="AWSAccount",
),
patch(
"tasks.jobs.attack_paths.prowler.get_node_uid_field",
"tasks.jobs.attack_paths.findings.get_node_uid_field",
return_value="arn",
),
patch(
"tasks.jobs.attack_paths.findings.get_provider_resource_label",
return_value="AWSResource",
),
):
prowler_module.load_findings(
findings_module.load_findings(
mock_session, findings_generator(), provider, config
)
@@ -317,14 +355,14 @@ class TestAttackPathsProwlerHelpers:
second_batch.single.return_value = {"deleted_findings_count": 0}
mock_session.run.side_effect = [first_batch, second_batch]
prowler_module.cleanup_findings(mock_session, provider, config)
findings_module.cleanup_findings(mock_session, provider, config)
assert mock_session.run.call_count == 2
params = mock_session.run.call_args.args[1]
assert params["provider_uid"] == str(provider.uid)
assert params["last_updated"] == config.update_tag
def test_get_provider_last_scan_findings_returns_latest_scan_data(
def test_stream_findings_with_resources_returns_latest_scan_data(
self,
tenants_fixture,
providers_fixture,
@@ -402,15 +440,18 @@ class TestAttackPathsProwlerHelpers:
latest_scan.refresh_from_db()
with patch(
"tasks.jobs.attack_paths.prowler.rls_transaction",
new=lambda *args, **kwargs: nullcontext(),
), patch(
"tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
"default",
with (
patch(
"tasks.jobs.attack_paths.findings.rls_transaction",
new=lambda *args, **kwargs: nullcontext(),
),
patch(
"tasks.jobs.attack_paths.findings.READ_REPLICA_ALIAS",
"default",
),
):
# Generator yields batches, collect all findings from all batches
findings_batches = prowler_module.get_provider_last_scan_findings(
findings_batches = findings_module.stream_findings_with_resources(
provider,
str(latest_scan.id),
)
@@ -419,18 +460,18 @@ class TestAttackPathsProwlerHelpers:
findings_data.extend(batch)
assert len(findings_data) == 1
finding_dict = findings_data[0]
assert finding_dict["id"] == str(finding.id)
assert finding_dict["resource_uid"] == resource.uid
assert finding_dict["check_title"] == "Check title"
assert finding_dict["scan_id"] == str(latest_scan.id)
finding_result = findings_data[0]
assert finding_result.id == str(finding.id)
assert finding_result.resource_uid == resource.uid
assert finding_result.check_title == "Check title"
assert finding_result.scan_id == str(latest_scan.id)
def test_enrich_and_flatten_batch_single_resource(
def test_enrich_batch_with_resources_single_resource(
self,
tenants_fixture,
providers_fixture,
):
"""One finding + one resource = one output dict"""
"""One finding + one resource = one output Finding instance"""
tenant = tenants_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
@@ -493,25 +534,27 @@ class TestAttackPathsProwlerHelpers:
"muted_reason": finding.muted_reason,
}
# _enrich_and_flatten_batch queries ResourceFindingMapping directly
# _enrich_batch_with_resources queries ResourceFindingMapping directly
# No RLS mock needed - test DB doesn't enforce RLS policies
with patch(
"tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
"tasks.jobs.attack_paths.findings.READ_REPLICA_ALIAS",
"default",
):
result = prowler_module._enrich_and_flatten_batch([finding_dict])
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id)
)
assert len(result) == 1
assert result[0]["resource_uid"] == resource.uid
assert result[0]["id"] == str(finding.id)
assert result[0]["status"] == "FAIL"
assert result[0].resource_uid == resource.uid
assert result[0].id == str(finding.id)
assert result[0].status == "FAIL"
def test_enrich_and_flatten_batch_multiple_resources(
def test_enrich_batch_with_resources_multiple_resources(
self,
tenants_fixture,
providers_fixture,
):
"""One finding + three resources = three output dicts"""
"""One finding + three resources = three output Finding instances"""
tenant = tenants_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
@@ -579,24 +622,26 @@ class TestAttackPathsProwlerHelpers:
"muted_reason": finding.muted_reason,
}
# _enrich_and_flatten_batch queries ResourceFindingMapping directly
# _enrich_batch_with_resources queries ResourceFindingMapping directly
# No RLS mock needed - test DB doesn't enforce RLS policies
with patch(
"tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
"tasks.jobs.attack_paths.findings.READ_REPLICA_ALIAS",
"default",
):
result = prowler_module._enrich_and_flatten_batch([finding_dict])
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id)
)
assert len(result) == 3
result_resource_uids = {r["resource_uid"] for r in result}
result_resource_uids = {r.resource_uid for r in result}
assert result_resource_uids == {r.uid for r in resources}
# All should have same finding data
for r in result:
assert r["id"] == str(finding.id)
assert r["status"] == "FAIL"
assert r.id == str(finding.id)
assert r.status == "FAIL"
def test_enrich_and_flatten_batch_no_resources_skips(
def test_enrich_batch_with_resources_no_resources_skips(
self,
tenants_fixture,
providers_fixture,
@@ -652,12 +697,14 @@ class TestAttackPathsProwlerHelpers:
# Mock logger to verify no warning is emitted
with (
patch(
"tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
"tasks.jobs.attack_paths.findings.READ_REPLICA_ALIAS",
"default",
),
patch("tasks.jobs.attack_paths.prowler.logger") as mock_logger,
patch("tasks.jobs.attack_paths.findings.logger") as mock_logger,
):
result = prowler_module._enrich_and_flatten_batch([finding_dict])
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id)
)
assert len(result) == 0
mock_logger.warning.assert_not_called()
@@ -670,11 +717,11 @@ class TestAttackPathsProwlerHelpers:
scan_id = "some-scan-id"
with (
patch("tasks.jobs.attack_paths.prowler.rls_transaction") as mock_rls,
patch("tasks.jobs.attack_paths.prowler.Finding") as mock_finding,
patch("tasks.jobs.attack_paths.findings.rls_transaction") as mock_rls,
patch("tasks.jobs.attack_paths.findings.Finding") as mock_finding,
):
# Create generator but don't iterate
prowler_module.get_provider_last_scan_findings(provider, scan_id)
findings_module.stream_findings_with_resources(provider, scan_id)
# Nothing should be called yet
mock_rls.assert_not_called()
@@ -695,14 +742,18 @@ class TestAttackPathsProwlerHelpers:
with (
patch(
"tasks.jobs.attack_paths.prowler.get_root_node_label",
"tasks.jobs.attack_paths.findings.get_root_node_label",
return_value="AWSAccount",
),
patch(
"tasks.jobs.attack_paths.prowler.get_node_uid_field",
"tasks.jobs.attack_paths.findings.get_node_uid_field",
return_value="arn",
),
patch(
"tasks.jobs.attack_paths.findings.get_provider_resource_label",
return_value="AWSResource",
),
):
prowler_module.load_findings(mock_session, empty_gen(), provider, config)
findings_module.load_findings(mock_session, empty_gen(), provider, config)
mock_session.run.assert_not_called()

View File

@@ -11,14 +11,15 @@ from tasks.jobs.deletion import delete_provider, delete_tenant
@pytest.mark.django_db
class TestDeleteProvider:
def test_delete_provider_success(self, providers_fixture):
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
graph_db_names = ["graph-db-1", "graph-db-2"]
mock_get_provider_graph_database_names.return_value = graph_db_names
with (
patch(
"tasks.jobs.deletion.graph_database.get_database_name",
return_value="tenant-db",
) as mock_get_database_name,
patch(
"tasks.jobs.deletion.graph_database.drop_subgraph"
) as mock_drop_subgraph,
):
instance = providers_fixture[0]
tenant_id = str(instance.tenant_id)
result = delete_provider(tenant_id, instance.id)
@@ -27,33 +28,32 @@ class TestDeleteProvider:
with pytest.raises(ObjectDoesNotExist):
Provider.objects.get(pk=instance.id)
mock_get_provider_graph_database_names.assert_called_once_with(
tenant_id, instance.id
)
mock_drop_database.assert_has_calls(
[call(graph_db_name) for graph_db_name in graph_db_names]
mock_get_database_name.assert_called_once_with(tenant_id)
mock_drop_subgraph.assert_called_once_with(
"tenant-db",
str(instance.id),
)
def test_delete_provider_does_not_exist(self, tenants_fixture):
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
graph_db_names = ["graph-db-1"]
mock_get_provider_graph_database_names.return_value = graph_db_names
with (
patch(
"tasks.jobs.deletion.graph_database.get_database_name",
return_value="tenant-db",
) as mock_get_database_name,
patch(
"tasks.jobs.deletion.graph_database.drop_subgraph"
) as mock_drop_subgraph,
):
tenant_id = str(tenants_fixture[0].id)
non_existent_pk = "babf6796-cfcc-4fd3-9dcf-88d012247645"
with pytest.raises(ObjectDoesNotExist):
delete_provider(tenant_id, non_existent_pk)
mock_get_provider_graph_database_names.assert_called_once_with(
tenant_id, non_existent_pk
)
mock_drop_database.assert_has_calls(
[call(graph_db_name) for graph_db_name in graph_db_names]
mock_get_database_name.assert_called_once_with(tenant_id)
mock_drop_subgraph.assert_called_once_with(
"tenant-db",
non_existent_pk,
)
@@ -63,21 +63,21 @@ class TestDeleteTenant:
"""
Test successful deletion of a tenant and its related data.
"""
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
with (
patch(
"tasks.jobs.deletion.graph_database.get_database_name",
return_value="tenant-db",
) as mock_get_database_name,
patch(
"tasks.jobs.deletion.graph_database.drop_subgraph"
) as mock_drop_subgraph,
patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database,
):
tenant = tenants_fixture[0]
providers = list(Provider.objects.filter(tenant_id=tenant.id))
graph_db_names_per_provider = [
[f"graph-db-{provider.id}"] for provider in providers
]
mock_get_provider_graph_database_names.side_effect = (
graph_db_names_per_provider
)
# Ensure the tenant and related providers exist before deletion
assert Tenant.objects.filter(id=tenant.id).exists()
assert providers
@@ -89,30 +89,42 @@ class TestDeleteTenant:
assert not Tenant.objects.filter(id=tenant.id).exists()
assert not Provider.objects.filter(tenant_id=tenant.id).exists()
expected_calls = [
call(provider.tenant_id, provider.id) for provider in providers
# get_database_name is called once per provider + once for drop_database
expected_get_db_calls = [call(tenant.id) for _ in providers] + [
call(tenant.id)
]
mock_get_provider_graph_database_names.assert_has_calls(
expected_calls, any_order=True
mock_get_database_name.assert_has_calls(
expected_get_db_calls, any_order=True
)
assert mock_get_provider_graph_database_names.call_count == len(
expected_calls
)
expected_drop_calls = [
call(graph_db_name[0]) for graph_db_name in graph_db_names_per_provider
assert mock_get_database_name.call_count == len(expected_get_db_calls)
expected_drop_subgraph_calls = [
call("tenant-db", str(provider.id)) for provider in providers
]
mock_drop_database.assert_has_calls(expected_drop_calls, any_order=True)
assert mock_drop_database.call_count == len(expected_drop_calls)
mock_drop_subgraph.assert_has_calls(
expected_drop_subgraph_calls,
any_order=True,
)
assert mock_drop_subgraph.call_count == len(expected_drop_subgraph_calls)
mock_drop_database.assert_called_once_with("tenant-db")
def test_delete_tenant_with_no_providers(self, tenants_fixture):
"""
Test deletion of a tenant with no related providers.
"""
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
with (
patch(
"tasks.jobs.deletion.graph_database.get_database_name",
return_value="tenant-db",
) as mock_get_database_name,
patch(
"tasks.jobs.deletion.graph_database.drop_subgraph"
) as mock_drop_subgraph,
patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database,
):
tenant = tenants_fixture[1] # Assume this tenant has no providers
providers = Provider.objects.filter(tenant_id=tenant.id)
@@ -126,5 +138,7 @@ class TestDeleteTenant:
assert deletion_summary == {} # No providers, so empty summary
assert not Tenant.objects.filter(id=tenant.id).exists()
mock_get_provider_graph_database_names.assert_not_called()
mock_drop_database.assert_not_called()
# get_database_name is called once for drop_database
mock_get_database_name.assert_called_once_with(tenant.id)
mock_drop_subgraph.assert_not_called()
mock_drop_database.assert_called_once_with("tenant-db")

View File

@@ -0,0 +1,25 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
# NOTE(review): blanket-suppresses ALL warnings module-wide — presumably to
# silence pandas chained-assignment/deprecation noise during dashboard
# rendering; confirm this is intentional rather than scoping to a category.
warnings.filterwarnings("ignore")
def get_table(data):
    """Build the section-grouped compliance table for the HIPAA dashboard.

    Selects the columns required by the format-3 section renderer and
    delegates grouping to ``get_section_containers_format3``, grouping rows
    by ``REQUIREMENTS_ATTRIBUTES_SECTION`` and identifying them by
    ``REQUIREMENTS_ID``.

    Args:
        data: Compliance findings table. Assumed to be a pandas DataFrame
            containing all the columns listed below — TODO confirm against
            the dashboard data loader.

    Returns:
        Whatever ``get_section_containers_format3`` produces for the
        selected column subset (the rendered section containers).
    """
    # Only these columns are consumed by the format-3 renderer.
    required_columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "REQUIREMENTS_DESCRIPTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    # Copy so downstream mutation cannot affect the caller's DataFrame.
    subset = data[required_columns].copy()
    return get_section_containers_format3(
        subset, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -86,3 +86,81 @@ docker compose up -d
<Note>
We are evaluating adding these values to the default `docker-compose.yml` to avoid this issue in future releases.
</Note>
### API Container Fails to Start with JWT Key Permission Error
See [GitHub Issue #8897](https://github.com/prowler-cloud/prowler/issues/8897) for more details.
When deploying Prowler via Docker Compose on a fresh installation, the API container may fail to start with permission errors related to JWT RSA key file generation. This issue is commonly observed on Linux systems (Ubuntu, Debian, cloud VMs) and Windows with Docker Desktop, but not typically on macOS.
**Error Message:**
Checking the API container logs reveals:
```bash
PermissionError: [Errno 13] Permission denied: '/home/prowler/.config/prowler-api/jwt_private.pem'
```
Or:
```bash
Token generation failed due to invalid key configuration. Provide valid DJANGO_TOKEN_SIGNING_KEY and DJANGO_TOKEN_VERIFYING_KEY in the environment.
```
**Root Cause:**
This permission mismatch occurs due to a mismatch in UID (User ID) mapping between the host system and Docker containers:
* The API container runs as user `prowler` with UID/GID 1000
* In environments like WSL2, the host user may have a different UID than the container user
* Docker creates the mounted volume directory `./_data/api` on the host, often with the host user's UID or root ownership (UID 0)
* When the application attempts to write JWT key files (`jwt_private.pem` and `jwt_public.pem`), the operation fails because the container's UID 1000 does not have write permissions to the host-owned directory
**Solutions:**
There are two approaches to resolve this issue:
**Option 1: Fix Volume Ownership (Resolve UID Mapping)**
Change the ownership of the volume directory to match the container user's UID (1000):
```bash
# The container user 'prowler' has UID 1000
# This command changes the directory ownership to UID 1000
sudo chown -R 1000:1000 ./_data/api
```
Then start Docker Compose:
```bash
docker compose up -d
```
This solution directly addresses the UID mapping mismatch by ensuring the volume directory is owned by the same UID that the container process uses.
**Option 2: Use Environment Variables (Skip File Storage)**
Generate JWT RSA keys manually and provide them via environment variables to bypass file-based key storage entirely:
```bash
# Generate RSA keys
openssl genrsa -out jwt_private.pem 4096
openssl rsa -in jwt_private.pem -pubout -out jwt_public.pem
# Extract key content (removes headers/footers and newlines)
PRIVATE_KEY=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' jwt_private.pem)
PUBLIC_KEY=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' jwt_public.pem)
```
Add the following to the `.env` file:
```env
DJANGO_TOKEN_SIGNING_KEY=<content of jwt_private.pem>
DJANGO_TOKEN_VERIFYING_KEY=<content of jwt_public.pem>
```
When these environment variables are set, the API will use them directly instead of attempting to write key files to the mounted volume.
<Note>
A fix addressing this permission issue is being evaluated in [PR #9953](https://github.com/prowler-cloud/prowler/pull/9953).
</Note>

103
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.3.1 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -1425,64 +1425,6 @@ typing-extensions = ">=4.6.0"
[package.extras]
aio = ["azure-core[aio] (>=1.30.0)"]
[[package]]
name = "backports-datetime-fromisoformat"
version = "2.0.3"
description = "Backport of Python 3.11's datetime.fromisoformat"
optional = false
python-versions = ">3"
groups = ["dev"]
markers = "python_version <= \"3.10\""
files = [
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f681f638f10588fa3c101ee9ae2b63d3734713202ddfcfb6ec6cea0778a29d4"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:cd681460e9142f1249408e5aee6d178c6d89b49e06d44913c8fdfb6defda8d1c"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:ee68bc8735ae5058695b76d3bb2aee1d137c052a11c8303f1e966aa23b72b65b"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8273fe7932db65d952a43e238318966eab9e49e8dd546550a41df12175cc2be4"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39d57ea50aa5a524bb239688adc1d1d824c31b6094ebd39aa164d6cadb85de22"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ac6272f87693e78209dc72e84cf9ab58052027733cd0721c55356d3c881791cf"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:44c497a71f80cd2bcfc26faae8857cf8e79388e3d5fbf79d2354b8c360547d58"},
{file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:6335a4c9e8af329cb1ded5ab41a666e1448116161905a94e054f205aa6d263bc"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2e4b66e017253cdbe5a1de49e0eecff3f66cd72bcb1229d7db6e6b1832c0443"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:43e2d648e150777e13bbc2549cc960373e37bf65bd8a5d2e0cef40e16e5d8dd0"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:4ce6326fd86d5bae37813c7bf1543bae9e4c215ec6f5afe4c518be2635e2e005"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7c8fac333bf860208fd522a5394369ee3c790d0aa4311f515fcc4b6c5ef8d75"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24a4da5ab3aa0cc293dc0662a0c6d1da1a011dc1edcbc3122a288cfed13a0b45"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:58ea11e3bf912bd0a36b0519eae2c5b560b3cb972ea756e66b73fb9be460af01"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8a375c7dbee4734318714a799b6c697223e4bbb57232af37fbfff88fb48a14c6"},
{file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:ac677b1664c4585c2e014739f6678137c8336815406052349c85898206ec7061"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66ce47ee1ba91e146149cf40565c3d750ea1be94faf660ca733d8601e0848147"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8b7e069910a66b3bba61df35b5f879e5253ff0821a70375b9daf06444d046fa4"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:a3b5d1d04a9e0f7b15aa1e647c750631a873b298cdd1255687bb68779fe8eb35"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec1b95986430e789c076610aea704db20874f0781b8624f648ca9fb6ef67c6e1"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffe5f793db59e2f1d45ec35a1cf51404fdd69df9f6952a0c87c3060af4c00e32"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:620e8e73bd2595dfff1b4d256a12b67fce90ece3de87b38e1dde46b910f46f4d"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4cf9c0a985d68476c1cabd6385c691201dda2337d7453fb4da9679ce9f23f4e7"},
{file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:d144868a73002e6e2e6fef72333e7b0129cecdd121aa8f1edba7107fd067255d"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e81b26497a17c29595bc7df20bc6a872ceea5f8c9d6537283945d4b6396aec10"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:5ba00ead8d9d82fd6123eb4891c566d30a293454e54e32ff7ead7644f5f7e575"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:24d574cb4072e1640b00864e94c4c89858033936ece3fc0e1c6f7179f120d0a8"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9735695a66aad654500b0193525e590c693ab3368478ce07b34b443a1ea5e824"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63d39709e17eb72685d052ac82acf0763e047f57c86af1b791505b1fec96915d"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1ea2cc84224937d6b9b4c07f5cb7c667f2bde28c255645ba27f8a675a7af8234"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4024e6d35a9fdc1b3fd6ac7a673bd16cb176c7e0b952af6428b7129a70f72cce"},
{file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5e2dcc94dc9c9ab8704409d86fcb5236316e9dcef6feed8162287634e3568f4c"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fa2de871801d824c255fac7e5e7e50f2be6c9c376fd9268b40c54b5e9da91f42"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:1314d4923c1509aa9696712a7bc0c7160d3b7acf72adafbbe6c558d523f5d491"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b750ecba3a8815ad8bc48311552f3f8ab99dd2326d29df7ff670d9c49321f48f"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d5117dce805d8a2f78baeddc8c6127281fa0a5e2c40c6dd992ba6b2b367876"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb35f607bd1cbe37b896379d5f5ed4dc298b536f4b959cb63180e05cacc0539d"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:61c74710900602637d2d145dda9720c94e303380803bf68811b2a151deec75c2"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ece59af54ebf67ecbfbbf3ca9066f5687879e36527ad69d8b6e3ac565d565a62"},
{file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:d0a7c5f875068efe106f62233bc712d50db4d07c13c7db570175c7857a7b5dbd"},
{file = "backports_datetime_fromisoformat-2.0.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90e202e72a3d5aae673fcc8c9a4267d56b2f532beeb9173361293625fe4d2039"},
{file = "backports_datetime_fromisoformat-2.0.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2df98ef1b76f5a58bb493dda552259ba60c3a37557d848e039524203951c9f06"},
{file = "backports_datetime_fromisoformat-2.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7100adcda5e818b5a894ad0626e38118bb896a347f40ebed8981155675b9ba7b"},
{file = "backports_datetime_fromisoformat-2.0.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e410383f5d6a449a529d074e88af8bc80020bb42b402265f9c02c8358c11da5"},
{file = "backports_datetime_fromisoformat-2.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2797593760da6bcc32c4a13fa825af183cd4bfd333c60b3dbf84711afca26ef"},
{file = "backports_datetime_fromisoformat-2.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35a144fd681a0bea1013ccc4cd3fd4dc758ea17ee23dca019c02b82ec46fc0c4"},
{file = "backports_datetime_fromisoformat-2.0.3.tar.gz", hash = "sha256:b58edc8f517b66b397abc250ecc737969486703a66eb97e01e6d51291b1a139d"},
]
[[package]]
name = "bandit"
version = "1.8.3"
@@ -3532,46 +3474,22 @@ files = [
[[package]]
name = "marshmallow"
version = "4.0.1"
version = "3.26.2"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
optional = false
python-versions = ">=3.9"
groups = ["dev"]
markers = "python_version < \"3.10\""
files = [
{file = "marshmallow-4.0.1-py3-none-any.whl", hash = "sha256:72f14ef346f81269dbddee891bac547dda1501e9e08b6a809756ea3dbb7936a1"},
{file = "marshmallow-4.0.1.tar.gz", hash = "sha256:e1d860bd262737cb2d34e1541b84cb52c32c72c9474e3fe6f30f137ef8b0d97f"},
{file = "marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73"},
{file = "marshmallow-3.26.2.tar.gz", hash = "sha256:bbe2adb5a03e6e3571b573f42527c6fe926e17467833660bebd11593ab8dfd57"},
]
[package.dependencies]
backports-datetime-fromisoformat = {version = "*", markers = "python_version < \"3.11\""}
typing-extensions = {version = "*", markers = "python_version < \"3.11\""}
packaging = ">=17.0"
[package.extras]
dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"]
docs = ["autodocsumm (==0.2.14)", "furo (==2025.7.19)", "sphinx (==8.2.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.1)", "sphinxext-opengraph (==0.12.0)"]
tests = ["pytest", "simplejson"]
[[package]]
name = "marshmallow"
version = "4.2.2"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
optional = false
python-versions = ">=3.10"
groups = ["dev"]
markers = "python_version >= \"3.10\""
files = [
{file = "marshmallow-4.2.2-py3-none-any.whl", hash = "sha256:084a9466111b7ec7183ca3a65aed758739af919fedc5ebdab60fb39d6b4dc121"},
{file = "marshmallow-4.2.2.tar.gz", hash = "sha256:ba40340683a2d1c15103647994ff2f6bc2c8c80da01904cbe5d96ee4baa78d9f"},
]
[package.dependencies]
backports-datetime-fromisoformat = {version = "*", markers = "python_version < \"3.11\""}
typing-extensions = {version = "*", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"]
docs = ["autodocsumm (==0.2.14)", "furo (==2025.12.19)", "sphinx (==8.2.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.1)", "sphinxext-opengraph (==0.13.0)"]
docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"]
tests = ["pytest", "simplejson"]
[[package]]
@@ -4943,7 +4861,7 @@ description = "C parser in Python"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
markers = "implementation_name != \"PyPy\" and platform_python_implementation != \"PyPy\""
markers = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\""
files = [
{file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
{file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
@@ -5956,7 +5874,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"},
@@ -5965,7 +5882,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"},
@@ -5974,7 +5890,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"},
@@ -5983,7 +5898,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"},
@@ -5992,7 +5906,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"},
{file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"},
@@ -6940,4 +6853,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">3.9.1,<3.13"
content-hash = "071090ee9e8754b41ef66dec0001806f65d6894eaac66309fe69183e3a64292c"
content-hash = "f9ff21ae57caa3ddcd27f3753c29c1b3be2966709baed52e1bbc24e7bdc33f3c"

View File

@@ -2,7 +2,7 @@
All notable changes to the **Prowler SDK** are documented in this file.
## [5.18.0] (Prowler UNRELEASED)
## [5.18.0] (Prowler v5.18.0)
### 🚀 Added
@@ -15,9 +15,10 @@ All notable changes to the **Prowler SDK** are documented in this file.
- `rds_instance_extended_support` check for AWS provider [(#9865)](https://github.com/prowler-cloud/prowler/pull/9865)
- `OpenStack` provider support with Compute service including 1 security check [(#9811)](https://github.com/prowler-cloud/prowler/pull/9811)
- `OpenStack` documentation for the support in the CLI [(#9848)](https://github.com/prowler-cloud/prowler/pull/9848)
- Add HIPAA compliance framework for the Azure provider [(#9957)](https://github.com/prowler-cloud/prowler/pull/9957)
- Cloudflare provider credentials as constructor parameters (`api_token`, `api_key`, `api_email`) [(#9907)](https://github.com/prowler-cloud/prowler/pull/9907)
### Changed
### 🔄 Changed
- Update Azure App Service service metadata to new format [(#9613)](https://github.com/prowler-cloud/prowler/pull/9613)
- Update Azure Application Insights service metadata to new format [(#9614)](https://github.com/prowler-cloud/prowler/pull/9614)
@@ -35,7 +36,7 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Update Azure Network service metadata to new format [(#9624)](https://github.com/prowler-cloud/prowler/pull/9624)
- Update Azure Storage service metadata to new format [(#9628)](https://github.com/prowler-cloud/prowler/pull/9628)
### Fixed
### 🐛 Fixed
- Duplicated findings in `entra_user_with_vm_access_has_mfa` check when user has multiple VM access roles [(#9914)](https://github.com/prowler-cloud/prowler/pull/9914)
- Jira integration failing with `INVALID_INPUT` error when sending findings with long resource UIDs exceeding 255-character summary limit [(#9926)](https://github.com/prowler-cloud/prowler/pull/9926)

View File

@@ -0,0 +1,820 @@
{
"Framework": "HIPAA",
"Name": "HIPAA compliance framework for Azure",
"Version": "",
"Provider": "Azure",
"Description": "The Health Insurance Portability and Accountability Act of 1996 (HIPAA) is legislation that helps US workers to retain health insurance coverage when they change or lose jobs. The legislation also seeks to encourage electronic health records to improve the efficiency and quality of the US healthcare system through improved information sharing. This framework maps HIPAA requirements to Microsoft Azure security best practices.",
"Requirements": [
{
"Id": "164_308_a_1_ii_a",
"Name": "164.308(a)(1)(ii)(A) Risk analysis",
"Description": "Conduct an accurate and thorough assessment of the potential risks and vulnerabilities to the confidentiality, integrity, and availability of electronic protected health information held by the covered entity or business associate.",
"Attributes": [
{
"ItemId": "164_308_a_1_ii_a",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"defender_ensure_defender_for_server_is_on",
"defender_ensure_defender_for_app_services_is_on",
"defender_ensure_defender_for_sql_servers_is_on",
"defender_ensure_defender_for_storage_is_on",
"defender_ensure_defender_for_keyvault_is_on",
"defender_ensure_defender_for_arm_is_on",
"defender_ensure_defender_for_dns_is_on",
"defender_ensure_defender_for_containers_is_on",
"defender_ensure_defender_for_cosmosdb_is_on",
"defender_ensure_mcas_is_enabled",
"policy_ensure_asc_enforcement_enabled"
]
},
{
"Id": "164_308_a_1_ii_b",
"Name": "164.308(a)(1)(ii)(B) Risk Management",
"Description": "Implement security measures sufficient to reduce risks and vulnerabilities to a reasonable and appropriate level to comply with 164.306(a): Ensure the confidentiality, integrity, and availability of all electronic protected health information the covered entity or business associate creates, receives, maintains, or transmits.",
"Attributes": [
{
"ItemId": "164_308_a_1_ii_b",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_infrastructure_encryption_is_enabled",
"storage_blob_public_access_level_is_disabled",
"storage_default_network_access_rule_is_denied",
"storage_ensure_private_endpoints_in_storage_accounts",
"sqlserver_tde_encryption_enabled",
"sqlserver_tde_encrypted_with_cmk",
"sqlserver_unrestricted_inbound_access",
"keyvault_key_rotation_enabled",
"keyvault_rbac_enabled",
"keyvault_private_endpoints",
"vm_ensure_attached_disks_encrypted_with_cmk",
"vm_ensure_unattached_disks_encrypted_with_cmk",
"network_ssh_internet_access_restricted",
"network_rdp_internet_access_restricted",
"network_http_internet_access_restricted",
"network_udp_internet_access_restricted",
"iam_subscription_roles_owner_custom_not_created",
"iam_custom_role_has_permissions_to_administer_resource_locks",
"cosmosdb_account_firewall_use_selected_networks",
"cosmosdb_account_use_private_endpoints",
"aks_clusters_public_access_disabled",
"aks_clusters_created_with_private_nodes"
]
},
{
"Id": "164_308_a_1_ii_d",
"Name": "164.308(a)(1)(ii)(D) Information system activity review",
"Description": "Implement procedures to regularly review records of information system activity, such as audit logs, access reports, and security incident tracking reports.",
"Attributes": [
{
"ItemId": "164_308_a_1_ii_d",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_diagnostic_setting_with_appropriate_categories",
"monitor_diagnostic_settings_exists",
"monitor_alert_create_policy_assignment",
"monitor_alert_delete_policy_assignment",
"monitor_alert_create_update_nsg",
"monitor_alert_delete_nsg",
"monitor_alert_create_update_security_solution",
"monitor_alert_delete_security_solution",
"sqlserver_auditing_enabled",
"sqlserver_auditing_retention_90_days",
"keyvault_logging_enabled",
"network_watcher_enabled",
"network_flow_log_captured_sent",
"network_flow_log_more_than_90_days",
"app_http_logs_enabled",
"appinsights_ensure_is_configured"
]
},
{
"Id": "164_308_a_3_i",
"Name": "164.308(a)(3)(i) Workforce security",
"Description": "Implement policies and procedures to ensure that all members of its workforce have appropriate access to electronic protected health information, as provided under paragraph (a)(4) of this section, and to prevent those workforce members who do not have access under paragraph (a)(4) of this section from obtaining access to electronic protected health information.",
"Attributes": [
{
"ItemId": "164_308_a_3_i",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_blob_public_access_level_is_disabled",
"storage_default_network_access_rule_is_denied",
"sqlserver_unrestricted_inbound_access",
"network_ssh_internet_access_restricted",
"network_rdp_internet_access_restricted",
"network_http_internet_access_restricted",
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"containerregistry_not_publicly_accessible",
"app_function_not_publicly_accessible",
"aisearch_service_not_publicly_accessible",
"cosmosdb_account_firewall_use_selected_networks"
]
},
{
"Id": "164_308_a_3_ii_a",
"Name": "164.308(a)(3)(ii)(A) Authorization and/or supervision",
"Description": "Implement procedures for the authorization and/or supervision of workforce members who work with electronic protected health information or in locations where it might be accessed.",
"Attributes": [
{
"ItemId": "164_308_a_3_ii_a",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_diagnostic_setting_with_appropriate_categories",
"monitor_diagnostic_settings_exists",
"sqlserver_auditing_enabled",
"keyvault_logging_enabled",
"entra_privileged_user_has_mfa",
"entra_non_privileged_user_has_mfa",
"entra_security_defaults_enabled",
"entra_conditional_access_policy_require_mfa_for_management_api",
"entra_user_with_vm_access_has_mfa",
"network_flow_log_captured_sent",
"app_http_logs_enabled"
]
},
{
"Id": "164_308_a_3_ii_b",
"Name": "164.308(a)(3)(ii)(B) Workforce clearance procedure",
"Description": "Implement procedures to determine that the access of a workforce member to electronic protected health information is appropriate.",
"Attributes": [
{
"ItemId": "164_308_a_3_ii_b",
"Section": "164.308 Administrative Safeguards",
"Service": "entra"
}
],
"Checks": [
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"iam_custom_role_has_permissions_to_administer_resource_locks",
"entra_global_admin_in_less_than_five_users",
"entra_policy_default_users_cannot_create_security_groups",
"entra_policy_ensure_default_user_cannot_create_apps",
"entra_policy_guest_invite_only_for_admin_roles",
"entra_policy_guest_users_access_restrictions"
]
},
{
"Id": "164_308_a_3_ii_c",
"Name": "164.308(a)(3)(ii)(C) Termination procedures",
"Description": "Implement procedures for terminating access to electronic protected health information when the employment of, or other arrangement with, a workforce member ends or as required by determinations made as specified in paragraph (a)(3)(ii)(B).",
"Attributes": [
{
"ItemId": "164_308_a_3_ii_c",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_key_rotation_90_days",
"keyvault_key_rotation_enabled",
"keyvault_rbac_key_expiration_set",
"keyvault_rbac_secret_expiration_set",
"keyvault_key_expiration_set_in_non_rbac",
"keyvault_non_rbac_secret_expiration_set"
]
},
{
"Id": "164_308_a_4_i",
"Name": "164.308(a)(4)(i) Information access management",
"Description": "Implement policies and procedures for authorizing access to electronic protected health information that are consistent with the applicable requirements of subpart E of this part.",
"Attributes": [
{
"ItemId": "164_308_a_4_i",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"iam_custom_role_has_permissions_to_administer_resource_locks",
"keyvault_rbac_enabled",
"entra_global_admin_in_less_than_five_users",
"entra_policy_restricts_user_consent_for_apps",
"entra_policy_user_consent_for_verified_apps"
]
},
{
"Id": "164_308_a_4_ii_a",
"Name": "164.308(a)(4)(ii)(A) Isolating health care clearinghouse functions",
"Description": "If a health care clearinghouse is part of a larger organization, the clearinghouse must implement policies and procedures that protect the electronic protected health information of the clearinghouse from unauthorized access by the larger organization.",
"Attributes": [
{
"ItemId": "164_308_a_4_ii_a",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_infrastructure_encryption_is_enabled",
"storage_ensure_private_endpoints_in_storage_accounts",
"storage_default_network_access_rule_is_denied",
"sqlserver_tde_encryption_enabled",
"sqlserver_tde_encrypted_with_cmk",
"sqlserver_auditing_enabled",
"keyvault_key_rotation_enabled",
"keyvault_logging_enabled",
"keyvault_private_endpoints",
"vm_ensure_attached_disks_encrypted_with_cmk",
"vm_backup_enabled",
"cosmosdb_account_use_private_endpoints",
"databricks_workspace_cmk_encryption_enabled",
"databricks_workspace_vnet_injection_enabled"
]
},
{
"Id": "164_308_a_4_ii_b",
"Name": "164.308(a)(4)(ii)(B) Access authorization",
"Description": "Implement policies and procedures for granting access to electronic protected health information, for example, through access to a workstation, transaction, program, process, or other mechanism.",
"Attributes": [
{
"ItemId": "164_308_a_4_ii_b",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"iam_custom_role_has_permissions_to_administer_resource_locks",
"keyvault_rbac_enabled",
"aks_cluster_rbac_enabled",
"cosmosdb_account_use_aad_and_rbac",
"sqlserver_azuread_administrator_enabled",
"entra_global_admin_in_less_than_five_users"
]
},
{
"Id": "164_308_a_4_ii_c",
"Name": "164.308(a)(4)(ii)(C) Access establishment and modification",
"Description": "Implement policies and procedures that, based upon the covered entity's or the business associate's access authorization policies, establish, document, review, and modify a user's right of access to a workstation, transaction, program, or process.",
"Attributes": [
{
"ItemId": "164_308_a_4_ii_c",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"storage_key_rotation_90_days",
"keyvault_key_rotation_enabled",
"keyvault_rbac_key_expiration_set",
"keyvault_rbac_secret_expiration_set",
"entra_global_admin_in_less_than_five_users",
"entra_policy_default_users_cannot_create_security_groups",
"entra_policy_ensure_default_user_cannot_create_apps"
]
},
{
"Id": "164_308_a_5_ii_b",
"Name": "164.308(a)(5)(ii)(B) Protection from malicious software",
"Description": "Procedures for guarding against, detecting, and reporting malicious software.",
"Attributes": [
{
"ItemId": "164_308_a_5_ii_b",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"defender_ensure_defender_for_server_is_on",
"defender_ensure_wdatp_is_enabled",
"defender_assessments_vm_endpoint_protection_installed",
"defender_ensure_system_updates_are_applied",
"defender_container_images_scan_enabled",
"defender_container_images_resolved_vulnerabilities"
]
},
{
"Id": "164_308_a_5_ii_c",
"Name": "164.308(a)(5)(ii)(C) Log-in monitoring",
"Description": "Procedures for monitoring log-in attempts and reporting discrepancies.",
"Attributes": [
{
"ItemId": "164_308_a_5_ii_c",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"defender_ensure_defender_for_server_is_on",
"defender_ensure_mcas_is_enabled",
"monitor_diagnostic_setting_with_appropriate_categories",
"entra_security_defaults_enabled",
"sqlserver_auditing_enabled",
"keyvault_logging_enabled"
]
},
{
"Id": "164_308_a_5_ii_d",
"Name": "164.308(a)(5)(ii)(D) Password management",
"Description": "Procedures for creating, changing, and safeguarding passwords.",
"Attributes": [
{
"ItemId": "164_308_a_5_ii_d",
"Section": "164.308 Administrative Safeguards",
"Service": "entra"
}
],
"Checks": [
"entra_security_defaults_enabled",
"entra_privileged_user_has_mfa",
"entra_non_privileged_user_has_mfa",
"storage_key_rotation_90_days",
"keyvault_key_rotation_enabled",
"keyvault_rbac_key_expiration_set",
"keyvault_rbac_secret_expiration_set"
]
},
{
"Id": "164_308_a_6_i",
"Name": "164.308(a)(6)(i) Security incident procedures",
"Description": "Implement policies and procedures to address security incidents.",
"Attributes": [
{
"ItemId": "164_308_a_6_i",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_alert_create_update_nsg",
"monitor_alert_delete_nsg",
"monitor_alert_create_update_security_solution",
"monitor_alert_delete_security_solution",
"monitor_alert_service_health_exists",
"defender_ensure_defender_for_server_is_on",
"defender_ensure_notify_alerts_severity_is_high",
"defender_ensure_notify_emails_to_owners",
"defender_additional_email_configured_with_a_security_contact",
"defender_attack_path_notifications_properly_configured"
]
},
{
"Id": "164_308_a_6_ii",
"Name": "164.308(a)(6)(ii) Response and reporting",
"Description": "Identify and respond to suspected or known security incidents; mitigate, to the extent practicable, harmful effects of security incidents that are known to the covered entity or business associate; and document security incidents and their outcomes.",
"Attributes": [
{
"ItemId": "164_308_a_6_ii",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_diagnostic_setting_with_appropriate_categories",
"monitor_diagnostic_settings_exists",
"monitor_alert_create_update_nsg",
"monitor_alert_delete_nsg",
"defender_ensure_defender_for_server_is_on",
"defender_ensure_notify_alerts_severity_is_high",
"defender_ensure_notify_emails_to_owners",
"defender_additional_email_configured_with_a_security_contact",
"sqlserver_auditing_enabled",
"keyvault_logging_enabled",
"network_flow_log_captured_sent",
"app_http_logs_enabled"
]
},
{
"Id": "164_308_a_7_i",
"Name": "164.308(a)(7)(i) Contingency plan",
"Description": "Establish (and implement as needed) policies and procedures for responding to an emergency or other occurrence (for example, fire, vandalism, system failure, and natural disaster) that damages systems that contain electronic protected health information.",
"Attributes": [
{
"ItemId": "164_308_a_7_i",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"vm_backup_enabled",
"vm_sufficient_daily_backup_retention_period",
"storage_blob_versioning_is_enabled",
"storage_ensure_soft_delete_is_enabled",
"storage_ensure_file_shares_soft_delete_is_enabled",
"storage_geo_redundant_enabled",
"keyvault_recoverable",
"sqlserver_auditing_retention_90_days"
]
},
{
"Id": "164_308_a_7_ii_a",
"Name": "164.308(a)(7)(ii)(A) Data backup plan",
"Description": "Establish and implement procedures to create and maintain retrievable exact copies of electronic protected health information.",
"Attributes": [
{
"ItemId": "164_308_a_7_ii_a",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"vm_backup_enabled",
"vm_sufficient_daily_backup_retention_period",
"storage_blob_versioning_is_enabled",
"storage_ensure_soft_delete_is_enabled",
"storage_ensure_file_shares_soft_delete_is_enabled",
"storage_geo_redundant_enabled",
"keyvault_recoverable",
"sqlserver_auditing_retention_90_days",
"postgresql_flexible_server_log_retention_days_greater_3"
]
},
{
"Id": "164_308_a_7_ii_b",
"Name": "164.308(a)(7)(ii)(B) Disaster recovery plan",
"Description": "Establish (and implement as needed) procedures to restore any loss of data.",
"Attributes": [
{
"ItemId": "164_308_a_7_ii_b",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"vm_backup_enabled",
"vm_sufficient_daily_backup_retention_period",
"storage_blob_versioning_is_enabled",
"storage_ensure_soft_delete_is_enabled",
"storage_geo_redundant_enabled",
"keyvault_recoverable"
]
},
{
"Id": "164_308_a_7_ii_c",
"Name": "164.308(a)(7)(ii)(C) Emergency mode operation plan",
"Description": "Establish (and implement as needed) procedures to enable continuation of critical business processes for protection of the security of electronic protected health information while operating in emergency mode.",
"Attributes": [
{
"ItemId": "164_308_a_7_ii_c",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"vm_backup_enabled",
"vm_sufficient_daily_backup_retention_period",
"storage_blob_versioning_is_enabled",
"storage_ensure_soft_delete_is_enabled",
"storage_geo_redundant_enabled",
"keyvault_recoverable"
]
},
{
"Id": "164_308_a_8",
"Name": "164.308(a)(8) Evaluation",
"Description": "Perform a periodic technical and nontechnical evaluation, based initially upon the standards implemented under this rule and subsequently, in response to environmental or operational changes affecting the security of electronic protected health information, that establishes the extent to which an entity's security policies and procedures meet the requirements of this subpart.",
"Attributes": [
{
"ItemId": "164_308_a_8",
"Section": "164.308 Administrative Safeguards",
"Service": "azure"
}
],
"Checks": [
"defender_ensure_defender_for_server_is_on",
"defender_ensure_mcas_is_enabled",
"sqlserver_vulnerability_assessment_enabled",
"sqlserver_va_periodic_recurring_scans_enabled",
"sqlserver_va_scan_reports_configured",
"sqlserver_va_emails_notifications_admins_enabled",
"policy_ensure_asc_enforcement_enabled"
]
},
{
"Id": "164_310_a_1",
"Name": "164.310(a)(1) Facility access controls",
"Description": "Implement policies and procedures to limit physical access to its electronic information systems and the facility or facilities in which they are housed, while ensuring that properly authorized access is allowed.",
"Attributes": [
{
"ItemId": "164_310_a_1",
"Section": "164.310 Physical Safeguards",
"Service": "azure"
}
],
"Checks": [
"network_ssh_internet_access_restricted",
"network_rdp_internet_access_restricted",
"network_http_internet_access_restricted",
"network_bastion_host_exists",
"vm_jit_access_enabled",
"aks_clusters_public_access_disabled",
"aks_clusters_created_with_private_nodes"
]
},
{
"Id": "164_310_d_1",
"Name": "164.310(d)(1) Device and media controls",
"Description": "Implement policies and procedures that govern the receipt and removal of hardware and electronic media that contain electronic protected health information into and out of a facility, and the movement of these items within the facility.",
"Attributes": [
{
"ItemId": "164_310_d_1",
"Section": "164.310 Physical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_infrastructure_encryption_is_enabled",
"vm_ensure_attached_disks_encrypted_with_cmk",
"vm_ensure_unattached_disks_encrypted_with_cmk",
"vm_ensure_using_managed_disks",
"sqlserver_tde_encryption_enabled",
"databricks_workspace_cmk_encryption_enabled"
]
},
{
"Id": "164_312_a_1",
"Name": "164.312(a)(1) Access control",
"Description": "Implement technical policies and procedures for electronic information systems that maintain electronic protected health information to allow access only to those persons or software programs that have been granted access rights as specified in 164.308(a)(4).",
"Attributes": [
{
"ItemId": "164_312_a_1",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_blob_public_access_level_is_disabled",
"storage_default_network_access_rule_is_denied",
"storage_ensure_private_endpoints_in_storage_accounts",
"sqlserver_unrestricted_inbound_access",
"network_ssh_internet_access_restricted",
"network_rdp_internet_access_restricted",
"network_http_internet_access_restricted",
"iam_subscription_roles_owner_custom_not_created",
"iam_role_user_access_admin_restricted",
"entra_privileged_user_has_mfa",
"containerregistry_not_publicly_accessible",
"app_function_not_publicly_accessible",
"aisearch_service_not_publicly_accessible",
"cosmosdb_account_firewall_use_selected_networks",
"cosmosdb_account_use_private_endpoints",
"aks_clusters_public_access_disabled"
]
},
{
"Id": "164_312_a_2_i",
"Name": "164.312(a)(2)(i) Unique user identification",
"Description": "Assign a unique name and/or number for identifying and tracking user identity.",
"Attributes": [
{
"ItemId": "164_312_a_2_i",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"sqlserver_auditing_enabled",
"sqlserver_azuread_administrator_enabled",
"entra_security_defaults_enabled",
"storage_default_to_entra_authorization_enabled",
"cosmosdb_account_use_aad_and_rbac",
"postgresql_flexible_server_entra_id_authentication_enabled"
]
},
{
"Id": "164_312_a_2_ii",
"Name": "164.312(a)(2)(ii) Emergency access procedure",
"Description": "Establish (and implement as needed) procedures for obtaining necessary electronic protected health information during an emergency.",
"Attributes": [
{
"ItemId": "164_312_a_2_ii",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"vm_backup_enabled",
"vm_sufficient_daily_backup_retention_period",
"storage_blob_versioning_is_enabled",
"storage_ensure_soft_delete_is_enabled",
"storage_geo_redundant_enabled",
"keyvault_recoverable"
]
},
{
"Id": "164_312_a_2_iv",
"Name": "164.312(a)(2)(iv) Encryption and decryption",
"Description": "Implement a mechanism to encrypt and decrypt electronic protected health information.",
"Attributes": [
{
"ItemId": "164_312_a_2_iv",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_infrastructure_encryption_is_enabled",
"storage_secure_transfer_required_is_enabled",
"sqlserver_tde_encryption_enabled",
"sqlserver_tde_encrypted_with_cmk",
"keyvault_key_rotation_enabled",
"vm_ensure_attached_disks_encrypted_with_cmk",
"vm_ensure_unattached_disks_encrypted_with_cmk",
"databricks_workspace_cmk_encryption_enabled",
"monitor_storage_account_with_activity_logs_cmk_encrypted"
]
},
{
"Id": "164_312_b",
"Name": "164.312(b) Audit controls",
"Description": "Implement hardware, software, and/or procedural mechanisms that record and examine activity in information systems that contain or use electronic protected health information.",
"Attributes": [
{
"ItemId": "164_312_b",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_diagnostic_setting_with_appropriate_categories",
"monitor_diagnostic_settings_exists",
"monitor_alert_create_policy_assignment",
"monitor_alert_delete_policy_assignment",
"monitor_alert_create_update_nsg",
"monitor_alert_delete_nsg",
"monitor_alert_create_update_sqlserver_fr",
"monitor_alert_delete_sqlserver_fr",
"sqlserver_auditing_enabled",
"sqlserver_auditing_retention_90_days",
"keyvault_logging_enabled",
"network_watcher_enabled",
"network_flow_log_captured_sent",
"network_flow_log_more_than_90_days",
"app_http_logs_enabled",
"appinsights_ensure_is_configured",
"postgresql_flexible_server_log_checkpoints_on",
"postgresql_flexible_server_log_connections_on",
"postgresql_flexible_server_log_disconnections_on",
"mysql_flexible_server_audit_log_enabled",
"mysql_flexible_server_audit_log_connection_activated"
]
},
{
"Id": "164_312_c_1",
"Name": "164.312(c)(1) Integrity",
"Description": "Implement policies and procedures to protect electronic protected health information from improper alteration or destruction.",
"Attributes": [
{
"ItemId": "164_312_c_1",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_blob_versioning_is_enabled",
"storage_secure_transfer_required_is_enabled",
"keyvault_key_rotation_enabled",
"keyvault_recoverable",
"sqlserver_tde_encryption_enabled",
"vm_ensure_attached_disks_encrypted_with_cmk"
]
},
{
"Id": "164_312_c_2",
"Name": "164.312(c)(2) Mechanism to authenticate electronic protected health information",
"Description": "Implement electronic mechanisms to corroborate that electronic protected health information has not been altered or destroyed in an unauthorized manner.",
"Attributes": [
{
"ItemId": "164_312_c_2",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_blob_versioning_is_enabled",
"storage_secure_transfer_required_is_enabled",
"keyvault_key_rotation_enabled",
"keyvault_logging_enabled",
"sqlserver_auditing_enabled",
"network_flow_log_captured_sent"
]
},
{
"Id": "164_312_d",
"Name": "164.312(d) Person or entity authentication",
"Description": "Implement procedures to verify that a person or entity seeking access to electronic protected health information is the one claimed.",
"Attributes": [
{
"ItemId": "164_312_d",
"Section": "164.312 Technical Safeguards",
"Service": "entra"
}
],
"Checks": [
"entra_security_defaults_enabled",
"entra_privileged_user_has_mfa",
"entra_non_privileged_user_has_mfa",
"entra_conditional_access_policy_require_mfa_for_management_api",
"entra_user_with_vm_access_has_mfa",
"entra_trusted_named_locations_exists",
"sqlserver_azuread_administrator_enabled",
"postgresql_flexible_server_entra_id_authentication_enabled"
]
},
{
"Id": "164_312_e_1",
"Name": "164.312(e)(1) Transmission security",
"Description": "Implement technical security measures to guard against unauthorized access to electronic protected health information that is being transmitted over an electronic communications network.",
"Attributes": [
{
"ItemId": "164_312_e_1",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_secure_transfer_required_is_enabled",
"storage_ensure_minimum_tls_version_12",
"sqlserver_recommended_minimal_tls_version",
"app_minimum_tls_version_12",
"app_ensure_http_is_redirected_to_https",
"app_ensure_using_http20",
"app_function_ftps_deployment_disabled",
"app_ftp_deployment_disabled",
"network_ssh_internet_access_restricted",
"network_rdp_internet_access_restricted",
"mysql_flexible_server_minimum_tls_version_12",
"mysql_flexible_server_ssl_connection_enabled",
"postgresql_flexible_server_enforce_ssl_enabled"
]
},
{
"Id": "164_312_e_2_i",
"Name": "164.312(e)(2)(i) Integrity controls",
"Description": "Implement security measures to ensure that electronically transmitted electronic protected health information is not improperly modified without detection until disposed of.",
"Attributes": [
{
"ItemId": "164_312_e_2_i",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"monitor_diagnostic_setting_with_appropriate_categories",
"storage_secure_transfer_required_is_enabled",
"storage_ensure_minimum_tls_version_12",
"storage_blob_versioning_is_enabled",
"defender_ensure_defender_for_server_is_on",
"sqlserver_auditing_enabled",
"keyvault_logging_enabled",
"network_flow_log_captured_sent"
]
},
{
"Id": "164_312_e_2_ii",
"Name": "164.312(e)(2)(ii) Encryption",
"Description": "Implement a mechanism to encrypt electronic protected health information whenever deemed appropriate.",
"Attributes": [
{
"ItemId": "164_312_e_2_ii",
"Section": "164.312 Technical Safeguards",
"Service": "azure"
}
],
"Checks": [
"storage_ensure_encryption_with_customer_managed_keys",
"storage_infrastructure_encryption_is_enabled",
"storage_secure_transfer_required_is_enabled",
"storage_ensure_minimum_tls_version_12",
"sqlserver_tde_encryption_enabled",
"sqlserver_tde_encrypted_with_cmk",
"sqlserver_recommended_minimal_tls_version",
"keyvault_key_rotation_enabled",
"vm_ensure_attached_disks_encrypted_with_cmk",
"vm_ensure_unattached_disks_encrypted_with_cmk",
"app_minimum_tls_version_12",
"app_ensure_http_is_redirected_to_https",
"mysql_flexible_server_minimum_tls_version_12",
"mysql_flexible_server_ssl_connection_enabled",
"postgresql_flexible_server_enforce_ssl_enabled",
"databricks_workspace_cmk_encryption_enabled"
]
}
]
}

View File

@@ -121,7 +121,7 @@ filelock = [
]
flake8 = "7.1.2"
freezegun = "1.5.1"
marshmallow = ">=3.15.0,<5.0.0"
marshmallow = ">=3.15.0,<4.0.0"
mock = "5.2.0"
moto = {extras = ["all"], version = "5.1.11"}
openapi-schema-validator = "0.6.3"

View File

@@ -2,7 +2,7 @@
All notable changes to the **Prowler UI** are documented in this file.
## [1.18.0] (Prowler UNRELEASED)
## [1.18.0] (Prowler v5.18.0)
### 🔄 Changed

View File

@@ -188,5 +188,5 @@ export const getUserByMe = async (accessToken: string) => {
};
export async function logOut() {
await signOut();
await signOut({ redirectTo: "/sign-in" });
}

View File

@@ -74,11 +74,17 @@ test.describe("Session Error Messages", () => {
await scansPage.goto();
await expect(page.locator("main")).toBeVisible();
// Navigate to a safe public page before clearing cookies
// This prevents background requests from the protected page (scans)
// triggering a client-side redirect race condition when cookies are cleared
await signInPage.goto();
// Clear cookies to simulate session expiry
await context.clearCookies();
// Try to navigate to a different protected route
await providersPage.goto();
// Use fresh navigation to force middleware evaluation
await providersPage.gotoFresh();
// Should be redirected to login with callbackUrl
await signInPage.verifyRedirectWithCallback("/providers");

View File

@@ -1,6 +1,11 @@
import { expect, test } from "@playwright/test";
import { getSession, TEST_CREDENTIALS, verifySessionValid } from "../helpers";
import {
getSession,
getSessionWithoutCookies,
TEST_CREDENTIALS,
verifySessionValid,
} from "../helpers";
import { HomePage } from "../home/home-page";
import { SignInPage } from "../sign-in-base/sign-in-base-page";
@@ -71,7 +76,7 @@ test.describe("Token Refresh Flow", () => {
await context.clearCookies();
const expiredSession = await getSession(page);
const expiredSession = await getSessionWithoutCookies(page);
expect(expiredSession).toBeNull();
},
);

View File

@@ -143,9 +143,10 @@
### Flow Steps:
1. Log in with valid credentials.
2. Navigate to a protected route (/scans).
3. Clear cookies to simulate session expiry.
4. Navigate to another protected route (/providers).
5. Verify redirect to sign-in includes callbackUrl parameter.
3. Navigate to a safe public page (/sign-in).
4. Clear cookies to simulate session expiry.
5. Navigate to another protected route (/providers) using fresh navigation.
6. Verify redirect to sign-in includes callbackUrl parameter.
### Expected Result:
- URL contains callbackUrl=/providers parameter.

View File

@@ -1,4 +1,4 @@
import { Locator, Page, expect } from "@playwright/test";
import { Locator, Page, expect, request } from "@playwright/test";
import { AWSProviderCredential, AWSProviderData, AWS_CREDENTIAL_OPTIONS, ProvidersPage } from "./providers/providers-page";
import { ScansPage } from "./scans/scans-page";
@@ -47,6 +47,20 @@ export async function getSession(page: Page) {
return response.json();
}
/**
 * Fetch the NextAuth session using a brand-new, cookie-less request context.
 *
 * Unlike getSession (which goes through the page's own context and therefore
 * sends its cookies), this creates a fresh APIRequestContext with no stored
 * state, so the response reflects what an unauthenticated client would see.
 *
 * @param page - Used only to derive the app origin from the current URL.
 * @returns The parsed body of GET /api/auth/session (null when no session).
 */
export async function getSessionWithoutCookies(page: Page) {
  const currentUrl = page.url();
  // Fall back to the configured base URL when the page has not navigated
  // anywhere yet (page.url() returns "about:blank" in that case).
  const baseUrl = currentUrl.startsWith("http")
    ? new URL(currentUrl).origin
    : process.env.NEXTAUTH_URL || "http://localhost:3000";
  const apiContext = await request.newContext({ baseURL: baseUrl });
  try {
    const response = await apiContext.get("/api/auth/session");
    return await response.json();
  } finally {
    // Always release the context — even when the request or JSON parsing
    // throws — so the underlying connections are not leaked across tests.
    await apiContext.dispose();
  }
}
export async function verifySessionValid(page: Page) {
const session = await getSession(page);
expect(session).toBeTruthy();

View File

@@ -428,6 +428,12 @@ export class ProvidersPage extends BasePage {
await super.goto("/providers");
}
/** Navigate to the providers page, forcing a fresh (uncached) load. */
async gotoFresh(): Promise<void> {
  const providersRoute = "/providers";
  await super.gotoFresh(providersRoute);
}
private async verifyPageHasProwlerTitle(): Promise<void> {
await expect(this.page).toHaveTitle(/Prowler/);
}

View File

@@ -110,8 +110,18 @@ export class ScansPage extends BasePage {
.filter({ hasText: accountId })
.first();
// Verify the row with the account ID is visible (provider exists)
await expect(rowWithAccountId).toBeVisible();
try {
// Verify the row with the account ID is visible (provider exists)
// Use a short timeout first to allow for a quick check
await expect(rowWithAccountId).toBeVisible({ timeout: 5000 });
} catch {
// If not visible immediately (likely due to async backend processing),
// reload the page to fetch the latest data
await this.page.reload();
await this.verifyPageLoaded();
// Wait longer after reload
await expect(rowWithAccountId).toBeVisible({ timeout: 15000 });
}
// Verify the row contains "scheduled scan" in the Scan name column
// The scan name "Daily scheduled scan" is displayed as "scheduled scan" in the table

View File

@@ -150,9 +150,11 @@ test.describe("Session Persistence", () => {
await signInPage.loginAndVerify(TEST_CREDENTIALS.VALID);
await homePage.signOut();
await signInPage.verifyLogoutSuccess();
await homePage.goto();
// Wait for signOut to redirect to /sign-in (NextAuth signOut flow)
await page.waitForURL(/\/sign-in/, { timeout: 15000 });
// Verify we're on the sign-in page with the form visible
await signInPage.verifyOnSignInPage();
},
);