refactor(api): migrate Attack Paths network exposure queries from APOC to openCypher (#10266)

This commit is contained in:
Josema Camacho
2026-03-10 16:48:16 +01:00
committed by GitHub
parent 3288a4a131
commit 57f3920e66
3 changed files with 144 additions and 102 deletions

View File

@@ -6,6 +6,7 @@ All notable changes to the **Prowler API** are documented in this file.
### 🔄 Changed ### 🔄 Changed
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
- `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293) - `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
--- ---

View File

@@ -16,8 +16,7 @@ AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS = AttackPathsQueryDefinition(
description="Detect EC2 instances with SSH exposed to the internet that can assume higher-privileged roles to read tagged sensitive S3 buckets despite bucket-level public access blocks.", description="Detect EC2 instances with SSH exposed to the internet that can assume higher-privileged roles to read tagged sensitive S3 buckets despite bucket-level public access blocks.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
MATCH path_s3 = (aws:AWSAccount {{id: $provider_uid}})--(s3:S3Bucket)--(t:AWSTag) MATCH path_s3 = (aws:AWSAccount {{id: $provider_uid}})--(s3:S3Bucket)--(t:AWSTag)
WHERE toLower(t.key) = toLower($tag_key) AND toLower(t.value) = toLower($tag_value) WHERE toLower(t.key) = toLower($tag_key) AND toLower(t.value) = toLower($tag_value)
@@ -32,8 +31,7 @@ AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS = AttackPathsQueryDefinition(
MATCH path_assume_role = (ec2)-[p:STS_ASSUMEROLE_ALLOW*1..9]-(r:AWSRole) MATCH path_assume_role = (ec2)-[p:STS_ASSUMEROLE_ALLOW*1..9]-(r:AWSRole)
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, ec2) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(ec2)
YIELD rel AS can_access
UNWIND nodes(path_s3) + nodes(path_ec2) + nodes(path_role) + nodes(path_assume_role) as n UNWIND nodes(path_s3) + nodes(path_ec2) + nodes(path_role) + nodes(path_assume_role) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})
@@ -181,14 +179,12 @@ AWS_EC2_INSTANCES_INTERNET_EXPOSED = AttackPathsQueryDefinition(
description="Find EC2 instances flagged as exposed to the internet within the selected account.", description="Find EC2 instances flagged as exposed to the internet within the selected account.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance) MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance)
WHERE ec2.exposed_internet = true WHERE ec2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, ec2) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(ec2)
YIELD rel AS can_access
UNWIND nodes(path) as n UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})
@@ -205,16 +201,14 @@ AWS_SECURITY_GROUPS_OPEN_INTERNET_FACING = AttackPathsQueryDefinition(
description="Find internet-facing resources associated with security groups that allow inbound access from '0.0.0.0/0'.", description="Find internet-facing resources associated with security groups that allow inbound access from '0.0.0.0/0'.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
// Match EC2 instances that are internet-exposed with open security groups (0.0.0.0/0) // Match EC2 instances that are internet-exposed with open security groups (0.0.0.0/0)
MATCH path_ec2 = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)--(ir:IpRange) MATCH path_ec2 = (aws:AWSAccount {{id: $provider_uid}})--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)--(ir:IpRange)
WHERE ec2.exposed_internet = true WHERE ec2.exposed_internet = true
AND ir.range = "0.0.0.0/0" AND ir.range = "0.0.0.0/0"
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, ec2) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(ec2)
YIELD rel AS can_access
UNWIND nodes(path_ec2) as n UNWIND nodes(path_ec2) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})
@@ -231,14 +225,12 @@ AWS_CLASSIC_ELB_INTERNET_EXPOSED = AttackPathsQueryDefinition(
description="Find Classic Load Balancers exposed to the internet along with their listeners.", description="Find Classic Load Balancers exposed to the internet along with their listeners.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elb:LoadBalancer)--(listener:ELBListener) MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elb:LoadBalancer)--(listener:ELBListener)
WHERE elb.exposed_internet = true WHERE elb.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, elb) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(elb)
YIELD rel AS can_access
UNWIND nodes(path) as n UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})
@@ -255,14 +247,12 @@ AWS_ELBV2_INTERNET_EXPOSED = AttackPathsQueryDefinition(
description="Find ELBv2 load balancers exposed to the internet along with their listeners.", description="Find ELBv2 load balancers exposed to the internet along with their listeners.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elbv2:LoadBalancerV2)--(listener:ELBV2Listener) MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(elbv2:LoadBalancerV2)--(listener:ELBV2Listener)
WHERE elbv2.exposed_internet = true WHERE elbv2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, elbv2) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(elbv2)
YIELD rel AS can_access
UNWIND nodes(path) as n UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})
@@ -279,31 +269,15 @@ AWS_PUBLIC_IP_RESOURCE_LOOKUP = AttackPathsQueryDefinition(
description="Given a public IP address, find the related AWS resource and its adjacent node within the selected account.", description="Given a public IP address, find the related AWS resource and its adjacent node within the selected account.",
provider="aws", provider="aws",
cypher=f""" cypher=f"""
CALL apoc.create.vNode(['Internet'], {{id: 'Internet', name: 'Internet', provider_id: $provider_id}}) OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
YIELD node AS internet
CALL () {{ MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x)-[q]-(y)
MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:EC2PrivateIp)-[q]-(y) WHERE (x:EC2PrivateIp AND x.public_ip = $ip)
WHERE x.public_ip = $ip OR (x:EC2Instance AND x.publicipaddress = $ip)
RETURN path, x OR (x:NetworkInterface AND x.public_ip = $ip)
OR (x:ElasticIPAddress AND x.public_ip = $ip)
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:EC2Instance)-[q]-(y) OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(x)
WHERE x.publicipaddress = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:NetworkInterface)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {{id: $provider_uid}})-[r]-(x:ElasticIPAddress)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
}}
WITH path, x, internet
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {{provider_id: $provider_id}}, x)
YIELD rel AS can_access
UNWIND nodes(path) as n UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}})

View File

@@ -7,7 +7,7 @@ description: >
license: Apache-2.0 license: Apache-2.0
metadata: metadata:
author: prowler-cloud author: prowler-cloud
version: "1.0" version: "1.1"
scope: [root, api] scope: [root, api]
auto_invoke: auto_invoke:
- "Creating Attack Paths queries" - "Creating Attack Paths queries"
@@ -80,7 +80,16 @@ api/src/backend/api/attack_paths/queries/{provider}.py
Example: `api/src/backend/api/attack_paths/queries/aws.py` Example: `api/src/backend/api/attack_paths/queries/aws.py`
### Query Definition Pattern ### Query parameters for provider scoping
Two parameters exist. Both are injected automatically by the query runner.
| Parameter | Property it matches | Used on | Purpose |
| --------------- | ------------------- | -------------- | ------------------------------------ |
| `$provider_uid` | `id`                | `AWSAccount`   | Scopes to a specific AWS account     |
| `$provider_uid` | `provider_uid`      | `ProwlerFinding` | Scopes findings to the same AWS account |
| `$provider_id`  | `_provider_id`      | Any other node | Scopes nodes to the provider context |
### Privilege Escalation Query Pattern
```python ```python
from api.attack_paths.queries.types import ( from api.attack_paths.queries.types import (
@@ -88,7 +97,6 @@ from api.attack_paths.queries.types import (
AttackPathsQueryDefinition, AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition, AttackPathsQueryParameterDefinition,
) )
from tasks.jobs.attack_paths.config import PROWLER_FINDING_LABEL
# {REFERENCE_ID} (e.g., EC2-001, GLUE-001) # {REFERENCE_ID} (e.g., EC2-001, GLUE-001)
AWS_{QUERY_NAME} = AttackPathsQueryDefinition( AWS_{QUERY_NAME} = AttackPathsQueryDefinition(
@@ -129,7 +137,7 @@ AWS_{QUERY_NAME} = AttackPathsQueryDefinition(
) )
UNWIND nodes(path_principal) + nodes(path_target) as n UNWIND nodes(path_principal) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {{status: 'FAIL', provider_uid: $provider_uid}}) OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {{status: 'FAIL', provider_uid: $provider_uid}})
RETURN path_principal, path_target, RETURN path_principal, path_target,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
@@ -138,6 +146,36 @@ AWS_{QUERY_NAME} = AttackPathsQueryDefinition(
) )
``` ```
### Network Exposure Query Pattern
```python
AWS_{QUERY_NAME} = AttackPathsQueryDefinition(
id="aws-{kebab-case-name}",
name="{Human-friendly label}",
short_description="{Brief explanation.}",
description="{Detailed description.}",
provider="aws",
cypher=f"""
// Match the Internet sentinel node
OPTIONAL MATCH (internet:Internet {{_provider_id: $provider_id}})
// Match exposed resources (MUST chain from `aws`)
MATCH path = (aws:AWSAccount {{id: $provider_uid}})--(resource:EC2Instance)
WHERE resource.exposed_internet = true
// Link Internet to resource
OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(resource)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {{status: 'FAIL', provider_uid: $provider_uid}})
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr,
internet, can_access
""",
parameters=[],
)
```
### Register in Query List ### Register in Query List
Add to the `{PROVIDER}_QUERIES` list at the bottom of the file: Add to the `{PROVIDER}_QUERIES` list at the bottom of the file:
@@ -219,6 +257,7 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties - `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
- `ProviderResource` - Generic resource marker - `ProviderResource` - Generic resource marker
- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`) - `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`)
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`. These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
@@ -234,7 +273,7 @@ This informs query design by showing what data is actually available to query.
### 4. Create Query Definition ### 4. Create Query Definition
Use the standard pattern (see above) with: Use the appropriate pattern (privilege escalation or network exposure) with:
- **id**: Auto-generated as `{provider}-{kebab-case-description}` - **id**: Auto-generated as `{provider}-{kebab-case-description}`
- **name**: Short, human-friendly label. No raw IAM permissions. For sourced queries (e.g., pathfinding.cloud), append the reference ID in parentheses: `"EC2 Instance Launch with Privileged Role (EC2-001)"`. If the name already has parentheses, prepend the ID inside them: `"ECS Service Creation with Privileged Role (ECS-003 - Existing Cluster)"`. - **name**: Short, human-friendly label. No raw IAM permissions. For sourced queries (e.g., pathfinding.cloud), append the reference ID in parentheses: `"EC2 Instance Launch with Privileged Role (EC2-001)"`. If the name already has parentheses, prepend the ID inside them: `"ECS Service Creation with Privileged Role (ECS-003 - Existing Cluster)"`.
@@ -263,7 +302,7 @@ Examples:
- `aws-ec2-privesc-passrole-iam` - `aws-ec2-privesc-passrole-iam`
- `aws-iam-privesc-attach-role-policy-assume-role` - `aws-iam-privesc-attach-role-policy-assume-role`
- `aws-rds-unencrypted-storage` - `aws-ec2-instances-internet-exposed`
### Query Constant Name ### Query Constant Name
@@ -275,7 +314,7 @@ Examples:
- `AWS_EC2_PRIVESC_PASSROLE_IAM` - `AWS_EC2_PRIVESC_PASSROLE_IAM`
- `AWS_IAM_PRIVESC_ATTACH_ROLE_POLICY_ASSUME_ROLE` - `AWS_IAM_PRIVESC_ATTACH_ROLE_POLICY_ASSUME_ROLE`
- `AWS_RDS_UNENCRYPTED_STORAGE` - `AWS_EC2_INSTANCES_INTERNET_EXPOSED`
--- ---
@@ -325,46 +364,91 @@ WHERE any(resource IN stmt.resource WHERE
) )
``` ```
### Match Internet Sentinel Node
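Use this in network exposure queries. The Internet node is a real graph node, scoped by `_provider_id`. It is matched with `OPTIONAL MATCH` so that the query still returns its other results (with `internet` bound to `null`) when no Internet node exists for the provider: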
```cypher
OPTIONAL MATCH (internet:Internet {_provider_id: $provider_id})
```
### Link Internet to Exposed Resource
The `CAN_ACCESS` relationship is a real graph relationship linking the Internet node to exposed resources:
```cypher
OPTIONAL MATCH (internet)-[can_access:CAN_ACCESS]->(resource)
```
### Multi-label OR (match multiple resource types)
When a query needs to match different resource types in the same position, use label checks in WHERE:
```cypher
MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x)-[q]-(y)
WHERE (x:EC2PrivateIp AND x.public_ip = $ip)
OR (x:EC2Instance AND x.publicipaddress = $ip)
OR (x:NetworkInterface AND x.public_ip = $ip)
OR (x:ElasticIPAddress AND x.public_ip = $ip)
```
### Include Prowler Findings ### Include Prowler Findings
```cypher ```cypher
UNWIND nodes(path_principal) + nodes(path_target) as n UNWIND nodes(path_principal) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:{PROWLER_FINDING_LABEL} {status: 'FAIL', provider_uid: $provider_uid}) OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding {status: 'FAIL', provider_uid: $provider_uid})
RETURN path_principal, path_target, RETURN path_principal, path_target,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
``` ```
For network exposure queries, also return the internet node and relationship:
```cypher
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr,
internet, can_access
```
--- ---
## Common Node Labels by Provider ## Common Node Labels by Provider
### AWS ### AWS
| Label | Description | | Label | Description |
| -------------------- | ----------------------------------- | | --------------------- | --------------------------------------- |
| `AWSAccount` | AWS account root | | `AWSAccount` | AWS account root |
| `AWSPrincipal` | IAM principal (user, role, service) | | `AWSPrincipal` | IAM principal (user, role, service) |
| `AWSRole` | IAM role | | `AWSRole` | IAM role |
| `AWSUser` | IAM user | | `AWSUser` | IAM user |
| `AWSPolicy` | IAM policy | | `AWSPolicy` | IAM policy |
| `AWSPolicyStatement` | Policy statement | | `AWSPolicyStatement` | Policy statement |
| `EC2Instance` | EC2 instance | | `AWSTag` | Resource tag (key/value) |
| `EC2SecurityGroup` | Security group | | `EC2Instance` | EC2 instance |
| `S3Bucket` | S3 bucket | | `EC2SecurityGroup` | Security group |
| `RDSInstance` | RDS database instance | | `EC2PrivateIp` | EC2 private IP (has `public_ip`) |
| `LoadBalancer` | Classic ELB | | `IpPermissionInbound` | Inbound security group rule |
| `LoadBalancerV2` | ALB/NLB | | `IpRange` | IP range (e.g., `0.0.0.0/0`) |
| `LaunchTemplate` | EC2 launch template | | `NetworkInterface` | ENI (has `public_ip`) |
| `ElasticIPAddress` | Elastic IP (has `public_ip`) |
| `S3Bucket` | S3 bucket |
| `RDSInstance` | RDS database instance |
| `LoadBalancer` | Classic ELB |
| `LoadBalancerV2` | ALB/NLB |
| `ELBListener` | Classic ELB listener |
| `ELBV2Listener` | ALB/NLB listener |
| `LaunchTemplate` | EC2 launch template |
| `Internet` | Internet sentinel node (`_provider_id`) |
### Common Relationships ### Common Relationships
| Relationship | Description | | Relationship | Description |
| ---------------------- | ----------------------- | | ---------------------- | ---------------------------------- |
| `TRUSTS_AWS_PRINCIPAL` | Role trust relationship | | `TRUSTS_AWS_PRINCIPAL` | Role trust relationship |
| `STS_ASSUMEROLE_ALLOW` | Can assume role | | `STS_ASSUMEROLE_ALLOW` | Can assume role |
| `POLICY` | Has policy attached | | `CAN_ACCESS` | Internet-to-resource exposure link |
| `STATEMENT` | Policy has statement | | `POLICY` | Has policy attached |
| `STATEMENT` | Policy has statement |
--- ---
@@ -393,7 +477,7 @@ parameters=[
## Best Practices ## Best Practices
1. **Always filter by provider_uid**: Use `{id: $provider_uid}` on account nodes and `{provider_uid: $provider_uid}` on ProwlerFinding nodes 1. **Always scope by provider**: Use `{id: $provider_uid}` on `AWSAccount` nodes. Use `{_provider_id: $provider_id}` on any other node that needs provider scoping (e.g., `Internet`).
2. **Use consistent naming**: Follow existing patterns in the file 2. **Use consistent naming**: Follow existing patterns in the file
@@ -415,6 +499,8 @@ parameters=[
MATCH (aws)--(role:AWSRole) WHERE role.name = 'admin' MATCH (aws)--(role:AWSRole) WHERE role.name = 'admin'
``` ```
The `Internet` node is an exception: it uses `OPTIONAL MATCH` with `_provider_id` for scoping instead of chaining from `aws`.
--- ---
## openCypher Compatibility ## openCypher Compatibility
@@ -425,23 +511,14 @@ Queries must be written in **openCypher Version 9** to ensure compatibility with
### Avoid These (Not in openCypher spec) ### Avoid These (Not in openCypher spec)
| Feature | Reason | | Feature | Reason | Use instead |
| --------------------------------------------------- | ----------------------------------------------- | | -------------------------- | ----------------------------------------------- | ------------------------------------------------------ |
| APOC procedures (`apoc.*`) | Neo4j-specific plugin, not available in Neptune | | APOC procedures (`apoc.*`) | Neo4j-specific plugin, not available in Neptune | Real nodes and relationships in the graph |
| Virtual nodes (`apoc.create.vNode`) | APOC-specific | | Neptune extensions | Not available in Neo4j | Standard openCypher |
| Virtual relationships (`apoc.create.vRelationship`) | APOC-specific | | `reduce()` function | Not in openCypher spec | `UNWIND` + `collect()` |
| Neptune extensions | Not available in Neo4j | | `FOREACH` clause | Not in openCypher spec | `WITH` + `UNWIND` + `SET` |
| `reduce()` function | Use `UNWIND` + aggregation instead | | Regex operator (`=~`) | Not supported in Neptune | `toLower()` + exact match, or `CONTAINS`/`STARTS WITH` |
| `FOREACH` clause | Use `WITH` + `UNWIND` + `SET` instead | | `CALL () { UNION }` | Complex, hard to maintain | Multi-label OR in WHERE (see patterns section) |
| Regex match operator (`=~`) | Not supported in Neptune |
### CALL Subqueries
Supported with limitations:
- Use `WITH` clause to import variables: `CALL { WITH var ... }`
- Updates inside CALL subqueries are NOT supported
- Emitted variables cannot overlap with variables before the CALL
--- ---
@@ -451,7 +528,7 @@ Supported with limitations:
- **Repository**: https://github.com/DataDog/pathfinding.cloud - **Repository**: https://github.com/DataDog/pathfinding.cloud
- **All paths JSON**: `https://raw.githubusercontent.com/DataDog/pathfinding.cloud/main/docs/paths.json` - **All paths JSON**: `https://raw.githubusercontent.com/DataDog/pathfinding.cloud/main/docs/paths.json`
- Use WebFetch to query specific paths or list available services - Always use Bash with `curl | jq` to fetch paths (WebFetch truncates the large JSON)
### Cartography Schema ### Cartography Schema
@@ -461,7 +538,6 @@ Supported with limitations:
### openCypher Specification ### openCypher Specification
- **Neptune openCypher compliance** (what Neptune supports): https://docs.aws.amazon.com/neptune/latest/userguide/feature-opencypher-compliance.html - **Neptune openCypher compliance** (what Neptune supports): https://docs.aws.amazon.com/neptune/latest/userguide/feature-opencypher-compliance.html
- **Rewriting Cypher for Neptune** (converting Neo4j-specific syntax): https://docs.aws.amazon.com/neptune/latest/userguide/migration-opencypher-rewrites.html
- **openCypher project** (spec, grammar, TCK): https://github.com/opencypher/openCypher - **openCypher project** (spec, grammar, TCK): https://github.com/opencypher/openCypher
--- ---
@@ -485,13 +561,4 @@ Use the existing queries to learn:
- How to include Prowler findings - How to include Prowler findings
- Comment style - Comment style
> **Compatibility Warning**: Some existing queries use Neo4j-specific features **DO NOT** use generic templates. Match the exact style of existing queries in the file.
> (e.g., `apoc.create.vNode`, `apoc.create.vRelationship`, regex `=~`) that are
> **NOT compatible** with Amazon Neptune. Use these queries to learn general
> patterns (structure, naming, Prowler findings integration, comment style) but
> **DO NOT copy APOC procedures or other Neo4j-specific syntax** into new queries.
> New queries must be pure openCypher Version 9. Refer to the
> [openCypher Compatibility](#opencypher-compatibility) section for the full list
> of features to avoid.
**DO NOT** use generic templates. Match the exact style of existing **compatible** queries in the file.