Merge branch 'master' of github.com:prowler-cloud/prowler into pr11569-coderabbit-fix

2026-07-04 19:21:51 +00:00 · 2026-06-30 16:05:15 +01:00
parent bcd12863a0 c3ce3d2b3c
commit 18f9cce469
1245 changed files with 106147 additions and 12120 deletions
@@ -6,14 +6,20 @@
 PROWLER_UI_VERSION="stable"
 AUTH_URL=http://localhost:3000
 API_BASE_URL=http://prowler-api:8080/api/v1
+# deprecated, use UI_API_BASE_URL
 NEXT_PUBLIC_API_BASE_URL=${API_BASE_URL}
+UI_API_BASE_URL=${API_BASE_URL}
+# deprecated, use UI_API_DOCS_URL
 NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
+UI_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
 AUTH_TRUST_HOST=true
 UI_PORT=3000
 # openssl rand -base64 32
 AUTH_SECRET="N/c6mnaS5+SWq81+819OrzQZlmx1Vxtp/orjttJSmw8="
-# Google Tag Manager ID
+# Google Tag Manager ID (empty/unset ⇒ GTM not loaded, zero egress)
+# deprecated, use UI_GOOGLE_TAG_MANAGER_ID
 NEXT_PUBLIC_GOOGLE_TAG_MANAGER_ID=""
+UI_GOOGLE_TAG_MANAGER_ID=""

 #### MCP Server ####
 PROWLER_MCP_VERSION=stable
@@ -139,13 +145,19 @@ DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
 DJANGO_SENTRY_DSN=
 DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute

-# Sentry settings
-SENTRY_ENVIRONMENT=local
+# Sentry for the web app (server + browser). Empty/unset UI_SENTRY_DSN ⇒
+# Sentry disabled, zero egress. SENTRY_RELEASE (unprefixed) feeds the web app's
+# server/edge SDKs.
+UI_SENTRY_DSN=
+UI_SENTRY_ENVIRONMENT=local
 SENTRY_RELEASE=local
-NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
+# Reserved runtime public config (registered now; no UI consumer yet)
+# POSTHOG_KEY=
+# POSTHOG_HOST=
+# REO_DEV_CLIENT_ID=

 #### Prowler release version ####
-NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.31.0
+NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.32.0

 # Social login credentials
 SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
@@ -1,7 +1,6 @@
 # SDK
 /* @prowler-cloud/detection-remediation
 /prowler/ @prowler-cloud/detection-remediation
-/prowler/compliance/ @prowler-cloud/compliance
 /tests/ @prowler-cloud/detection-remediation
 /dashboard/ @prowler-cloud/detection-remediation
 /docs/ @prowler-cloud/detection-remediation
@@ -1,5 +1,5 @@
 name: 'OSV-Scanner'
-description: 'Install osv-scanner and scan a lockfile, failing on HIGH/CRITICAL/UNKNOWN severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
+description: 'Install osv-scanner and scan a lockfile, failing on CRITICAL severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
 author: 'Prowler'

 inputs:
@@ -7,9 +7,9 @@ inputs:
    description: 'Path to the lockfile to scan, relative to the repository root (e.g. uv.lock, api/uv.lock, ui/pnpm-lock.yaml).'
    required: true
  severity-levels:
-    description: 'Comma-separated severity levels that fail the scan. Default: HIGH,CRITICAL,UNKNOWN.'
+    description: 'Comma-separated severity levels that fail the scan. Default: CRITICAL.'
    required: false
-    default: 'HIGH,CRITICAL,UNKNOWN'
+    default: 'CRITICAL'
  version:
    description: 'osv-scanner release tag to install. When overriding, you MUST also override binary-sha256.'
    required: false
@@ -43,8 +43,17 @@ runs:
      if: github.repository_owner == 'prowler-cloud' && github.repository != 'prowler-cloud/prowler'
      shell: bash
      working-directory: ${{ inputs.working-directory }}
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
      run: |
-        LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
+        LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
+          -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+          -H "Accept: application/vnd.github+json" \
+          "https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
+          | jq -er '.sha') || {
+          echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
+          exit 1
+        }
        echo "Latest commit hash: $LATEST_COMMIT"
        sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
        echo "Updated uv.lock entry:"
@@ -54,8 +63,17 @@ runs:
      if: github.event_name == 'push' && github.ref == 'refs/heads/master' && github.repository == 'prowler-cloud/prowler'
      shell: bash
      working-directory: ${{ inputs.working-directory }}
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
      run: |
-        LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
+        LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
+          -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+          -H "Accept: application/vnd.github+json" \
+          "https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
+          | jq -er '.sha') || {
+          echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
+          exit 1
+        }
        echo "Latest commit hash: $LATEST_COMMIT"
        sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
        echo "Updated uv.lock entry:"
@@ -63,7 +63,7 @@ runs:
        exit-code: '0'
        scanners: 'vuln'
        timeout: '5m'
-        version: 'v0.69.2'
+        version: 'v0.71.2'

    - name: Run Trivy vulnerability scan (SARIF)
      if: inputs.upload-sarif == 'true' && github.event_name == 'push'
@@ -76,7 +76,7 @@ runs:
        exit-code: '0'
        scanners: 'vuln'
        timeout: '5m'
-        version: 'v0.69.2'
+        version: 'v0.71.2'

    - name: Upload Trivy results to GitHub Security tab
      if: inputs.upload-sarif == 'true' && github.event_name == 'push'
@@ -77,6 +77,11 @@ provider/okta:
      - any-glob-to-any-file: "prowler/providers/okta/**"
      - any-glob-to-any-file: "tests/providers/okta/**"

+provider/linode:
+  - changed-files:
+      - any-glob-to-any-file: "prowler/providers/linode/**"
+      - any-glob-to-any-file: "tests/providers/linode/**"
+
 github_actions:
  - changed-files:
      - any-glob-to-any-file: ".github/workflows/*"
@@ -6,8 +6,7 @@
 #   - .github/workflows/api-security.yml, sdk-security.yml, ui-security.yml
 #
 # Severity levels (comma-separated) are read from OSV_SEVERITY_LEVELS.
-# Default: HIGH,CRITICAL,UNKNOWN — preserves prior .safety-policy.yml policy
-#   (ignore-cvss-severity-below: 7 + ignore-cvss-unknown-severity: False).
+# Default: CRITICAL — only CVSS >= 9.0 findings fail the scan.
 # osv-scanner has no native CVSS threshold (google/osv-scanner#1400, closed
 # not-planned). Severity is derived from $group.max_severity (numeric CVSS
 # score string) which osv-scanner emits per group.
@@ -33,7 +32,7 @@ set -euo pipefail

 ROOT="$(git rev-parse --show-toplevel)"
 CONFIG="${ROOT}/osv-scanner.toml"
-SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-HIGH,CRITICAL,UNKNOWN}"
+SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-CRITICAL}"

 for bin in osv-scanner jq; do
  if ! command -v "${bin}" >/dev/null 2>&1; then
@@ -272,27 +272,3 @@ jobs:
          payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
          step-outcome: ${{ steps.outcome.outputs.outcome }}
          update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
-
-  trigger-deployment:
-    needs: [setup, container-build-push]
-    if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    permissions:
-      contents: read
-
-    steps:
-      - name: Harden Runner
-        uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
-        with:
-          egress-policy: block
-          allowed-endpoints: >
-            api.github.com:443
-
-      - name: Trigger API deployment
-        uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
-        with:
-          token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
-          repository: ${{ secrets.CLOUD_DISPATCH }}
-          event-type: api-prowler-deployment
-          client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
@@ -12,9 +12,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'api/**'
-      - '.github/workflows/api-container-checks.yml'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -134,5 +131,5 @@ jobs:
        with:
          image-name: ${{ env.IMAGE_NAME }}
          image-tag: ${{ github.sha }}
-          fail-on-critical: 'false'
+          fail-on-critical: 'true'
          severity: 'CRITICAL'
@@ -16,13 +16,6 @@ on:
    branches:
      - "master"
      - "v5.*"
-    paths:
-      - 'api/**'
-      - '.github/workflows/api-tests.yml'
-      - '.github/workflows/api-security.yml'
-      - '.github/actions/setup-python-uv/**'
-      - '.github/actions/osv-scanner/**'
-      - '.github/scripts/osv-scan.sh'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -29,10 +29,11 @@ jobs:
        with:
          # We can't block as Trufflehog needs to verify secrets against vendors
          egress-policy: audit
-          # allowed-endpoints: >
-          #   github.com:443
-          #   ghcr.io:443
-          #   pkg-containers.githubusercontent.com:443
+          allowed-endpoints: >
+            github.com:443
+            ghcr.io:443
+            pkg-containers.githubusercontent.com:443
+            www.formbucket.com:443

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -263,27 +263,3 @@ jobs:
          payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
          step-outcome: ${{ steps.outcome.outputs.outcome }}
          update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
-
-  trigger-deployment:
-    needs: [setup, container-build-push]
-    if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    permissions:
-      contents: read
-
-    steps:
-      - name: Harden Runner
-        uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
-        with:
-          egress-policy: block
-          allowed-endpoints: >
-            api.github.com:443
-
-      - name: Trigger MCP deployment
-        uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
-        with:
-          token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
-          repository: ${{ secrets.CLOUD_DISPATCH }}
-          event-type: mcp-prowler-deployment
-          client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
@@ -12,9 +12,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'mcp_server/**'
-      - '.github/workflows/mcp-container-checks.yml'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -127,5 +124,5 @@ jobs:
        with:
          image-name: ${{ env.IMAGE_NAME }}
          image-tag: ${{ github.sha }}
-          fail-on-critical: 'false'
+          fail-on-critical: 'true'
          severity: 'CRITICAL'
@@ -15,12 +15,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'mcp_server/pyproject.toml'
-      - 'mcp_server/uv.lock'
-      - '.github/workflows/mcp-security.yml'
-      - '.github/actions/osv-scanner/**'
-      - '.github/scripts/osv-scan.sh'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -30,7 +24,6 @@ permissions: {}

 jobs:
  mcp-security-scans:
-    if: github.repository == 'prowler-cloud/prowler'
    runs-on: ubuntu-latest
    timeout-minutes: 15
    permissions:
@@ -29,6 +29,7 @@ jobs:
          - '3.10'
          - '3.11'
          - '3.12'
+          - '3.13'

    steps:
      - name: Harden Runner
@@ -15,12 +15,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'prowler/**'
-      - 'Dockerfile*'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/sdk-container-checks.yml'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -111,25 +105,14 @@ jobs:
        id: check-changes
        uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
        with:
-          files: ./**
+          files: |
+            prowler/**
+            Dockerfile*
+            pyproject.toml
+            uv.lock
+            .github/workflows/sdk-container-checks.yml
          files_ignore: |
-            .github/**
            prowler/CHANGELOG.md
-            docs/**
-            permissions/**
-            api/**
-            ui/**
-            dashboard/**
-            mcp_server/**
-            skills/**
-            README.md
-            mkdocs.yml
-            .backportrc.json
-            .env
-            docker-compose*
-            examples/**
-            .gitignore
-            contrib/**
            **/AGENTS.md

      - name: Set up Docker Buildx
@@ -153,5 +136,5 @@ jobs:
        with:
          image-name: ${{ env.IMAGE_NAME }}
          image-tag: ${{ github.sha }}
-          fail-on-critical: 'false'
+          fail-on-critical: 'true'
          severity: 'CRITICAL'
@@ -19,16 +19,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'prowler/**'
-      - 'tests/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/sdk-tests.yml'
-      - '.github/workflows/sdk-security.yml'
-      - '.github/actions/setup-python-uv/**'
-      - '.github/actions/osv-scanner/**'
-      - '.github/scripts/osv-scan.sh'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -71,27 +61,18 @@ jobs:
        id: check-changes
        uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
        with:
-          files:
-            ./**
+          files: |
+            prowler/**
+            tests/**
+            pyproject.toml
+            uv.lock
+            .github/workflows/sdk-tests.yml
            .github/workflows/sdk-security.yml
+            .github/actions/setup-python-uv/**
+            .github/actions/osv-scanner/**
+            .github/scripts/osv-scan.sh
          files_ignore: |
-            .github/**
            prowler/CHANGELOG.md
-            docs/**
-            permissions/**
-            api/**
-            ui/**
-            dashboard/**
-            mcp_server/**
-            skills/**
-            README.md
-            mkdocs.yml
-            .backportrc.json
-            .env
-            docker-compose*
-            examples/**
-            .gitignore
-            contrib/**
            **/AGENTS.md

      - name: Setup Python with uv
@@ -29,6 +29,7 @@ jobs:
          - '3.10'
          - '3.11'
          - '3.12'
+          - '3.13'

    steps:
      - name: Harden Runner
@@ -540,7 +541,7 @@ jobs:
        with:
          flags: prowler-py${{ matrix.python-version }}-vercel
          files: ./vercel_coverage.xml
-      
+
      # Scaleway Provider
      - name: Check if Scaleway files changed
        if: steps.check-changes.outputs.any_changed == 'true'
@@ -588,7 +589,31 @@ jobs:
        with:
          flags: prowler-py${{ matrix.python-version }}-stackit
          files: ./stackit_coverage.xml
- 
+
+      # Linode Provider
+      - name: Check if Linode files changed
+        if: steps.check-changes.outputs.any_changed == 'true'
+        id: changed-linode
+        uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
+        with:
+          files: |
+            ./prowler/**/linode/**
+            ./tests/**/linode/**
+            ./uv.lock
+
+      - name: Run Linode tests
+        if: steps.changed-linode.outputs.any_changed == 'true'
+        run: uv run pytest -n auto --cov=./prowler/providers/linode --cov-report=xml:linode_coverage.xml tests/providers/linode
+
+      - name: Upload Linode coverage to Codecov
+        if: steps.changed-linode.outputs.any_changed == 'true'
+        uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        with:
+          flags: prowler-py${{ matrix.python-version }}-linode
+          files: ./linode_coverage.xml
+
      # External Provider (dynamic loading)
      - name: Check if External Provider files changed
        if: steps.check-changes.outputs.any_changed == 'true'
@@ -608,14 +633,14 @@ jobs:

      - name: Upload External Provider coverage to Codecov
        if: steps.changed-external.outputs.any_changed == 'true'
-     
+
        uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          flags: prowler-py${{ matrix.python-version }}-external
          files: ./external_coverage.xml
-          
+
      # Lib
      - name: Check if Lib files changed
        if: steps.check-changes.outputs.any_changed == 'true'
@@ -32,9 +32,6 @@ env:
  PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
  PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-ui

-  # Build args
-  NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
-
 permissions: {}

 jobs:
@@ -146,7 +143,6 @@ jobs:
          context: ${{ env.WORKING_DIRECTORY }}
          build-args: |
            NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ (github.event_name == 'release' || github.event_name == 'workflow_dispatch') && format('v{0}', env.RELEASE_TAG) || needs.setup.outputs.short-sha }}
-            NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
          push: true
          platforms: ${{ matrix.platform }}
          tags: |
@@ -262,27 +258,3 @@ jobs:
          payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
          step-outcome: ${{ steps.outcome.outputs.outcome }}
          update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
-
-  trigger-deployment:
-    needs: [setup, container-build-push]
-    if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    permissions:
-      contents: read
-
-    steps:
-      - name: Harden Runner
-        uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
-        with:
-          egress-policy: block
-          allowed-endpoints: >
-            api.github.com:443
-
-      - name: Trigger UI deployment
-        uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
-        with:
-          token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
-          repository: ${{ secrets.CLOUD_DISPATCH }}
-          event-type: ui-prowler-deployment
-          client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
@@ -12,9 +12,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'ui/**'
-      - '.github/workflows/ui-container-checks.yml'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -132,5 +129,5 @@ jobs:
        with:
          image-name: ${{ env.IMAGE_NAME }}
          image-tag: ${{ github.sha }}
-          fail-on-critical: 'false'
+          fail-on-critical: 'true'
          severity: 'CRITICAL'
@@ -81,7 +81,8 @@ jobs:
      AUTH_SECRET: 'fallback-ci-secret-for-testing'
      AUTH_TRUST_HOST: true
      NEXTAUTH_URL: 'http://localhost:3000'
-      NEXT_PUBLIC_API_BASE_URL: 'http://localhost:8080/api/v1'
+      AUTH_URL: 'http://localhost:3000'
+      UI_API_BASE_URL: 'http://localhost:8080/api/v1'
      E2E_ADMIN_USER: ${{ secrets.E2E_ADMIN_USER }}
      E2E_ADMIN_PASSWORD: ${{ secrets.E2E_ADMIN_PASSWORD }}
      E2E_AWS_PROVIDER_ACCOUNT_ID: ${{ secrets.E2E_AWS_PROVIDER_ACCOUNT_ID }}
@@ -118,6 +119,14 @@ jobs:
      E2E_ALIBABACLOUD_ACCESS_KEY_ID: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_ID }}
      E2E_ALIBABACLOUD_ACCESS_KEY_SECRET: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_SECRET }}
      E2E_ALIBABACLOUD_ROLE_ARN: ${{ secrets.E2E_ALIBABACLOUD_ROLE_ARN }}
+      E2E_OKTA_DOMAIN: ${{ secrets.E2E_OKTA_DOMAIN }}
+      E2E_OKTA_CLIENT_ID: ${{ secrets.E2E_OKTA_CLIENT_ID }}
+      E2E_OKTA_BASE64_PRIVATE_KEY: ${{ secrets.E2E_OKTA_BASE64_PRIVATE_KEY }}
+      E2E_GOOGLEWORKSPACE_CUSTOMER_ID: ${{ secrets.E2E_GOOGLEWORKSPACE_CUSTOMER_ID }}
+      E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON: ${{ secrets.E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON }}
+      E2E_GOOGLEWORKSPACE_DELEGATED_USER: ${{ secrets.E2E_GOOGLEWORKSPACE_DELEGATED_USER }}
+      E2E_VERCEL_TEAM_ID: ${{ secrets.E2E_VERCEL_TEAM_ID }}
+      E2E_VERCEL_API_TOKEN: ${{ secrets.E2E_VERCEL_API_TOKEN }}
      # Pass E2E paths from impact analysis
      E2E_TEST_PATHS: ${{ needs.impact-analysis.outputs.ui-e2e }}
      RUN_ALL_TESTS: ${{ needs.impact-analysis.outputs.run-all }}
@@ -198,7 +207,7 @@ jobs:
          timeout=150
          elapsed=0
          while [ $elapsed -lt $timeout ]; do
-            if curl -s ${NEXT_PUBLIC_API_BASE_URL}/docs >/dev/null 2>&1; then
+            if curl -s ${UI_API_BASE_URL}/docs >/dev/null 2>&1; then
              echo "Prowler API is ready!"
              exit 0
            fi
@@ -15,12 +15,6 @@ on:
    branches:
      - 'master'
      - 'v5.*'
-    paths:
-      - 'ui/package.json'
-      - 'ui/pnpm-lock.yaml'
-      - '.github/workflows/ui-security.yml'
-      - '.github/actions/osv-scanner/**'
-      - '.github/scripts/osv-scan.sh'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -30,7 +24,6 @@ permissions: {}

 jobs:
  ui-security-scans:
-    if: github.repository == 'prowler-cloud/prowler'
    runs-on: ubuntu-latest
    timeout-minutes: 15
    permissions:
@@ -131,6 +131,10 @@ jobs:
        if: steps.check-changes.outputs.any_changed == 'true'
        run: pnpm run healthcheck

+      - name: Check product-tour alignment
+        if: steps.check-changes.outputs.any_changed == 'true'
+        run: pnpm run tour:check
+
      - name: Run pnpm audit
        if: steps.check-changes.outputs.any_changed == 'true'
        run: pnpm run audit
@@ -169,3 +169,7 @@ GEMINI.md

 # Claude Code
 .claude/*
+
+# Docker
+docker-compose.override.yml
+docker-compose-dev.override.yml
@@ -7,6 +7,10 @@
 #   P50 — dependency validation

 default_install_hook_types: [pre-commit]
+# Hooks run on commit only by default.
+# NOTE: default_stages does NOT override a hook's manifest stages, so fixers shipping pre-push in their
+# manifest need an explicit stages: ["pre-commit"] below to stay off push.
+default_stages: [pre-commit]

 repos:
  ## GENERAL (prek built-in — no external repo needed)
@@ -21,13 +25,16 @@ repos:
      - id: check-json
        priority: 10
      - id: end-of-file-fixer
+        stages: ["pre-commit"]
        priority: 0
      - id: trailing-whitespace
+        stages: ["pre-commit"]
        priority: 0
      - id: no-commit-to-branch
        priority: 10
      - id: pretty-format-json
        args: ["--autofix", --no-sort-keys, --no-ensure-ascii]
+        stages: ["pre-commit"]
        priority: 10

  ## TOML
@@ -82,6 +89,7 @@ repos:
        name: "SDK - isort"
        files: { glob: ["{prowler,tests,dashboard,util,scripts}/**/*.py"] }
        args: ["--profile", "black"]
+        stages: ["pre-commit"]
        priority: 20

  - repo: https://github.com/psf/black
@@ -102,17 +110,36 @@ repos:
        priority: 30

  ## PYTHON — API + MCP Server (ruff)
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.11
+  # Run ruff through `uv run` against each project so prek uses the exact ruff
+  # version pinned in that project's uv.lock — the same version GitHub Actions
+  # runs via `uv run ruff`. This removes the drift between the local hooks and
+  # CI. api/ and mcp_server/ are separate uv projects, so they need separate
+  # hooks (each `uv run --project` resolves its own pinned ruff + config).
+  - repo: local
    hooks:
-      - id: ruff
-        name: "API + MCP - ruff check"
-        files: { glob: ["{api,mcp_server}/**/*.py"] }
-        args: ["--fix"]
+      - id: ruff-check-api
+        name: "API - ruff check"
+        entry: uv run --project ./api ruff check --fix
+        language: system
+        files: { glob: ["api/**/*.py"] }
        priority: 30
-      - id: ruff-format
-        name: "API + MCP - ruff format"
-        files: { glob: ["{api,mcp_server}/**/*.py"] }
+      - id: ruff-format-api
+        name: "API - ruff format"
+        entry: uv run --project ./api ruff format
+        language: system
+        files: { glob: ["api/**/*.py"] }
+        priority: 20
+      - id: ruff-check-mcp
+        name: "MCP - ruff check"
+        entry: uv run --project ./mcp_server ruff check --fix
+        language: system
+        files: { glob: ["mcp_server/**/*.py"] }
+        priority: 30
+      - id: ruff-format-mcp
+        name: "MCP - ruff format"
+        entry: uv run --project ./mcp_server ruff format
+        language: system
+        files: { glob: ["mcp_server/**/*.py"] }
        priority: 20

  ## PYTHON — uv (API + SDK)
@@ -0,0 +1,98 @@
+# Trivy ignore file for the prowlercloud/prowler SDK container image (entries
+# specific to the API image are grouped at the end). Each entry documents (a) the
+# affected package and why it ships in the image, (b) why the CVE is not
+# exploitable in Prowler's runtime, and (c) the upstream fix status. Entries
+# expire so they are re-reviewed automatically, and are scoped per-package so
+# suppressions cannot drift onto unrelated packages later assigned the same CVE.
+#
+# Scanned by: .github/actions/trivy-scan via .github/workflows/sdk-container-checks.yml
+
+# CVE-2026-42496 — perl-archive-tar path traversal via crafted symlinks.
+# CVE-2026-8376  — perl heap buffer overflow when compiling regex.
+# Packages: perl, perl-base, perl-modules-5.36, libperl5.36.
+# Why ignored: perl-base is part of Debian's "Essential: yes" set; it cannot be
+# removed without breaking dpkg. The Prowler SDK does not invoke perl at runtime;
+# neither vulnerable code path (Archive::Tar parsing or regex compilation of
+# attacker-controlled input) is reachable from Prowler. No Debian bookworm fix
+# is available yet.
+CVE-2026-42496 pkg:perl exp:2026-07-15
+CVE-2026-42496 pkg:perl-base exp:2026-07-15
+CVE-2026-42496 pkg:perl-modules-5.36 exp:2026-07-15
+CVE-2026-42496 pkg:libperl5.36 exp:2026-07-15
+CVE-2026-8376 pkg:perl exp:2026-07-15
+CVE-2026-8376 pkg:perl-base exp:2026-07-15
+CVE-2026-8376 pkg:perl-modules-5.36 exp:2026-07-15
+CVE-2026-8376 pkg:libperl5.36 exp:2026-07-15
+
+# CVE-2025-7458 — SQLite integer overflow.
+# Package: libsqlite3-0.
+# Why ignored: transitive dependency of CPython's stdlib sqlite3 module. The
+# Prowler SDK does not open user-supplied SQLite databases; SQLite usage is
+# internal and bounded. No Debian bookworm fix is available.
+CVE-2025-7458 pkg:libsqlite3-0 exp:2026-07-15
+
+# CVE-2026-43185 — Linux kernel ksmbd signedness bug.
+# Package: linux-libc-dev.
+# Why ignored: linux-libc-dev ships kernel headers for build-time compilation,
+# not a running kernel. Containers execute against the host kernel, so these
+# headers are inert at runtime. The upstream fix landed in kernel 7.0-rc2 and
+# has not been backported to Debian's 6.1 LTS line.
+CVE-2026-43185 pkg:linux-libc-dev exp:2026-07-15
+
+# CVE-2023-45853 — zlib MiniZip integer overflow / heap overflow in
+# zipOpenNewFileInZip4_64.
+# Packages: zlib1g, zlib1g-dev.
+# Why ignored: Debian Security Tracker status for bookworm is <ignored>, with
+# the published rationale "contrib/minizip not built and src:zlib not producing
+# binary packages" — i.e. the vulnerable symbol is not present in the libz.so
+# shipped by Debian, so the image is genuinely not affected rather than merely
+# unpatched. Upstream fix is in zlib 1.3.1, available in Debian trixie (13);
+# migrating the base image would clear it fully.
+# Ref: https://security-tracker.debian.org/tracker/CVE-2023-45853
+CVE-2023-45853 pkg:zlib1g exp:2026-07-15
+CVE-2023-45853 pkg:zlib1g-dev exp:2026-07-15
+
+# CVE-2026-55200 — libssh2 out-of-bounds write in ssh2_transport_read() due to
+# an unchecked packet_length field in transport.c (heap corruption, possible RCE).
+# Package: libssh2-1.
+# Why ignored: libssh2-1 is pulled in only as a transitive dependency of libcurl4
+# (installed in the SDK Dockerfile for the networking/PowerShell stack). The
+# vulnerable path is reached exclusively when libssh2 acts as an SSH/SCP/SFTP
+# client parsing transport packets from a server. Prowler never uses libcurl's
+# SSH/SCP/SFTP transports; it talks to cloud provider HTTPS endpoints only, so the
+# affected code is unreachable at runtime. Fixed upstream in libssh2 commit
+# 97acf3df (PR #2052); no Debian bookworm fix is available yet.
+# Ref: https://security-tracker.debian.org/tracker/CVE-2026-55200
+CVE-2026-55200 pkg:libssh2-1 exp:2026-07-15
+
+# --- API container image (api/Dockerfile) ---
+# The entries below are specific to the Prowler API image, which ships
+# PowerShell and additional build tooling on top of the same bookworm base.
+
+# CVE-2026-7210 — CPython/Expat hash-flooding denial of service in
+# `xml.parsers.expat` and `xml.etree.ElementTree`.
+# Packages: the Debian system Python 3.11 (python3.11*, libpython3.11*).
+# Why ignored: the API runs under the Python 3.12 interpreter shipped in its
+# `.venv`; the system `python3.11` is only present because `python3-dev` is
+# pulled in to compile native extensions (xmlsec, lxml) and is never executed
+# at runtime. The vulnerable path requires parsing attacker-controlled XML with
+# the affected interpreter, which Prowler does not do with the system Python.
+# Full mitigation also needs libexpat >= 2.8.0; no Debian bookworm fix yet.
+CVE-2026-7210 pkg:python3.11 exp:2026-07-15
+CVE-2026-7210 pkg:python3.11-dev exp:2026-07-15
+CVE-2026-7210 pkg:python3.11-minimal exp:2026-07-15
+CVE-2026-7210 pkg:libpython3.11 exp:2026-07-15
+CVE-2026-7210 pkg:libpython3.11-dev exp:2026-07-15
+CVE-2026-7210 pkg:libpython3.11-minimal exp:2026-07-15
+CVE-2026-7210 pkg:libpython3.11-stdlib exp:2026-07-15
+
+# CVE-2026-33278 — Unbound DNSSEC validator use-after-free (DoS, possible RCE).
+# CVE-2026-42960 — Unbound DNS cache poisoning via promiscuous additional records.
+# Package: libunbound8.
+# Why ignored: libunbound8 is a transitive apt dependency of the TLS/networking
+# stack (GnuTLS DANE support); only the shared library ships in the image. Both
+# vulnerabilities require operating a live Unbound recursive DNSSEC validator
+# that processes attacker-influenced DNS responses. Prowler never starts an
+# Unbound resolver, so neither code path is reachable. No Debian bookworm fix yet.
+CVE-2026-33278 pkg:libunbound8 exp:2026-07-15
+CVE-2026-42960 pkg:libunbound8 exp:2026-07-15
@@ -51,6 +51,7 @@ Use these skills for detailed patterns on-demand:
 | `django-migration-psql` | Django migration best practices for PostgreSQL | [SKILL.md](skills/django-migration-psql/SKILL.md) |
 | `postgresql-indexing` | PostgreSQL indexing, EXPLAIN, monitoring, maintenance | [SKILL.md](skills/postgresql-indexing/SKILL.md) |
 | `prowler-attack-paths-query` | Create Attack Paths openCypher queries | [SKILL.md](skills/prowler-attack-paths-query/SKILL.md) |
+| `prowler-tour` | Keep product-tour definitions aligned with the UI | [SKILL.md](skills/prowler-tour/SKILL.md) |
 | `gh-aw` | GitHub Agentic Workflows (gh-aw) | [SKILL.md](skills/gh-aw/SKILL.md) |
 | `skill-creator` | Create new AI agent skills | [SKILL.md](skills/skill-creator/SKILL.md) |

@@ -67,10 +68,12 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
 | Adding new providers | `prowler-provider` |
 | Adding privilege escalation detection queries | `prowler-attack-paths-query` |
 | Adding services to existing providers | `prowler-provider` |
+| Adding, updating, or removing a tour definition (*.tour.ts) | `prowler-tour` |
 | After creating/modifying a skill | `skill-sync` |
 | App Router / Server Actions | `nextjs-16` |
 | Auditing check-to-requirement mappings as a cloud auditor | `prowler-compliance` |
 | Building AI chat features | `ai-sdk-5` |
+| Changing button labels or section headings on a tour-covered page | `prowler-tour` |
 | Committing changes | `prowler-commit` |
 | Configuring MCP servers in agentic workflows | `gh-aw` |
 | Create PR that requires changelog entry | `prowler-changelog` |
@@ -89,6 +92,7 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
 | Creating/updating compliance frameworks | `prowler-compliance` |
 | Debug why a GitHub Actions job is failing | `prowler-ci` |
 | Debugging gh-aw compilation errors | `gh-aw` |
+| Editing a UI file containing data-tour-id attributes | `prowler-tour` |
 | Fill .github/pull_request_template.md (Context/Description/Steps to review/Checklist) | `prowler-pr` |
 | Fixing bug | `tdd` |
 | Fixing compliance JSON bugs (duplicate IDs, empty Section, stale refs) | `prowler-compliance` |
@@ -105,6 +109,8 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
 | Modifying gh-aw workflow frontmatter or safe-outputs | `gh-aw` |
 | Refactoring code | `tdd` |
 | Regenerate AGENTS.md Auto-invoke tables (sync.sh) | `skill-sync` |
+| Renaming or removing a data-tour-id attribute value | `prowler-tour` |
+| Restructuring routes or layouts covered by a tour | `prowler-tour` |
 | Review PR requirements: template, title conventions, changelog gate | `prowler-pr` |
 | Review changelog format and conventions | `prowler-changelog` |
 | Reviewing JSON:API compliance | `jsonapi` |
@@ -1,4 +1,4 @@
-FROM python:3.12.11-slim-bookworm@sha256:519591d6871b7bc437060736b9f7456b8731f1499a57e22e6c285135ae657bf7 AS build
+FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build

 LABEL maintainer="https://github.com/prowler-cloud/prowler"
 LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
@@ -6,7 +6,7 @@ LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
 ARG POWERSHELL_VERSION=7.5.0
 ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}

-ARG TRIVY_VERSION=0.70.0
+ARG TRIVY_VERSION=0.71.2
 ENV TRIVY_VERSION=${TRIVY_VERSION}

 ARG ZIZMOR_VERSION=1.24.1
@@ -95,6 +95,18 @@ RUN uv sync --locked --compile-bytecode && \
 # Install PowerShell modules
 RUN .venv/bin/python prowler/providers/m365/lib/powershell/m365_powershell.py

+USER root
+
+# Remove build-only packages from the final image after Python dependencies are installed.
+RUN apt-get purge -y --auto-remove \
+    build-essential \
+    pkg-config \
+    libzstd-dev \
+    zlib1g-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+USER prowler
+
 # Remove deprecated dash dependencies
 RUN pip uninstall dash-html-components -y && \
    pip uninstall dash-core-components -y
@@ -1,5 +1,34 @@
 .DEFAULT_GOAL:=help

+DEV_LOCAL := ./scripts/development/dev-local.sh
+
+.PHONY: dev dev-setup dev-attach dev-launch dev-stop dev-clean dev-wipe dev-status
+
+##@ Local Development
+dev: ## Start local API, worker, and database logs
+	$(DEV_LOCAL) all
+
+dev-setup: ## Bootstrap local dependencies, migrations, and fixtures
+	$(DEV_LOCAL) setup
+
+dev-attach: ## Attach to the local tmux development session
+	$(DEV_LOCAL) attach
+
+dev-launch: ## Start the local stack on fixed ports and attach
+	$(DEV_LOCAL) launch
+
+dev-stop: ## Stop the local tmux session and containers
+	$(DEV_LOCAL) kill
+
+dev-clean: ## Remove stopped local development containers
+	$(DEV_LOCAL) clean
+
+dev-wipe: ## Stop everything and delete local development data
+	$(DEV_LOCAL) wipe
+
+dev-status: ## Show local development container status
+	$(DEV_LOCAL) status
+
 ##@ Testing
 test:   ## Test with pytest
 	rm -rf .coverage && \
@@ -16,18 +45,41 @@ coverage-html: ## Show Test Coverage
 	coverage html && \
 	open htmlcov/index.html

-##@ Linting
-format: ## Format Code
-	@echo "Running black..."
-	black .
+##@ Code Quality
+# `make` is the single entrypoint and mirrors CI exactly (uv run + same flags):
+#   SDK (prowler/, util/) -> flake8 + black + pylint
+#   API & MCP server      -> ruff (rules live in each project's pyproject.toml)
+# `format` applies fixes (incl. ruff's import/upgrade autofixes); `lint` only
+# verifies and is what CI gates on.
+.PHONY: format format-sdk format-api format-mcp lint lint-sdk lint-api lint-mcp

-lint: ## Lint Code
-	@echo "Running flake8..."
-	flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib
-	@echo "Running black... "
-	black --check .
-	@echo "Running pylint..."
-	pylint --disable=W,C,R,E -j 0 prowler util
+format: format-sdk format-api format-mcp ## Format & autofix all components (SDK, API, MCP)
+
+lint: lint-sdk lint-api lint-mcp ## Lint all components (SDK, API, MCP) — mirrors CI
+
+format-sdk: ## Format SDK code (black)
+	uv run black --exclude "\.venv|api|ui|skills|mcp_server" .
+
+lint-sdk: ## Lint SDK code (flake8, black --check, pylint)
+	uv run flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib,ui,api,skills,mcp_server
+	uv run black --exclude "\.venv|api|ui|skills|mcp_server" --check .
+	uv run pylint --disable=W,C,R,E -j 0 -rn -sn prowler/
+
+format-api: ## Format & autofix API code (ruff)
+	cd api && uv run ruff check . --exclude contrib --fix
+	cd api && uv run ruff format . --exclude contrib
+
+lint-api: ## Lint API code (ruff check + format --check)
+	cd api && uv run ruff check . --exclude contrib
+	cd api && uv run ruff format --check . --exclude contrib
+
+format-mcp: ## Format & autofix MCP server code (ruff)
+	cd mcp_server && uv run ruff check . --fix
+	cd mcp_server && uv run ruff format .
+
+lint-mcp: ## Lint MCP server code (ruff check + format --check)
+	cd mcp_server && uv run ruff check .
+	cd mcp_server && uv run ruff format --check .

 ##@ PyPI
 pypi-clean: ## Delete the distribution files
@@ -83,16 +83,35 @@ prowler dashboard

 ## Attack Paths

-Attack Paths automatically extends every completed AWS scan with a Neo4j graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan and therefore requires:
+Attack Paths automatically extends every completed AWS scan with a graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan.

- An accessible Neo4j instance (the Docker Compose files already ships a `neo4j` service).
- The following environment variables so Django and Celery can connect:
+Two graph backends are supported as the long-lived sink:

-  | Variable | Description | Default |
-  | --- | --- | --- |
-  | `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
-  | `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
-  | `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
+- **Neo4j** (default; the Docker Compose files already ship a `neo4j` service).
+- **Amazon Neptune** (cloud-managed; opt-in).
+
+Select the sink with `ATTACK_PATHS_SINK_DATABASE` (`neo4j` or `neptune`; default `neo4j`).
+
+> Note: Cartography ingestion always uses a temporary Neo4j database, regardless of the configured sink. The `NEO4J_*` variables below must remain set even when `ATTACK_PATHS_SINK_DATABASE=neptune`.
+
+### Neo4j sink
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
+| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
+| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
+
+### Neptune sink
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `NEPTUNE_WRITER_ENDPOINT` | Bolt host for the Neptune writer instance. Required when sink is `neptune`. | _empty_ |
+| `NEPTUNE_READER_ENDPOINT` | Optional reader endpoint for read-only queries. Falls back to the writer when unset. | _empty_ |
+| `NEPTUNE_PORT` | Bolt port exposed by Neptune. | `8182` |
+| `AWS_REGION` | Region the Neptune cluster lives in. Required when sink is `neptune`. | _empty_ |
+
+Neptune authenticates with SigV4 using the standard boto3 credential chain. The worker's IAM role (or `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) supplies the credentials. There is no Neptune password variable.

 Every AWS provider scan will enqueue an Attack Paths ingestion job automatically. Other cloud providers will be added in future iterations.

@@ -104,26 +123,27 @@ Every AWS provider scan will enqueue an Attack Paths ingestion job automatically

 | Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) | Support | Interface |
 |---|---|---|---|---|---|---|
-| AWS | 600 | 84 | 44 | 18 | Official | UI, API, CLI |
-| Azure | 167 | 22 | 19 | 16 | Official | UI, API, CLI |
-| GCP | 102 | 18 | 17 | 12 | Official | UI, API, CLI |
-| Kubernetes | 83 | 7 | 7 | 11 | Official | UI, API, CLI |
-| GitHub | 24 | 3 | 1 | 5 | Official | UI, API, CLI |
-| M365 | 102 | 10 | 4 | 10 | Official | UI, API, CLI |
-| OCI | 51 | 14 | 4 | 10 | Official | UI, API, CLI |
-| Alibaba Cloud | 63 | 9 | 4 | 9 | Official | UI, API, CLI |
-| Cloudflare | 29 | 3 | 0 | 5 | Official | UI, API, CLI |
+| AWS | 615 | 86 | 47 | 19 | Official | UI, API, CLI |
+| Azure | 190 | 22 | 21 | 16 | Official | UI, API, CLI |
+| GCP | 109 | 20 | 19 | 12 | Official | UI, API, CLI |
+| Kubernetes | 90 | 7 | 8 | 11 | Official | UI, API, CLI |
+| GitHub | 24 | 3 | 2 | 5 | Official | UI, API, CLI |
+| M365 | 109 | 10 | 6 | 10 | Official | UI, API, CLI |
+| OCI | 52 | 14 | 5 | 10 | Official | UI, API, CLI |
+| Alibaba Cloud | 63 | 9 | 6 | 9 | Official | UI, API, CLI |
+| Cloudflare | 29 | 3 | 2 | 5 | Official | UI, API, CLI |
 | IaC | [See `trivy` docs.](https://trivy.dev/latest/docs/coverage/iac/) | N/A | N/A | N/A | Official | UI, API, CLI |
-| MongoDB Atlas | 10 | 3 | 0 | 8 | Official | UI, API, CLI |
+| MongoDB Atlas | 10 | 3 | 1 | 8 | Official | UI, API, CLI |
 | LLM | [See `promptfoo` docs.](https://www.promptfoo.dev/docs/red-team/plugins/) | N/A | N/A | N/A | Official | CLI |
 | Image | N/A | N/A | N/A | N/A | Official | CLI, API |
-| Google Workspace | 39 | 5 | 2 | 5 | Official | UI, API, CLI |
-| OpenStack | 34 | 5 | 0 | 9 | Official | UI, API, CLI |
-| Vercel | 26 | 6 | 0 | 8 | Official | UI, API, CLI |
-| Okta | 1 | 1 | 0 | 1 | Official | CLI |
-| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 0 | 1 | Unofficial | CLI |
-| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 0 | 3 | Unofficial | CLI |
-| NHN | 6 | 2 | 1 | 0 | Unofficial | CLI |
+| Google Workspace | 65 | 11 | 3 | 6 | Official | UI, API, CLI |
+| OpenStack | 34 | 5 | 1 | 9 | Official | UI, API, CLI |
+| Vercel | 26 | 6 | 1 | 8 | Official | UI, API, CLI |
+| Okta | 29 | 8 | 2 | 2 | Official | UI, API, CLI |
+| Linode [Contact us](https://prowler.com/contact) | 10 | 3 | 1 | 4 | Unofficial | CLI |
+| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 1 | 1 | Unofficial | CLI |
+| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 1 | 3 | Unofficial | CLI |
+| NHN | 6 | 2 | 2 | 0 | Unofficial | CLI |

 > [!Note]
 > The numbers in the table are updated periodically.
@@ -24,6 +24,9 @@ DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute
 # Decide whether to allow Django manage database table partitions
 DJANGO_MANAGE_DB_PARTITIONS=[True|False]
 DJANGO_CELERY_DEADLOCK_ATTEMPTS=5
+# Optional: bound Celery's prefork pool size. Unset → Celery uses os.cpu_count().
+# Useful on Kubernetes nodes with many CPUs where unbounded prefork balloons memory.
+# DJANGO_CELERY_WORKER_CONCURRENCY=4
 DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
 DJANGO_SENTRY_DSN=

@@ -2,6 +2,76 @@

 All notable changes to the **Prowler API** are documented in this file.

+## [1.33.0] (Prowler UNRELEASED)
+
+### 🔄 Changed
+
+- Attack Paths: AWS Neptune is now supported as a persistent sink database, selectable via `ATTACK_PATHS_SINK_DATABASE=neptune` (default `neo4j`); Cartography (bumped to 0.138.1) keeps using Neo4j for its per-scan ingest database [(#11524)](https://github.com/prowler-cloud/prowler/pull/11524)
+- Attack Paths: Scan task now checks the ingest Neo4j database and configured graph sink before starting graph ingestion [(#11743)](https://github.com/prowler-cloud/prowler/pull/11743)
+
+---
+
+## [1.32.2] (Prowler UNRELEASED)
+
+### 🐞 Fixed
+
+- `scan-perform` no longer reports an error when a provider is deleted during a running scan [(#11696)](https://github.com/prowler-cloud/prowler/pull/11696)
+
+---
+
+## [1.32.1] (Prowler v5.31.1)
+
+### 🐞 Fixed
+
+- API key auth no longer mutates `TenantAPIKey.objects` during admin DB lookups [(#11686)](https://github.com/prowler-cloud/prowler/pull/11686)
+
+---
+
+## [1.32.0] (Prowler v5.31.0)
+
+### 🚀 Added
+
+- Provider group filters for API endpoints that support cloud provider filtering, including exact and `__in` variants [(#11573)](https://github.com/prowler-cloud/prowler/pull/11573)
+- Provider filters for `GET /api/v1/compliance-overviews`, `/metadata`, and `/requirements`, using latest completed scans per matching provider [(#11587)](https://github.com/prowler-cloud/prowler/pull/11587)
+- Server-Sent Events (SSE) infrastructure for the API: a base viewset, a tenant-aware channel manager, and channel-name helpers backed by `django-eventstream` over Valkey Pub/Sub and served through the Gunicorn ASGI worker, so feature endpoints can stream events to clients over a single long-lived connection [(#11556)](https://github.com/prowler-cloud/prowler/pull/11556)
+- `DJANGO_CELERY_WORKER_CONCURRENCY` to configure the Celery worker concurrency. Leave it unset to keep the default behaviour [(#11075)](https://github.com/prowler-cloud/prowler/pull/11075)
+
+### 🔄 Changed
+
+- Gunicorn worker timeout raised from the 30s default to 120s, so long-running requests are no longer killed prematurely [(#11631)](https://github.com/prowler-cloud/prowler/pull/11631)
+- Sentry now drops ASGI's `RequestAborted` errors from health-check probe disconnects on `/health/live` [(#11632)](https://github.com/prowler-cloud/prowler/pull/11632)
+- Gunicorn keep-alive timeout now exceeds the load balancer idle timeout, stopping 502s from reused connections [(#11647)](https://github.com/prowler-cloud/prowler/pull/11647)
+- API runs under the Uvicorn worker so keep-alive outlives the load balancer idle timeout, fixing Gunicorn's intermittent 502s [(#11663)](https://github.com/prowler-cloud/prowler/pull/11663)
+- SAML logins no longer wipe a user's roles when the IdP does not send the `userType` attribute; existing roles are kept, and when `userType` names a role that does not exist it is now created with read-only access (visibility over all providers, no management permissions) instead of no permissions at all [(#11520)](https://github.com/prowler-cloud/prowler/pull/11520)
+
+### 🐞 Fixed
+
+- Database connections no longer leak under the ASGI worker, which previously exhausted the read replica's connection slots and caused 500s on read endpoints [(#11640)](https://github.com/prowler-cloud/prowler/pull/11640)
+
+### 🔐 Security
+
+- `aiohttp` to 3.14.0 and `idna` to 3.15, patching known CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
+- Container base image to `python:3.12.13-slim-bookworm` and `trivy` to 0.71.0, patching OS and Go module CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
+- `trivy` binary bumped to 0.71.0 patching embedded `golang.org/x/crypto`, `golang.org/x/net`, and Go `stdlib` CVEs [(#11592)](https://github.com/prowler-cloud/prowler/pull/11592)
+
+---
+
+## [1.31.3] (Prowler v5.30.3)
+
+### 🔐 Security
+
+- SAML logins now link to an existing account only when the asserted email domain matches the ACS endpoint and the user is already a member of that domain's tenant, fixing a cross-tenant account takeover [(GHSA-h8m9-jgf8-vwvp)](https://github.com/prowler-cloud/prowler/security/advisories/GHSA-h8m9-jgf8-vwvp)
+
+---
+
+## [1.31.2] (Prowler v5.30.2)
+
+### 🔄 Changed
+
+- `scan-compliance-overviews` task now streams the findings aggregation and the requirement-row writes so it runs faster and its peak memory no longer grows with the number of regions and frameworks [(#11591)](https://github.com/prowler-cloud/prowler/pull/11591)
+
+---
+
 ## [1.31.1] (Prowler v5.30.1)

 ### 🐞 Fixed
@@ -1,11 +1,11 @@
-FROM python:3.12.10-slim-bookworm@sha256:fd95fa221297a88e1cf49c55ec1828edd7c5a428187e67b5d1805692d11588db AS build
+FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build

 LABEL maintainer="https://github.com/prowler-cloud/api"

 ARG POWERSHELL_VERSION=7.5.0
 ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}

-ARG TRIVY_VERSION=0.70.0
+ARG TRIVY_VERSION=0.71.2
 ENV TRIVY_VERSION=${TRIVY_VERSION}

 ARG ZIZMOR_VERSION=1.24.1
@@ -102,6 +102,23 @@ RUN uv sync --locked --no-install-project && \

 RUN .venv/bin/python .venv/lib/python3.12/site-packages/prowler/providers/m365/lib/powershell/m365_powershell.py

+USER root
+
+# Remove build-only packages from the final image after Python dependencies are installed.
+RUN apt-get purge -y --auto-remove \
+    gcc \
+    g++ \
+    make \
+    libxml2-dev \
+    libxmlsec1-dev \
+    pkg-config \
+    libtool \
+    libxslt1-dev \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+USER prowler
+
 COPY --chown=prowler:prowler src/backend/ ./backend/
 COPY --chown=prowler:prowler docker-entrypoint.sh ./docker-entrypoint.sh

@@ -196,6 +196,42 @@ python -m celery -A config.celery worker -l info -E

 The Celery worker does not detect and reload changes in the code, so you need to restart it manually when you make changes.

+### Makefile-Assisted Local Deployment
+
+This method is an additional local development workflow. It does not replace the manual local deployment or the Docker deployment described in this guide.
+
+PostgreSQL, Valkey, and Neo4j run with Docker Compose, while Django and the Celery worker run natively through `uv`. Additionally, this workflow creates a `tmux` session with panes for the API, worker, and PostgreSQL logs.
+
+Before using this method, ensure `docker compose`, `tmux`, and `uv` are installed.
+
+This workflow is designed for macOS and should also work on Linux when Docker, `tmux`, and `uv` are available. Windows requires script changes before it can be supported.
+
+From the repository root, run:
+
+```console
+make dev
+```
+
+The API will be available at:
+
+```console
+http://localhost:8080/api/v1
+```
+
+Use these commands to manage the local stack:
+
+```console
+make dev-setup   # Bootstrap dependencies, migrations, and fixtures
+make dev-attach  # Attach to the tmux session
+make dev-launch  # Start the stack on fixed ports and attach
+make dev-stop    # Stop the tmux session and containers
+make dev-clean   # Remove stopped development containers
+make dev-wipe    # Stop everything and delete local development data
+make dev-status  # Show development container status
+```
+
+This workflow does not start the UI. Start it separately from the `ui/` directory when needed.
+
 ### Docker deployment

 This method requires `docker` and `docker compose`.
@@ -21,13 +21,19 @@ apply_fixtures() {
 }

 start_dev_server() {
-  echo "Starting the development server..."
-  exec uv run python manage.py runserver 0.0.0.0:"${DJANGO_PORT:-8080}"
+  echo "Starting the development server (Gunicorn ASGI, debug + reload)..."
+  # Same server/worker as prod (config.asgi via the native `asgi` worker), so
+  # SSE streams run on the event loop exactly as they do in production. DEBUG is
+  # on so guniconf's `reload = DEBUG` hot-reloads edited code (and flips
+  # `preload_app` off so the reload actually takes effect).
+  export DJANGO_DEBUG="${DJANGO_DEBUG:-True}"
+  export DJANGO_BIND_ADDRESS="${DJANGO_BIND_ADDRESS:-0.0.0.0}"
+  exec uv run gunicorn -c config/guniconf.py config.asgi:application
 }

 start_prod_server() {
  echo "Starting the Gunicorn server..."
-  exec uv run gunicorn -c config/guniconf.py config.wsgi:application
+  exec uv run gunicorn -c config/guniconf.py config.asgi:application
 }

 resolve_worker_hostname() {
@@ -65,6 +65,7 @@ All settings have safe defaults; override via environment variables.
 | Env var | Default | Purpose |
 | --- | --- | --- |
 | `DJANGO_CELERY_WORKER_PREFETCH_MULTIPLIER` | `1` | Tasks reserved per worker process. |
+| `DJANGO_CELERY_WORKER_CONCURRENCY` | unset | Optional Celery prefork pool size. When unset, Celery uses its CPU-based default. Set this on worker containers to bound idle memory on hosts with many CPUs. |
 | `DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT` | `60` | Seconds the worker drains/re-queues on `SIGTERM` before force-kill. |
 | `DJANGO_CELERY_TASK_TIME_LIMIT` | `21600` (6h) | Hard limit for most tasks; connection checks are capped at 120s. |
 | `DJANGO_CELERY_TASK_SOFT_TIME_LIMIT` | hard - 600 | Soft limit; raises `SoftTimeLimitExceeded` for cleanup. |
@@ -14,7 +14,7 @@ dev = [
  "pytest-env==1.1.3",
  "pytest-randomly==3.15.0",
  "pytest-xdist==3.6.1",
-  "ruff==0.5.0",
+  "ruff==0.15.11",
  "tqdm==4.67.1",
  "vulture==2.14",
  "prek==0.3.9"
@@ -41,7 +41,9 @@ dependencies = [
  "drf-spectacular==0.27.2",
  "drf-spectacular-jsonapi==0.5.1",
  "defusedxml==0.7.1",
-  "gunicorn==23.0.0",
+  "django-eventstream==5.3.3",
+  "gunicorn==26.0.0",
+  "uvloop==0.22.1",
  "lxml==6.1.0",
  "prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
  "psycopg2-binary==2.9.9",
@@ -56,11 +58,12 @@ dependencies = [
  "matplotlib (==3.10.8)",
  "reportlab (==4.4.10)",
  "neo4j (==6.1.0)",
-  "cartography (==0.135.0)",
+  "cartography (==0.138.1)",
  "gevent (==25.9.1)",
  "werkzeug (==3.1.7)",
  "sqlparse (==0.5.5)",
-  "fonttools (==4.62.1)"
+  "fonttools (==4.62.1)",
+  "uvicorn-worker (==0.4.0)",
 ]
 description = "Prowler's API (Django/DRF)"
 license = "Apache-2.0"
@@ -68,7 +71,24 @@ name = "prowler-api"
 package-mode = false
 # Needed for the SDK compatibility
 requires-python = ">=3.11,<3.13"
-version = "1.32.0"
+version = "1.33.0"
+
+# Shared ruff baseline (kept in sync with mcp_server/pyproject.toml).
+# target-version tracks this project's lowest supported Python.
+[tool.ruff]
+src = ["src"]
+target-version = "py311"
+
+[tool.ruff.lint]
+# Defaults (E4/E7/E9, F) plus import sorting, modern-syntax upgrades, and
+# comprehension lints — all mechanically auto-fixable. flake8-bugbear (B) is a
+# good next step but needs manual cleanup (e.g. B904 raise-from), so it is left
+# out of the shared baseline for now.
+extend-select = [
+  "I",  # isort — import ordering (prek's isort hook covers only the SDK)
+  "UP",  # pyupgrade — modern syntax for the min supported Python
+  "C4"  # flake8-comprehensions
+]

 [tool.uv]
 # Transitive pins matching master to avoid silent drift; bump deliberately.
@@ -79,7 +99,7 @@ constraint-dependencies = [
  "aiobotocore==2.25.1",
  "aiofiles==24.1.0",
  "aiohappyeyeballs==2.6.1",
-  "aiohttp==3.13.5",
+  "aiohttp==3.14.0",
  "aioitertools==0.13.0",
  "aiosignal==1.4.0",
  "alibabacloud-actiontrail20200706==2.4.1",
@@ -124,9 +144,8 @@ constraint-dependencies = [
  "astroid==3.2.4",
  "async-timeout==5.0.1",
  "attrs==25.4.0",
-  "authlib==1.6.9",
+  "authlib==1.6.12",
  "autopep8==2.3.2",
-  "awsipranges==0.3.3",
  "azure-cli-core==2.83.0",
  "azure-cli-telemetry==1.1.0",
  "azure-common==1.1.28",
@@ -174,7 +193,7 @@ constraint-dependencies = [
  "blinker==1.9.0",
  "boto3==1.40.61",
  "botocore==1.40.61",
-  "cartography==0.135.0",
+  "cartography==0.138.1",
  "celery==5.6.2",
  "certifi==2026.1.4",
  "cffi==2.0.0",
@@ -199,7 +218,6 @@ constraint-dependencies = [
  "debugpy==1.8.20",
  "decorator==5.2.1",
  "defusedxml==0.7.1",
-  "detect-secrets==1.5.0",
  "dill==0.4.1",
  "distro==1.9.0",
  "dj-rest-auth==7.0.1",
@@ -209,6 +227,7 @@ constraint-dependencies = [
  "django-celery-results==2.6.0",
  "django-cors-headers==4.4.0",
  "django-environ==0.11.2",
+  "django-eventstream==5.3.3",
  "django-filter==24.3",
  "django-guid==3.5.0",
  "django-postgres-extra==2.0.9",
@@ -253,7 +272,7 @@ constraint-dependencies = [
  "grpc-google-iam-v1==0.14.3",
  "grpcio==1.76.0",
  "grpcio-status==1.76.0",
-  "gunicorn==23.0.0",
+  "gunicorn==26.0.0",
  "h11==0.16.0",
  "h2==4.3.0",
  "hpack==4.1.0",
@@ -262,8 +281,8 @@ constraint-dependencies = [
  "httpx==0.28.1",
  "humanfriendly==10.0",
  "hyperframe==6.1.0",
-  "iamdata==0.1.202602021",
-  "idna==3.11",
+  "iamdata==0.1.202605131",
+  "idna==3.15",
  "importlib-metadata==8.7.1",
  "inflection==0.5.1",
  "iniconfig==2.3.0",
@@ -281,6 +300,7 @@ constraint-dependencies = [
  "jsonschema==4.23.0",
  "jsonschema-specifications==2025.9.1",
  "keystoneauth1==5.13.0",
+  "kingfisher-bin==1.104.0",
  "kiwisolver==1.4.9",
  "knack==0.11.0",
  "kombu==5.6.2",
@@ -315,7 +335,7 @@ constraint-dependencies = [
  "neo4j==6.1.0",
  "nest-asyncio==1.6.0",
  "nltk==3.9.4",
-  "numpy==2.0.2",
+  "numpy==2.2.6",
  "oauthlib==3.3.1",
  "oci==2.169.0",
  "openai==1.109.1",
@@ -344,7 +364,7 @@ constraint-dependencies = [
  "psutil==7.2.2",
  "psycopg2-binary==2.9.9",
  "py-deviceid==0.1.1",
-  "py-iam-expand==0.1.0",
+  "py-iam-expand==0.3.0",
  "py-ocsf-models==0.8.1",
  "pyasn1==0.6.3",
  "pyasn1-modules==0.4.2",
@@ -390,7 +410,7 @@ constraint-dependencies = [
  "rpds-py==0.30.0",
  "rsa==4.9.1",
  "ruamel-yaml==0.19.1",
-  "ruff==0.5.0",
+  "ruff==0.15.11",
  "s3transfer==0.14.0",
  "scaleway==2.10.3",
  "scaleway-core==2.10.3",
@@ -420,12 +440,14 @@ constraint-dependencies = [
  "uritemplate==4.2.0",
  "urllib3==2.7.0",
  "uuid6==2024.7.10",
+  "uvicorn==0.49.0",
+  "uvloop==0.22.1",
  "vine==5.1.0",
  "vulture==2.14",
  "wcwidth==0.5.3",
  "websocket-client==1.9.0",
  "werkzeug==3.1.7",
-  "workos==6.0.4",
+  "workos==6.0.8",
  "wrapt==1.17.3",
  "xlsxwriter==3.2.9",
  "xmlsec==1.3.17",
@@ -436,8 +458,13 @@ constraint-dependencies = [
  "zope-interface==8.2",
  "zstd==1.5.7.3"
 ]
-# prowler@master needs okta==3.4.2; cartography 0.135.0 declares okta<1.0.0 for an
-# integration prowler does not import.
+# prowler@master needs okta==3.4.2, but cartography 0.138.1 requires okta<1.0.0.
+# Attack Paths does not ingest Okta today, so override the Cartography
+# dependency to the Prowler pin.
+#
+# prowler@master needs azure-mgmt-containerservice==34.1.0, but cartography
+# 0.138.1 requires azure-mgmt-containerservice>=41.0.0. Attack Paths does not
+# ingest Azure today, so override the Cartography dependency to the Prowler pin.
 #
 # prowler@master hard-pins microsoft-kiota-abstractions==1.9.2 in [project.dependencies].
 # The microsoft-kiota-http security bump to 1.9.9 (GHSA-7j59-v9qr-6fq9) requires
@@ -453,6 +480,7 @@ constraint-dependencies = [
 # that request pyjwt[crypto] and leave cryptography (needed for RS256) only transitive.
 override-dependencies = [
  "okta==3.4.2",
+  "azure-mgmt-containerservice==34.1.0",
  "microsoft-kiota-abstractions==1.9.9",
  "dulwich==1.2.5",
  "pyjwt[crypto]==2.13.0"
@@ -1,9 +1,15 @@
 from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
-from django.db import transaction
-
 from api.db_router import MainRouter
 from api.db_utils import rls_transaction
-from api.models import Membership, Role, Tenant, User, UserRoleRelationship
+from api.models import (
+    Membership,
+    Role,
+    SAMLConfiguration,
+    Tenant,
+    User,
+    UserRoleRelationship,
+)
+from django.db import transaction


 class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
@@ -18,7 +24,42 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
        # Link existing accounts with the same email address
        email = sociallogin.account.extra_data.get("email")
        if sociallogin.provider.id == "saml":
+            # For SAML, the asserted NameID email cannot be trusted on its own:
+            # any tenant can claim any email domain in its SAML configuration. To
+            # prevent cross-tenant account takeover (GHSA-h8m9-jgf8-vwvp), only link
+            # the incoming SAML session to an existing account when (1) the email
+            # domain matches the tenant whose ACS endpoint is being used and (2) the
+            # existing user is already a member of that tenant.
            email = sociallogin.user.email
+            if not email:
+                return
+
+            domain = email.rsplit("@", 1)[-1].lower()
+            resolver_match = getattr(request, "resolver_match", None)
+            organization_slug = (
+                (resolver_match.kwargs or {}).get("organization_slug", "")
+                if resolver_match
+                else ""
+            ).lower()
+            # The ACS endpoint is scoped per email domain; reject mismatches so an
+            # attacker cannot replay an assertion through another tenant's endpoint.
+            if organization_slug != domain:
+                return
+
+            try:
+                saml_config = SAMLConfiguration.objects.using(MainRouter.admin_db).get(
+                    email_domain=domain
+                )
+            except SAMLConfiguration.DoesNotExist:
+                return
+
+            existing_user = self.get_user_by_email(email)
+            if existing_user and existing_user.is_member_of_tenant(
+                str(saml_config.tenant_id)
+            ):
+                sociallogin.connect(request, existing_user)
+            return
+
        if email:
            existing_user = self.get_user_by_email(email)
            if existing_user:
@@ -1,14 +1,12 @@
 import logging
 import os
 import sys
-
 from pathlib import Path

-from django.apps import AppConfig
-from django.conf import settings
-
 from config.custom_logging import BackendLogger
 from config.env import env
+from django.apps import AppConfig
+from django.conf import settings

 logger = logging.getLogger(BackendLogger.API)

@@ -30,8 +28,10 @@ class ApiConfig(AppConfig):
    name = "api"

    def ready(self):
-        from api import schema_extensions  # noqa: F401
-        from api import signals  # noqa: F401
+        from api import (
+            schema_extensions,  # noqa: F401
+            signals,  # noqa: F401
+        )

        # Generate required cryptographic keys if not present, but only if:
        #   `"manage.py" not in sys.argv[0]`: If an external server (e.g., Gunicorn) is running the app
@@ -42,9 +42,6 @@ class ApiConfig(AppConfig):
        ):
            self._ensure_crypto_keys()

-        # Neo4j driver is created lazily on first use (see api.attack_paths.database).
-        # App init never contacts Neo4j, so a Neo4j outage cannot block API startup.
-
    def _ensure_crypto_keys(self):
        """
        Orchestrator method that ensures all required cryptographic keys are present.
@@ -5,7 +5,6 @@ from api.attack_paths.queries import (
    get_query_by_id,
 )

-
 __all__ = [
    "AttackPathsQueryDefinition",
    "AttackPathsQueryParameterDefinition",
@@ -4,10 +4,10 @@ Cypher sanitizer for custom (user-supplied) Attack Paths queries.
 Two responsibilities:

 1. **Validation** - reject queries containing SSRF or dangerous procedure
-   patterns (defense-in-depth; the primary control is ``neo4j.READ_ACCESS``).
+   patterns (defense-in-depth; the primary control is `neo4j.READ_ACCESS`).

 2. **Provider-scoped label injection** - inject a dynamic
-   ``_Provider_{uuid}`` label into every node pattern so the database can
+   `_Provider_{uuid}` label into every node pattern so the database can
   use its native label index for provider isolation.

 Label-injection pipeline:
@@ -22,18 +22,16 @@ Label-injection pipeline:
 import re

 from rest_framework.exceptions import ValidationError
-
 from tasks.jobs.attack_paths.config import get_provider_label

-
 # Step 1 - String / comment protection
-# Single combined regex: strings first, then line comments.
+# Single combined regex: strings first, then line comments
 # The regex engine finds the leftmost match, so a string like 'https://prowler.com'
-# is consumed as a string before the // inside it can match as a comment.
+# is consumed as a string before the // inside it can match as a comment
 _PROTECTED_RE = re.compile(r"'(?:[^'\\]|\\.)*'|\"(?:[^\"\\]|\\.)*\"|//[^\n]*")

 # Step 2 - Clause splitting
-# OPTIONAL MATCH must come before MATCH to avoid partial matching.
+# `OPTIONAL MATCH` must come before `MATCH` to avoid partial matching
 _CLAUSE_RE = re.compile(
    r"\b(OPTIONAL\s+MATCH|MATCH|WHERE|RETURN|WITH|ORDER\s+BY"
    r"|SKIP|LIMIT|UNION|UNWIND|CALL)\b",
@@ -41,10 +39,10 @@ _CLAUSE_RE = re.compile(
 )

 # Pass A - Labeled node patterns (all segments)
-# Matches node patterns that have at least one :Label.
-# (?<!\w)\(  - open paren NOT preceded by a word char (excludes function calls).
-# Group 1:  optional variable + one or more :Label
-# Group 2:  optional {properties} + closing paren
+# Matches node patterns that have at least one `:Label`
+# `(?<!\w)\(`  - open paren NOT preceded by a word char, excludes function calls
+# Group 1:  optional variable + one or more `:Label`
+# Group 2:  optional `{properties}` map + closing paren
 _LABELED_NODE_RE = re.compile(
    r"(?<!\w)\("
    r"("
@@ -57,9 +55,9 @@ _LABELED_NODE_RE = re.compile(
    r")"
 )

-# Pass B - Bare node patterns (MATCH segments only)
-# Matches (identifier) or (identifier {properties}) without any :Label.
-# Only applied in MATCH/OPTIONAL MATCH segments.
+# Pass B - Bare node patterns (`MATCH` segments only)
+# Matches (identifier) or (identifier {properties}) without any `:Label`
+# Only applied in `MATCH` / `OPTIONAL MATCH` segments
 _BARE_NODE_RE = re.compile(
    r"(?<!\w)\(" r"(\s*[a-zA-Z_]\w*)" r"(\s*(?:\{[^}]*\})?)" r"\s*\)"
 )
@@ -98,6 +96,11 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
        node pattern.
    """
    label = get_provider_label(provider_id)
+    return inject_label(cypher, label)
+
+
+def inject_label(cypher: str, label: str) -> str:
+    """Rewrite a Cypher query to append a label to every node pattern."""

    # Step 1: Protect strings and comments (single pass, leftmost-first)
    protected: list[str] = []
@@ -136,9 +139,7 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
    return work


-# ---------------------------------------------------------------------------
 # Validation
-# ---------------------------------------------------------------------------

 # Patterns that indicate SSRF or dangerous procedure calls
 # Defense-in-depth layer - the primary control is `neo4j.READ_ACCESS`
@@ -1,263 +1,32 @@
-import atexit
-import logging
-import threading
+"""Backwards-compatible facade over the ingest and sink modules.

-from contextlib import contextmanager
-from typing import Any, Iterator
+Historically this module owned a single Neo4j driver used for both the
+cartography temp database and the per-tenant sink database. The port to AWS
+Neptune split those roles: the cartography ingest (temp) database is always
+Neo4j and lives in `api.attack_paths.ingest`; the sink is configurable
+(Neo4j or Neptune) and lives in `api.attack_paths.sink`. This shim preserves
+the public API that `tasks/` and `api/v1/views.py` already depend on, and
+dispatches to the right module by database-name prefix.
+
+A database name starting with `db-tmp-scan-` is a cartography temp DB and
+routes to ingest. Everything else routes to the configured sink.
+"""
+
+from contextlib import AbstractContextManager
+from typing import Any
 from uuid import UUID

-import neo4j
-import neo4j.exceptions
-
+import neo4j  # noqa: F401 - kept for tests that patch api.attack_paths.database.neo4j
+from api.attack_paths import ingest
+from api.attack_paths import sink as sink_module
 from config.env import env
-from django.conf import settings
-
-from api.attack_paths.retryable_session import RetryableSession
-from tasks.jobs.attack_paths.config import (
-    BATCH_SIZE,
-    PROVIDER_RESOURCE_LABEL,
-    get_provider_label,
+from django.conf import (
+    settings,  # noqa: F401 - kept for tests that patch ...database.settings
 )

-# Without this Celery goes crazy with Neo4j logging
-logging.getLogger("neo4j").setLevel(logging.ERROR)
-logging.getLogger("neo4j").propagate = False
-
-SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
-    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
-)
-READ_QUERY_TIMEOUT_SECONDS = env.int(
-    "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
-)
 MAX_CUSTOM_QUERY_NODES = env.int("ATTACK_PATHS_MAX_CUSTOM_QUERY_NODES", default=250)
-# Shorter than CONN_ACQUISITION_TIMEOUT — the driver requires acquisition to be
-# the longer of the two (it may include opening a new connection).
-CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
-CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
-READ_EXCEPTION_CODES = [
-    "Neo.ClientError.Statement.AccessMode",
-    "Neo.ClientError.Procedure.ProcedureNotFound",
-]
-CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."

-# Module-level process-wide driver singleton
-_driver: neo4j.Driver | None = None
-_lock = threading.Lock()
-
-# Base Neo4j functions
-
-
-def get_uri() -> str:
-    host = settings.DATABASES["neo4j"]["HOST"]
-    port = settings.DATABASES["neo4j"]["PORT"]
-    return f"bolt://{host}:{port}"
-
-
-def init_driver() -> neo4j.Driver:
-    global _driver
-    if _driver is not None:
-        return _driver
-
-    with _lock:
-        if _driver is None:
-            uri = get_uri()
-            config = settings.DATABASES["neo4j"]
-
-            driver = neo4j.GraphDatabase.driver(
-                uri,
-                auth=(config["USER"], config["PASSWORD"]),
-                keep_alive=True,
-                max_connection_lifetime=7200,
-                connection_timeout=CONNECTION_TIMEOUT,
-                connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
-                max_connection_pool_size=50,
-            )
-            # Publish the singleton only after connectivity is verified so a
-            # failed probe does not leave an unverified driver behind. Close the
-            # driver on failure so a repeatedly-probed outage cannot leak pools.
-            try:
-                driver.verify_connectivity()
-            except Exception:
-                driver.close()
-                raise
-            _driver = driver
-
-            # Register cleanup handler (only runs once since we're inside the _driver is None block)
-            atexit.register(close_driver)
-
-    return _driver
-
-
-def get_driver() -> neo4j.Driver:
-    return init_driver()
-
-
-def close_driver() -> None:  # TODO: Use it
-    global _driver
-    with _lock:
-        if _driver is not None:
-            try:
-                _driver.close()
-
-            finally:
-                _driver = None
-
-
-@contextmanager
-def get_session(
-    database: str | None = None, default_access_mode: str | None = None
-) -> Iterator[RetryableSession]:
-    session_wrapper: RetryableSession | None = None
-
-    try:
-        session_wrapper = RetryableSession(
-            session_factory=lambda: get_driver().session(
-                database=database, default_access_mode=default_access_mode
-            ),
-            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
-        )
-        yield session_wrapper
-
-    except neo4j.exceptions.Neo4jError as exc:
-        if (
-            default_access_mode == neo4j.READ_ACCESS
-            and exc.code
-            and exc.code in READ_EXCEPTION_CODES
-        ):
-            message = "Read query not allowed"
-            code = READ_EXCEPTION_CODES[0]
-            raise WriteQueryNotAllowedException(message=message, code=code)
-
-        message = exc.message if exc.message is not None else str(exc)
-
-        if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
-            raise ClientStatementException(message=message, code=exc.code)
-
-        raise GraphDatabaseQueryException(message=message, code=exc.code)
-
-    finally:
-        if session_wrapper is not None:
-            session_wrapper.close()
-
-
-def execute_read_query(
-    database: str,
-    cypher: str,
-    parameters: dict[str, Any] | None = None,
-) -> neo4j.graph.Graph:
-    with get_session(database, default_access_mode=neo4j.READ_ACCESS) as session:
-
-        def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
-            result = tx.run(
-                cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
-            )
-            return result.graph()
-
-        return session.execute_read(_run)
-
-
-def create_database(database: str) -> None:
-    query = "CREATE DATABASE $database IF NOT EXISTS"
-    parameters = {"database": database}
-
-    with get_session() as session:
-        session.run(query, parameters)
-
-
-def drop_database(database: str) -> None:
-    query = f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA"
-
-    with get_session() as session:
-        session.run(query)
-
-
-def drop_subgraph(database: str, provider_id: str) -> int:
-    """
-    Delete all nodes for a provider from the tenant database.
-
-    Deletes relationships then nodes in batches (not `DETACH DELETE`) so a dense
-    provider's graph cannot exceed Neo4j's transaction memory limit.
-    Silently returns 0 if the database doesn't exist.
-    """
-    provider_label = get_provider_label(provider_id)
-    deleted_nodes = 0
-
-    try:
-        with get_session(database) as session:
-            # Phase 1: delete relationships incident to provider nodes in batches.
-            deleted_count = 1
-            while deleted_count > 0:
-                result = session.run(
-                    f"""
-                    MATCH (:`{provider_label}`)-[r]-()
-                    WITH DISTINCT r LIMIT $batch_size
-                    DELETE r
-                    RETURN COUNT(r) AS deleted_rels_count
-                    """,
-                    {"batch_size": BATCH_SIZE},
-                )
-                deleted_count = result.single().get("deleted_rels_count", 0)
-
-            # Phase 2: delete the now relationship-free nodes in batches.
-            deleted_count = 1
-            while deleted_count > 0:
-                result = session.run(
-                    f"""
-                    MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
-                    WITH n LIMIT $batch_size
-                    DELETE n
-                    RETURN COUNT(n) AS deleted_nodes_count
-                    """,
-                    {"batch_size": BATCH_SIZE},
-                )
-                deleted_count = result.single().get("deleted_nodes_count", 0)
-                deleted_nodes += deleted_count
-
-    except GraphDatabaseQueryException as exc:
-        if exc.code == "Neo.ClientError.Database.DatabaseNotFound":
-            return 0
-        raise
-
-    return deleted_nodes
-
-
-def has_provider_data(database: str, provider_id: str) -> bool:
-    """
-    Check if any ProviderResource node exists for this provider.
-
-    Returns `False` if the database doesn't exist.
-    """
-    provider_label = get_provider_label(provider_id)
-    query = f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
-
-    try:
-        with get_session(database, default_access_mode=neo4j.READ_ACCESS) as session:
-            result = session.run(query)
-            return result.single() is not None
-
-    except GraphDatabaseQueryException as exc:
-        if exc.code == "Neo.ClientError.Database.DatabaseNotFound":
-            return False
-        raise
-
-
-def clear_cache(database: str) -> None:
-    query = "CALL db.clearQueryCaches()"
-
-    try:
-        with get_session(database) as session:
-            session.run(query)
-
-    except GraphDatabaseQueryException as exc:
-        logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
-
-
-# Neo4j functions related to Prowler + Cartography
-
-
-def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
-    prefix = "tmp-scan" if temporary else "tenant"
-    return f"db-{prefix}-{str(entity_id).lower()}"
+TEMP_DB_PREFIX = "db-tmp-scan-"


 # Exceptions
@@ -272,7 +41,6 @@ class GraphDatabaseQueryException(Exception):
    def __str__(self) -> str:
        if self.code:
            return f"{self.code}: {self.message}"
-
        return self.message


@@ -282,3 +50,177 @@ class WriteQueryNotAllowedException(GraphDatabaseQueryException):

 class ClientStatementException(GraphDatabaseQueryException):
    pass
+
+
+# Routing
+
+
+def _is_ingest_database(database: str | None) -> bool:
+    """Tell whether `database` names a cartography temp (ingest) database.
+
+    `None` and the empty string never qualify; any other name qualifies only
+    when it starts with `TEMP_DB_PREFIX`.
+    """
+    if not database:
+        return False
+
+    return database.startswith(TEMP_DB_PREFIX)
+
+
+# Driver lifecycle
+
+
+def init_driver() -> Any:
+    """Initialize the configured sink backend.
+
+    The ingest driver (Neo4j for cartography temp DBs) stays lazy: it is
+    only initialized when a temp-DB operation actually runs, which never
+    happens on API pods.
+
+    Returns:
+        Whatever `sink_module.init()` returns; this function only delegates.
+    """
+    return sink_module.init()
+
+
+def close_driver() -> None:
+    """Close every driver held by this process.
+
+    The sink backend is closed first, then the ingest (temp-database) driver.
+    The ingest close runs in a `finally` so an exception raised while closing
+    the sink cannot leave the ingest driver open; the sink error still
+    propagates to the caller afterwards.
+    """
+    try:
+        sink_module.close()
+    finally:
+        ingest.close_driver()
+
+
+def get_driver() -> neo4j.Driver:
+    """Return the driver handle owned by the active sink backend.
+
+    Kept for tests and legacy call-sites; new code should go through
+    `get_session`. The handle is the backend's single driver on the Neo4j
+    sink and the writer driver on the Neptune sink.
+
+    Raises:
+        RuntimeError: the active backend exposes neither accessor.
+    """
+    backend = sink_module.get_backend()
+
+    # Probe order is significant: `get_driver` (Neo4jSink) is tried before
+    # `get_writer` (NeptuneSink)
+    for accessor_name in ("get_driver", "get_writer"):
+        if hasattr(backend, accessor_name):
+            return getattr(backend, accessor_name)()
+
+    raise RuntimeError("Active sink backend does not expose a driver handle")
+
+
+def verify_connectivity() -> None:
+    """Raise if the configured graph database is unreachable on the API read path.
+
+    Backend-agnostic entry point for the readiness probe: Neo4j verifies its
+    driver, Neptune verifies the reader endpoint.
+
+    Delegates to the active sink backend's `verify_connectivity()`; whatever
+    that call raises propagates unchanged.
+    """
+    sink_module.get_backend().verify_connectivity()
+
+
+def verify_scan_databases_available() -> None:
+    """Raise if either graph database needed by an Attack Paths scan is unavailable.
+
+    Both the ingest Neo4j driver and the sink driver are probed, even when the
+    first probe fails, so the error message lists every unreachable database.
+
+    Raises:
+        RuntimeError: at least one probe failed. The message joins the
+            per-database errors and the exception is chained to the first
+            failure encountered.
+    """
+    errors: list[str] = []
+    first_error: Exception | None = None
+
+    # Probe 1: the Neo4j driver that backs the cartography temp databases
+    try:
+        ingest.get_driver().verify_connectivity()
+    except Exception as exc:
+        errors.append(f"ingest Neo4j: {exc}")
+        first_error = exc
+
+    # Probe 2: the sink driver handle returned by this module's `get_driver`
+    try:
+        get_driver().verify_connectivity()
+    except Exception as exc:
+        errors.append(f"sink {settings.ATTACK_PATHS_SINK_DATABASE}: {exc}")
+        # Keep the ingest failure as the chained cause when both probes fail
+        if first_error is None:
+            first_error = exc
+
+    if errors:
+        raise RuntimeError(
+            "Attack Paths graph database unavailable before scan start: "
+            + "; ".join(errors)
+        ) from first_error
+
+
+def get_uri() -> str:
+    """Return the Bolt URI of the configured sink. Retained for backwards compatibility.
+
+    The sink name is compared case-insensitively, matching how the sink
+    factory lower-cases `ATTACK_PATHS_SINK_DATABASE` before resolving the
+    backend; otherwise a value such as `Neptune` would select the Neptune
+    backend there but yield the Neo4j URI here.
+
+    Returns:
+        `bolt+s://<writer endpoint>:<port>` for the Neptune sink, otherwise
+        `bolt://<host>:<port>` built from the Neo4j database settings.
+    """
+    if settings.ATTACK_PATHS_SINK_DATABASE.lower() == "neptune":
+        cfg = settings.DATABASES["neptune"]
+        return f"bolt+s://{cfg['WRITER_ENDPOINT']}:{cfg['PORT']}"
+
+    cfg = settings.DATABASES["neo4j"]
+    return f"bolt://{cfg['HOST']}:{cfg['PORT']}"
+
+
+def get_ingest_uri() -> str:
+    """Return the Neo4j URI for the cartography temp (ingest) database.
+
+    The ingest database is always Neo4j regardless of the configured sink.
+    Delegates to `ingest.get_uri()`.
+    """
+    return ingest.get_uri()
+
+
+# Session API
+
+
+def get_session(
+    database: str | None = None,
+    default_access_mode: str | None = None,
+) -> AbstractContextManager:
+    """Open a session on whichever backend owns `database`.
+
+    Routing:
+    - no database name → ingest (used for CREATE / DROP DATABASE admin ops);
+    - a name starting with `db-tmp-scan-` → ingest;
+    - anything else → the active sink backend.
+    """
+    route_to_ingest = database is None or _is_ingest_database(database)
+    provider = ingest if route_to_ingest else sink_module.get_backend()
+
+    return provider.get_session(
+        database=database, default_access_mode=default_access_mode
+    )
+
+
+def execute_read_query(
+    database: str,
+    cypher: str,
+    parameters: dict[str, Any] | None = None,
+) -> neo4j.graph.Graph:
+    """Read-only query against the sink.
+
+    Always delegates to the active sink backend's `execute_read_query`; unlike
+    `get_session`, no temp-database routing is applied here.
+    """
+    return sink_module.get_backend().execute_read_query(database, cypher, parameters)
+
+
+def create_database(database: str) -> None:
+    """Create `database` on the backend that owns it.
+
+    Temp scan databases (`db-tmp-scan-` prefix) are created through the ingest
+    module, which is always Neo4j. Every other name is delegated to the active
+    sink backend's `create_database`; on the Neptune sink that is expected to
+    be a no-op.
+    """
+    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
+    owner.create_database(database)
+
+
+def drop_database(database: str) -> None:
+    """Drop `database`, using the same routing as `create_database`.
+
+    Temp scan databases go to the ingest module; any other name is delegated
+    to the active sink backend.
+    """
+    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
+    owner.drop_database(database)
+
+
+def drop_subgraph(database: str, provider_id: str) -> int:
+    """Delegate to the active sink backend's `drop_subgraph`.
+
+    Returns the backend's integer result unchanged (presumably the number of
+    deleted nodes, as in the previous Neo4j-only implementation - confirm).
+    """
+    return sink_module.get_backend().drop_subgraph(database, provider_id)
+
+
+def has_provider_data(database: str, provider_id: str) -> bool:
+    """Delegate to the active sink backend's `has_provider_data` and return its answer."""
+    return sink_module.get_backend().has_provider_data(database, provider_id)
+
+
+def clear_cache(database: str) -> None:
+    """Clear the query cache for `database` on the backend that owns it.
+
+    Temp scan databases are handled by the ingest module; any other name is
+    delegated to the active sink backend.
+    """
+    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
+    owner.clear_cache(database)
+
+
+# Name helper
+
+
+def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
+    """Build the graph database name for a tenant or a temporary scan.
+
+    The result is `db-tmp-scan-<id>` when `temporary` is true and
+    `db-tenant-<id>` otherwise, with `<id>` being the lower-cased string form
+    of `entity_id`.
+    """
+    kind = "tmp-scan" if temporary else "tenant"
+    return "-".join(("db", kind, str(entity_id).lower()))
@@ -0,0 +1,29 @@
+"""Cartography ingest layer.
+
+Public surface for the per-scan Neo4j temp database driver. Implementation
+lives in `api.attack_paths.ingest.driver`.
+"""
+
+from api.attack_paths.ingest.driver import (
+    clear_cache,
+    close_driver,
+    create_database,
+    drop_database,
+    get_driver,
+    get_session,
+    get_uri,
+    init_driver,
+    run_cypher,
+)
+
+__all__ = [
+    "clear_cache",
+    "close_driver",
+    "create_database",
+    "drop_database",
+    "get_driver",
+    "get_session",
+    "get_uri",
+    "init_driver",
+    "run_cypher",
+]
@@ -0,0 +1,187 @@
+"""Cartography ingest driver: per-scan throw-away Neo4j database.
+
+Cartography writes each scan's graph into a throw-away Neo4j database named
+`db-tmp-scan-{scan_uuid}`. This is always Neo4j, regardless of the configured
+sink: Neptune is single-database and cannot host per-scan throw-away
+databases. This module owns the Neo4j driver used for those temp DBs and the
+admin ops they need (CREATE / DROP DATABASE).
+"""
+
+import atexit
+import logging
+import threading
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import Any
+
+import neo4j
+import neo4j.exceptions
+from api.attack_paths.retryable_session import RetryableSession
+from config.env import env
+from django.conf import settings
+
+logging.getLogger("neo4j").setLevel(logging.ERROR)
+logging.getLogger("neo4j").propagate = False
+
+SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
+    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
+)
+CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
+# TCP connect timeout, ordered below the acquisition timeout so an unreachable
+# host can't pin a worker on a temp-DB op longer than this.
+CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
+MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
+MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)
+
+_driver: neo4j.Driver | None = None
+_lock = threading.Lock()
+
+
+def _neo4j_config() -> dict:
+    """Return the `neo4j` entry of `settings.DATABASES`."""
+    return settings.DATABASES["neo4j"]
+
+
+def get_uri() -> str:
+    """Build the Bolt URI of the Neo4j temp (ingest) database. Always Neo4j.
+
+    Raises:
+        RuntimeError: the configured host or port is empty/unset.
+    """
+    neo4j_settings = _neo4j_config()
+    host, port = neo4j_settings["HOST"], neo4j_settings["PORT"]
+    if host and port:
+        return f"bolt://{host}:{port}"
+
+    raise RuntimeError(
+        "NEO4J_HOST / NEO4J_PORT must be set to use the attack-paths "
+        "temp database. Workers require Neo4j env even when the sink is Neptune."
+    )
+
+
+def init_driver() -> neo4j.Driver:
+    """Initialize the temp-database Neo4j driver. Idempotent.
+
+    The first call builds the driver from the `neo4j` database settings and
+    stores it in the module-level `_driver`; later calls return that same
+    object. A failed connectivity check is logged, not raised.
+
+    Returns:
+        The process-wide driver instance.
+    """
+    global _driver
+    # Fast path: already built, no need to take the lock
+    if _driver is not None:
+        return _driver
+
+    with _lock:
+        # Re-check under the lock: another thread may have built the driver
+        # while this one was waiting
+        if _driver is None:
+            config = _neo4j_config()
+            _driver = neo4j.GraphDatabase.driver(
+                get_uri(),
+                auth=(config["USER"], config["PASSWORD"]),
+                keep_alive=True,
+                max_connection_lifetime=MAX_CONNECTION_LIFETIME,
+                connection_timeout=CONNECTION_TIMEOUT,
+                connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
+                max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
+            )
+            # Best-effort connectivity check: a Neo4j that is down at boot must
+            # not crash the worker. The driver reconnects lazily on first use.
+            try:
+                _driver.verify_connectivity()
+
+            except Exception:
+                logging.warning(
+                    "Neo4j temp-database unreachable at init; continuing with a "
+                    "lazily-reconnecting driver",
+                    exc_info=True,
+                )
+
+            # Registered once per driver creation, inside the `_driver is None` branch
+            atexit.register(close_driver)
+
+    return _driver
+
+
+def get_driver() -> neo4j.Driver:
+    """Return the temp-database driver, creating it on first use via `init_driver`."""
+    return init_driver()
+
+
+def close_driver() -> None:
+    """Close the temp-database driver, if one exists, and forget it.
+
+    Safe to call when no driver was ever created. `_driver` is reset to `None`
+    even when `close()` raises, so a later `init_driver` builds a fresh driver.
+    """
+    global _driver
+    with _lock:
+        if _driver is not None:
+            try:
+                _driver.close()
+            finally:
+                _driver = None
+
+
+@contextmanager
+def get_session(
+    database: str | None = None,
+    default_access_mode: str | None = None,
+) -> Iterator[RetryableSession]:
+    """Session against the Neo4j temp-database cluster.
+
+    Used for temp DB sessions and for admin operations (CREATE / DROP
+    DATABASE) when `database` is None. Yields a `RetryableSession` and always
+    closes it on exit.
+
+    Raises:
+        WriteQueryNotAllowedException: a `Neo4jError` with one of the
+            `READ_EXCEPTION_CODES` surfaced while the session was opened with
+            `neo4j.READ_ACCESS`.
+        ClientStatementException: any other `Neo4jError` whose code starts
+            with `Neo.ClientError.Statement.`.
+        GraphDatabaseQueryException: every remaining `Neo4jError`.
+    """
+    # Imported here rather than at module top because
+    # `api.attack_paths.database` itself imports this package
+    from api.attack_paths.database import (
+        ClientStatementException,
+        GraphDatabaseQueryException,
+        WriteQueryNotAllowedException,
+    )
+
+    READ_EXCEPTION_CODES = [
+        "Neo.ClientError.Statement.AccessMode",
+        "Neo.ClientError.Procedure.ProcedureNotFound",
+    ]
+    CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
+
+    session_wrapper: RetryableSession | None = None
+    try:
+        # The factory is a lambda so the wrapper can obtain a new driver
+        # session whenever it needs one
+        session_wrapper = RetryableSession(
+            session_factory=lambda: get_driver().session(
+                database=database, default_access_mode=default_access_mode
+            ),
+            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
+        )
+        yield session_wrapper
+
+    except neo4j.exceptions.Neo4jError as exc:
+        # Errors raised inside the caller's `with` body arrive here as well
+        if (
+            default_access_mode == neo4j.READ_ACCESS
+            and exc.code
+            and exc.code in READ_EXCEPTION_CODES
+        ):
+            # Both read-mode codes are reported under the AccessMode code.
+            # NOTE(review): the message says "Read query" although the
+            # exception is about a disallowed write - confirm before changing,
+            # callers or tests may match on this text
+            raise WriteQueryNotAllowedException(
+                message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
+            )
+
+        message = exc.message if exc.message is not None else str(exc)
+        if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
+            raise ClientStatementException(message=message, code=exc.code)
+        raise GraphDatabaseQueryException(message=message, code=exc.code)
+
+    finally:
+        if session_wrapper is not None:
+            session_wrapper.close()
+
+
+def create_database(database: str) -> None:
+    """Create a database on the Neo4j cluster. Used for temp scan DBs.
+
+    Runs `CREATE DATABASE ... IF NOT EXISTS` in a session opened without a
+    database name; the name is passed as the `$database` query parameter.
+    """
+    with get_session() as session:
+        session.run("CREATE DATABASE $database IF NOT EXISTS", {"database": database})
+
+
+def drop_database(database: str) -> None:
+    """Drop a database on the Neo4j cluster. Used for temp scan DBs.
+
+    Runs `DROP DATABASE ... IF EXISTS DESTROY DATA` in a session opened
+    without a database name.
+    """
+    with get_session() as session:
+        # The name is interpolated as a backtick-quoted identifier, not passed
+        # as a query parameter.
+        # NOTE(review): a backtick inside `database` would end the quoting -
+        # confirm names only come from trusted helpers
+        session.run(f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA")
+
+
+def clear_cache(database: str) -> None:
+    """Best-effort cache clear for a Neo4j database.
+
+    Calls `db.clearQueryCaches()` in a session on `database`. A
+    `GraphDatabaseQueryException` is logged as a warning and swallowed; other
+    exceptions propagate.
+    """
+    # Imported here rather than at module top because
+    # `api.attack_paths.database` itself imports this package
+    from api.attack_paths.database import GraphDatabaseQueryException
+
+    try:
+        with get_session(database) as session:
+            session.run("CALL db.clearQueryCaches()")
+
+    except GraphDatabaseQueryException as exc:
+        logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
+
+
+def run_cypher(
+    database: str | None,
+    cypher: str,
+    parameters: dict[str, Any] | None = None,
+) -> Any:
+    """Run one Cypher statement in a short-lived session and return the result.
+
+    Thin helper that spares the caller from managing the session: it opens one
+    via `get_session(database)`, runs `cypher` with `parameters` (an empty dict
+    when omitted) and returns whatever `session.run` returned.
+
+    NOTE(review): the session is closed when the `with` block exits, i.e.
+    before the caller receives the return value. If `session.run` hands back a
+    lazy neo4j `Result`, it may no longer be consumable at that point -
+    confirm callers only use this for statements whose result they ignore.
+    """
+    with get_session(database) as session:
+        return session.run(cypher, parameters or {})
@@ -1,12 +1,11 @@
-from api.attack_paths.queries.types import (
-    AttackPathsQueryDefinition,
-    AttackPathsQueryParameterDefinition,
-)
 from api.attack_paths.queries.registry import (
    get_queries_for_provider,
    get_query_by_id,
 )
-
+from api.attack_paths.queries.types import (
+    AttackPathsQueryDefinition,
+    AttackPathsQueryParameterDefinition,
+)

 __all__ = [
    "AttackPathsQueryDefinition",
@@ -1,13 +1,14 @@
-from api.attack_paths.queries.types import AttackPathsQueryDefinition
 from api.attack_paths.queries.aws import AWS_QUERIES

+# TODO: drop after Neptune cutover
+from api.attack_paths.queries.aws_deprecated import AWS_DEPRECATED_QUERIES
+from api.attack_paths.queries.types import AttackPathsQueryDefinition

-# Query definitions organized by provider
+# Query definitions for scans synced with the current schema.
 _QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
    "aws": AWS_QUERIES,
 }

-# Flat lookup by query ID for O(1) access
 _QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
    definition.id: definition
    for definitions in _QUERY_DEFINITIONS.values()
@@ -15,11 +16,45 @@ _QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
 }


-def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
-    """Get all attack path queries for a specific provider."""
-    return _QUERY_DEFINITIONS.get(provider, [])
+# TODO: drop after Neptune cutover
+#
+# Query definitions for pre-cutover scans (`AttackPathsScan.is_migrated=False`)
+# whose graph data was written under the previous schema. Both maps expose the
+# same query IDs so the API contract is identical regardless of which set is
+# routed to.
+_DEPRECATED_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
+    "aws": AWS_DEPRECATED_QUERIES,
+}
+
+_DEPRECATED_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
+    definition.id: definition
+    for definitions in _DEPRECATED_QUERY_DEFINITIONS.values()
+    for definition in definitions
+}


-def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
-    """Get a specific attack path query by its ID."""
-    return _QUERIES_BY_ID.get(query_id)
+def get_queries_for_provider(
+    provider: str,
+    is_migrated: bool = True,
+) -> list[AttackPathsQueryDefinition]:
+    """List every attack path query registered for `provider`.
+
+    `is_migrated` picks the catalog: the current-schema definitions when true,
+    the deprecated pre-cutover definitions when false. An unknown provider
+    yields an empty list.
+    """
+    # TODO: drop the `is_migrated` parameter after Neptune cutover
+    if is_migrated:
+        return _QUERY_DEFINITIONS.get(provider, [])
+
+    return _DEPRECATED_QUERY_DEFINITIONS.get(provider, [])
+
+
+def get_query_by_id(
+    query_id: str,
+    is_migrated: bool = True,
+) -> AttackPathsQueryDefinition | None:
+    """Look up a single attack path query by its ID.
+
+    `is_migrated` picks the lookup table: current-schema queries when true,
+    deprecated pre-cutover queries when false. Returns `None` when the ID is
+    not present in the selected table.
+    """
+    # TODO: drop the `is_migrated` parameter after Neptune cutover
+    if is_migrated:
+        return _QUERIES_BY_ID.get(query_id)
+
+    return _DEPRECATED_QUERIES_BY_ID.get(query_id)
@@ -1,5 +1,4 @@
 import logging
-
 from collections.abc import Callable
 from typing import Any

@@ -0,0 +1,28 @@
+"""Attack-paths sink database layer.
+
+The sink is the persistent store where attack-paths graphs live after a scan
+finishes. Currently selectable between Neo4j (OSS / local dev default) and
+AWS Neptune (hosted dev/staging/prod). Backend is picked by the
+`ATTACK_PATHS_SINK_DATABASE` setting at process init.
+
+This package exposes the public factory API; the implementation lives in
+`api.attack_paths.sink.factory`.
+"""
+
+from api.attack_paths.sink.factory import (
+    SinkBackend,
+    close,
+    get_backend,
+    get_backend_for_name,
+    get_backend_for_scan,
+    init,
+)
+
+__all__ = [
+    "SinkBackend",
+    "close",
+    "get_backend",
+    "get_backend_for_name",
+    "get_backend_for_scan",
+    "init",
+]
@@ -0,0 +1,92 @@
+"""Protocol every sink backend must implement."""
+
+from contextlib import AbstractContextManager
+from typing import Any, Protocol
+
+import neo4j
+
+
+class SinkDatabase(Protocol):
+    """Contract for the persistent attack-paths graph store.
+
+    The `database` argument is an opaque identifier passed through from the
+    legacy `database.py` API surface. On Neo4j it is the per-tenant database
+    name (e.g. `db-tenant-{uuid}`). On Neptune it is ignored (the cluster
+    has a single graph, and isolation is label-based).
+    """
+
+    # Lifecycle hook: the sink factory calls this once, right after
+    # constructing the backend.
+    def init(self) -> None: ...
+
+    # Lifecycle hook: release whatever `init` acquired.
+    def close(self) -> None: ...
+
+    def verify_connectivity(self) -> None:
+        """Raise if the backend the API read path uses is unreachable.
+
+        Neo4j verifies its single driver. Neptune verifies the reader
+        driver (the endpoint the API serves reads from); on single-endpoint
+        clusters the reader aliases the writer, so that path is covered too.
+        Used by the readiness probe; must not block longer than the caller's
+        probe budget.
+        """
+        ...
+
+    # Returns a context manager wrapping a session; `default_access_mode` is
+    # passed through from the caller.
+    def get_session(
+        self,
+        database: str | None = None,
+        default_access_mode: str | None = None,
+    ) -> AbstractContextManager: ...
+
+    # Runs a read-only Cypher query and returns the resulting graph.
+    def execute_read_query(
+        self,
+        database: str,
+        cypher: str,
+        parameters: dict[str, Any] | None = None,
+    ) -> neo4j.graph.Graph: ...
+
+    # Database admin operations; see the class docstring for what `database`
+    # means on each backend.
+    def create_database(self, database: str) -> None: ...
+
+    def drop_database(self, database: str) -> None: ...
+
+    # Removes one provider's data; the int is presumably the number of deleted
+    # nodes, as in the previous Neo4j-only implementation - confirm.
+    def drop_subgraph(self, database: str, provider_id: str) -> int: ...
+
+    # Reports whether any data is stored for the given provider.
+    def has_provider_data(self, database: str, provider_id: str) -> bool: ...
+
+    def clear_cache(self, database: str) -> None: ...
+
+    def ensure_sync_indexes(self, database: str) -> None:
+        """Create any index needed for the sync write path.
+
+        Called once at the start of each provider sync; must be idempotent.
+        Neo4j creates a `_provider_element_id` index on `_ProviderResource`;
+        Neptune is a no-op (its `~id` lookup needs no index).
+        """
+        ...
+
+    def write_nodes(
+        self,
+        database: str,
+        labels: str,
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert a batch of nodes into the sink.
+
+        `labels` is a pre-rendered Cypher label string ready to drop after
+        the node variable (e.g. `` `AWSUser`:`_ProviderResource`:`_Tenant_x` ``).
+        Each row carries `provider_element_id` and `props`.
+        """
+        ...
+
+    def write_relationships(
+        self,
+        database: str,
+        rel_type: str,
+        provider_id: str,
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert a batch of relationships into the sink.
+
+        Each row carries `start_element_id`, `end_element_id`,
+        `provider_element_id` and `props`. `rel_type` is the relationship
+        type (already a valid Cypher identifier).
+        """
+        ...
@@ -0,0 +1,134 @@
+"""Sink backend factory and process-wide handle cache.
+
+Picks the active backend from `settings.ATTACK_PATHS_SINK_DATABASE` at first
+use, holds the active backend plus any secondary backends needed to serve
+scans written under the previous configuration, and tears them all down on
+process shutdown. Imported via `from api.attack_paths import sink as
+sink_module`.
+"""
+
+import threading
+from enum import StrEnum, auto
+
+from api.attack_paths.sink.base import SinkDatabase
+from api.models import AttackPathsScan
+from django.conf import settings
+
+# Backend names
+
+
+class SinkBackend(StrEnum):
+    """Names of the supported sink backends."""
+
+    # `auto()` on a `StrEnum` yields the lower-cased member name, so the
+    # values are "neo4j" and "neptune".
+    NEO4J = auto()
+    NEPTUNE = auto()
+
+
+# Backend cache
+
+_backend: SinkDatabase | None = None
+_secondary_backends: dict[SinkBackend, SinkDatabase] = {}
+_lock = threading.Lock()
+
+
+def _resolve_setting() -> SinkBackend:
+    raw = settings.ATTACK_PATHS_SINK_DATABASE.lower()
+    try:
+        return SinkBackend(raw)
+
+    except ValueError:
+        valid = sorted(b.value for b in SinkBackend)
+        raise RuntimeError(
+            f"ATTACK_PATHS_SINK_DATABASE must be one of {valid}; got {raw!r}"
+        )
+
+
+def _build_backend(name: SinkBackend) -> SinkDatabase:
+    if name is SinkBackend.NEO4J:
+        from api.attack_paths.sink.neo4j import Neo4jSink
+
+        return Neo4jSink()
+
+    if name is SinkBackend.NEPTUNE:
+        from api.attack_paths.sink.neptune import NeptuneSink
+
+        return NeptuneSink()
+
+    raise RuntimeError(f"Unknown sink backend {name!r}")
+
+
+# Lifecycle
+
+
+def init(name: SinkBackend | str | None = None) -> SinkDatabase:
+    """Initialize the configured sink backend. Idempotent."""
+    global _backend
+    if _backend is not None:
+        return _backend
+
+    with _lock:
+        if _backend is None:
+            resolved = SinkBackend(name) if name else _resolve_setting()
+            backend = _build_backend(resolved)
+            backend.init()
+            _backend = backend
+
+    return _backend
+
+
+def close() -> None:
+    """Close the active backend and every cached secondary backend."""
+    global _backend
+    with _lock:
+        backends = [
+            b for b in (_backend, *_secondary_backends.values()) if b is not None
+        ]
+        _backend = None
+        _secondary_backends.clear()
+
+    for backend in backends:
+        try:
+            backend.close()
+
+        except Exception:  # pragma: no cover - best-effort
+            pass
+
+
+def get_backend() -> SinkDatabase:
+    """Return the active sink. Initializes on first call."""
+    # With no name, `init()` resolves the backend from settings when none is
+    # active yet.
+    return init()
+
+
+# Per-scan routing
+
+
+def get_backend_for_scan(scan: AttackPathsScan) -> SinkDatabase:
+    """Route reads by the sink that stores this scan's graph."""
+    raw_backend = getattr(scan, "sink_backend", SinkBackend.NEO4J.value)
+    if not isinstance(raw_backend, str):
+        raw_backend = SinkBackend.NEO4J.value
+    return get_backend_for_name(raw_backend)
+
+
+def get_backend_for_name(name: SinkBackend | str) -> SinkDatabase:
+    """Return the backend named by persisted scan metadata."""
+    resolved = SinkBackend(name)
+    if resolved is _resolve_setting():
+        return get_backend()
+
+    return _build_backend_cached(resolved)
+
+
+def _build_backend_cached(name: SinkBackend) -> SinkDatabase:
+    # TODO: drop after Neptune cutover
+    # Needed only during cutover to serve Neo4j-written scans from a Neptune-
+    # configured API pod (and vice versa). Once every scan is on Neptune,
+    # `get_backend_for_scan` becomes a one-liner returning `get_backend()`.
+    if name in _secondary_backends:
+        return _secondary_backends[name]
+
+    with _lock:
+        if name not in _secondary_backends:
+            backend = _build_backend(name)
+            backend.init()
+            _secondary_backends[name] = backend
+
+    return _secondary_backends[name]
@@ -0,0 +1,454 @@
+"""Neo4j sink implementation.
+
+Owns a Neo4j driver independent from the staging driver. On OSS and local dev
+this is the only sink; on hosted deployments it runs only as a legacy read
+path while phase-1 drains tenant DBs.
+"""
+
+import atexit
+import logging
+import threading
+import time
+from collections.abc import Iterator
+from contextlib import AbstractContextManager, contextmanager
+from typing import Any
+
+import neo4j
+import neo4j.exceptions
+from api.attack_paths.retryable_session import RetryableSession
+from api.attack_paths.sink.base import SinkDatabase
+from config.env import env
+from django.conf import settings
+
+logging.getLogger("neo4j").setLevel(logging.ERROR)
+logging.getLogger("neo4j").propagate = False
+
+logger = logging.getLogger(__name__)
+
+SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
+    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
+)
+READ_QUERY_TIMEOUT_SECONDS = env.int(
+    "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
+)
+CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
+# TCP connect timeout, ordered below the acquisition timeout so an unreachable
+# host can't pin a request or the readiness probe longer than this.
+CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
+MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
+MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)
+
+READ_EXCEPTION_CODES = [
+    "Neo.ClientError.Statement.AccessMode",
+    "Neo.ClientError.Procedure.ProcedureNotFound",
+]
+CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
+DATABASE_NOT_FOUND_CODE = "Neo.ClientError.Database.DatabaseNotFound"
+
+
+class Neo4jSink(SinkDatabase):
+    """Neo4j-backed sink. Multi-database cluster; tenant isolation is physical."""
+
+    def __init__(self) -> None:
+        # Created lazily by `init()`; `None` means not initialized or closed
+        self._driver: neo4j.Driver | None = None
+        # Guards driver creation and teardown
+        self._lock = threading.Lock()
+        # Ensures `close` is registered with `atexit` at most once per instance
+        self._atexit_registered = False
+
+    # Driver
+
+    def _config(self) -> dict:
+        """Return the `neo4j` entry of `settings.DATABASES`."""
+        return settings.DATABASES["neo4j"]
+
+    def _uri(self) -> str:
+        cfg = self._config()
+        host = cfg["HOST"]
+        port = cfg["PORT"]
+        if not host or not port:
+            raise RuntimeError(
+                "NEO4J_HOST / NEO4J_PORT must be set when ATTACK_PATHS_SINK_DATABASE=neo4j"
+            )
+        return f"bolt://{host}:{port}"
+
+    def init(self) -> neo4j.Driver:
+        """Create the Neo4j driver on first call and return it. Idempotent.
+
+        A failed connectivity check at creation time is logged as a warning
+        and not raised. `close` is registered with `atexit` once per instance.
+        """
+        if self._driver is not None:
+            return self._driver
+        with self._lock:
+            # Re-check under the lock: another thread may have created the driver
+            if self._driver is None:
+                cfg = self._config()
+                self._driver = neo4j.GraphDatabase.driver(
+                    self._uri(),
+                    auth=(cfg["USER"], cfg["PASSWORD"]),
+                    keep_alive=True,
+                    max_connection_lifetime=MAX_CONNECTION_LIFETIME,
+                    connection_timeout=CONNECTION_TIMEOUT,
+                    connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
+                    max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
+                )
+                # Eager connectivity check is best-effort:
+                # A Neo4j that is down at boot must not crash the process, same degradation model as Postgres
+                # The driver reconnects lazily on first use
+                # /health/ready surfaces the outage until it recovers
+                try:
+                    self._driver.verify_connectivity()
+
+                except Exception:
+                    logger.warning(
+                        "Neo4j sink unreachable at init; continuing with a lazily-reconnecting driver",
+                        exc_info=True,
+                    )
+
+                if not self._atexit_registered:
+                    atexit.register(self.close)
+                    self._atexit_registered = True
+        return self._driver
+
+    def _get_driver(self) -> neo4j.Driver:
+        """Return the driver, creating it through `init()` if needed."""
+        return self.init()
+
+    def verify_connectivity(self) -> None:
+        """Run the driver's connectivity check; its exceptions propagate."""
+        self._get_driver().verify_connectivity()
+
+    def close(self) -> None:
+        """Close the driver, if any, and reset the sink to uninitialized."""
+        with self._lock:
+            if self._driver is not None:
+                try:
+                    self._driver.close()
+                finally:
+                    # Drop the reference even if `close()` raised
+                    self._driver = None
+
+    # Sessions
+
+    @contextmanager
+    def get_session(
+        self,
+        database: str | None = None,
+        default_access_mode: str | None = None,
+    ) -> Iterator[RetryableSession]:
+        """Yield a `RetryableSession` on `database`.
+
+        A `neo4j.exceptions.Neo4jError` raised while the session is in use is
+        translated, in this order:
+
+        - in read mode, a code listed in `READ_EXCEPTION_CODES` becomes
+          `WriteQueryNotAllowedException`;
+        - a code starting with `CLIENT_STATEMENT_EXCEPTION_PREFIX` becomes
+          `ClientStatementException`;
+        - anything else becomes `GraphDatabaseQueryException`.
+
+        The session wrapper is always closed on exit.
+        """
+        from api.attack_paths.database import (
+            ClientStatementException,
+            GraphDatabaseQueryException,
+            WriteQueryNotAllowedException,
+        )
+
+        session_wrapper: RetryableSession | None = None
+        try:
+            session_wrapper = RetryableSession(
+                session_factory=lambda: self._get_driver().session(
+                    database=database, default_access_mode=default_access_mode
+                ),
+                max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
+            )
+            yield session_wrapper
+
+        except neo4j.exceptions.Neo4jError as exc:
+            if (
+                default_access_mode == neo4j.READ_ACCESS
+                and exc.code
+                and exc.code in READ_EXCEPTION_CODES
+            ):
+                # Always reported with the first read-exception code,
+                # whichever of the listed codes was actually raised
+                raise WriteQueryNotAllowedException(
+                    message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
+                )
+
+            message = exc.message if exc.message is not None else str(exc)
+            if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
+                raise ClientStatementException(message=message, code=exc.code)
+            raise GraphDatabaseQueryException(message=message, code=exc.code)
+
+        finally:
+            if session_wrapper is not None:
+                session_wrapper.close()
+
+    # Operations
+
+    def execute_read_query(
+        self,
+        database: str,
+        cypher: str,
+        parameters: dict[str, Any] | None = None,
+    ) -> neo4j.graph.Graph:
+        with self.get_session(
+            database, default_access_mode=neo4j.READ_ACCESS
+        ) as session:
+
+            def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
+                result = tx.run(
+                    cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
+                )
+                return result.graph()
+
+            return session.execute_read(_run)
+
+    def create_database(self, database: str) -> None:
+        """Create `database` unless it already exists."""
+        # The session is opened without a database name; the database to
+        # create is passed as a query parameter.
+        with self.get_session() as session:
+            session.run(
+                "CREATE DATABASE $database IF NOT EXISTS", {"database": database}
+            )
+
+    def drop_database(self, database: str) -> None:
+        with self.get_session() as session:
+            session.run(f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA")
+
+    def drop_subgraph(self, database: str, provider_id: str) -> int:
+        """Delete all nodes for a provider from a tenant database, batched.
+
+        Deletes relationships then nodes in batches (not `DETACH DELETE`) so a
+        dense provider's graph cannot exceed Neo4j's transaction memory limit.
+        Silently returns 0 if the database doesn't exist.
+
+        Returns:
+            The number of deleted nodes. Relationship totals are only logged.
+
+        Raises:
+            GraphDatabaseQueryException: For any query failure other than a
+                missing database.
+        """
+        from api.attack_paths.database import GraphDatabaseQueryException
+        from tasks.jobs.attack_paths.config import (
+            BATCH_SIZE,
+            PROVIDER_RESOURCE_LABEL,
+            get_provider_label,
+        )
+
+        provider_label = get_provider_label(provider_id)
+        deleted_nodes = 0
+        deleted_relationships = 0
+        relationship_batches = 0
+        node_batches = 0
+        drop_t0 = time.perf_counter()
+
+        logger.info(
+            "Dropping provider graph from Neo4j sink database %s "
+            "(provider=%s, provider_label=%s)",
+            database,
+            provider_id,
+            provider_label,
+        )
+
+        try:
+            logger.info(
+                "Opening Neo4j sink session for provider graph drop "
+                "(database=%s, provider=%s)",
+                database,
+                provider_id,
+            )
+            with self.get_session(database) as session:
+                logger.info(
+                    "Opened Neo4j sink session for provider graph drop "
+                    "(database=%s, provider=%s)",
+                    database,
+                    provider_id,
+                )
+                # Phase 1: delete relationships incident to provider nodes in
+                # batches. The undirected pattern matches an edge between two
+                # provider nodes from both ends, so `DISTINCT r` dedupes it to
+                # delete a full batch of unique relationships each round.
+                # Seeded with a non-zero value so the loop body runs at least once
+                deleted_count = 1
+                while deleted_count > 0:
+                    next_batch = relationship_batches + 1
+                    logger.info(
+                        "Deleting relationship batch from Neo4j sink database %s "
+                        "(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
+                        database,
+                        provider_id,
+                        next_batch,
+                        deleted_relationships,
+                        time.perf_counter() - drop_t0,
+                    )
+                    result = session.run(
+                        f"""
+                        MATCH (:`{provider_label}`)-[r]-()
+                        WITH DISTINCT r LIMIT $batch_size
+                        DELETE r
+                        RETURN COUNT(r) AS deleted_rels_count
+                        """,
+                        {"batch_size": BATCH_SIZE},
+                    )
+                    deleted_count = result.single().get("deleted_rels_count", 0)
+                    # The final, empty round is not counted as a batch
+                    if deleted_count > 0:
+                        relationship_batches += 1
+                        deleted_relationships += deleted_count
+                        logger.info(
+                            "Deleted relationship batch from Neo4j sink database %s "
+                            "(provider=%s, batch=%s, deleted_rels=%s, "
+                            "total_rels=%s, elapsed=%.3fs)",
+                            database,
+                            provider_id,
+                            relationship_batches,
+                            deleted_count,
+                            deleted_relationships,
+                            time.perf_counter() - drop_t0,
+                        )
+
+                # Phase 2: delete the now relationship-free nodes in batches.
+                deleted_count = 1
+                while deleted_count > 0:
+                    next_batch = node_batches + 1
+                    logger.info(
+                        "Deleting node batch from Neo4j sink database %s "
+                        "(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
+                        database,
+                        provider_id,
+                        next_batch,
+                        deleted_nodes,
+                        time.perf_counter() - drop_t0,
+                    )
+                    result = session.run(
+                        f"""
+                        MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
+                        WITH n LIMIT $batch_size
+                        DELETE n
+                        RETURN COUNT(n) AS deleted_nodes_count
+                        """,
+                        {"batch_size": BATCH_SIZE},
+                    )
+                    deleted_count = result.single().get("deleted_nodes_count", 0)
+                    if deleted_count > 0:
+                        node_batches += 1
+                        deleted_nodes += deleted_count
+                        logger.info(
+                            "Deleted node batch from Neo4j sink database %s "
+                            "(provider=%s, batch=%s, deleted_nodes=%s, "
+                            "total_nodes=%s, elapsed=%.3fs)",
+                            database,
+                            provider_id,
+                            node_batches,
+                            deleted_count,
+                            deleted_nodes,
+                            time.perf_counter() - drop_t0,
+                        )
+
+        except GraphDatabaseQueryException as exc:
+            # A missing database means there is nothing to delete
+            if exc.code == DATABASE_NOT_FOUND_CODE:
+                logger.info(
+                    "Skipped provider graph drop from Neo4j sink database %s "
+                    "(provider=%s, reason=database_not_found, elapsed=%.3fs)",
+                    database,
+                    provider_id,
+                    time.perf_counter() - drop_t0,
+                )
+                return 0
+            raise
+
+        logger.info(
+            "Finished dropping provider graph from Neo4j sink database %s "
+            "(provider=%s, relationship_batches=%s, deleted_rels=%s, "
+            "node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
+            database,
+            provider_id,
+            relationship_batches,
+            deleted_relationships,
+            node_batches,
+            deleted_nodes,
+            time.perf_counter() - drop_t0,
+        )
+        return deleted_nodes
+
+    def has_provider_data(self, database: str, provider_id: str) -> bool:
+        """Return whether `database` holds at least one node for the provider.
+
+        A node counts when it carries both `PROVIDER_RESOURCE_LABEL` and the
+        provider's label. Returns `False` when the database does not exist;
+        any other `GraphDatabaseQueryException` propagates.
+        """
+        from api.attack_paths.database import GraphDatabaseQueryException
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_RESOURCE_LABEL,
+            get_provider_label,
+        )
+
+        provider_label = get_provider_label(provider_id)
+        # `LIMIT 1` makes this an existence check, not a count
+        query = (
+            f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
+        )
+        try:
+            with self.get_session(
+                database, default_access_mode=neo4j.READ_ACCESS
+            ) as session:
+                result = session.run(query)
+                return result.single() is not None
+
+        except GraphDatabaseQueryException as exc:
+            if exc.code == DATABASE_NOT_FOUND_CODE:
+                return False
+            raise
+
+    def clear_cache(self, database: str) -> None:
+        from api.attack_paths.database import GraphDatabaseQueryException
+
+        try:
+            with self.get_session(database) as session:
+                session.run("CALL db.clearQueryCaches()")
+        except GraphDatabaseQueryException as exc:
+            logger.warning(
+                f"Failed to clear query cache for database `{database}`: {exc}"
+            )
+
+    # Sync write path
+
+    def ensure_sync_indexes(self, database: str) -> None:
+        """Create the `_provider_element_id` lookup index on `_ProviderResource`.
+
+        Every synced node carries the `_ProviderResource` label, so a single
+        index covers both node-upserts and relationship endpoint MATCHes.
+        Without this index the rel sync degrades to a label scan per row and
+        large provider syncs become unworkable.
+        """
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_ELEMENT_ID_PROPERTY,
+            PROVIDER_RESOURCE_LABEL,
+        )
+
+        query = (
+            f"CREATE INDEX provider_element_id_idx IF NOT EXISTS "
+            f"FOR (n:`{PROVIDER_RESOURCE_LABEL}`) "
+            f"ON (n.`{PROVIDER_ELEMENT_ID_PROPERTY}`)"
+        )
+        with self.get_session(database) as session:
+            session.run(query).consume()
+
+    def write_nodes(
+        self,
+        database: str,
+        labels: str,
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert `rows` as nodes in a single query.
+
+        Each row is merged on its `provider_element_id` under the
+        `PROVIDER_RESOURCE_LABEL` label, then gets `labels` added and
+        `row.props` merged into its properties. An empty `rows` is a no-op.
+        """
+        if not rows:
+            return
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_ELEMENT_ID_PROPERTY,
+            PROVIDER_RESOURCE_LABEL,
+        )
+
+        # `labels` is interpolated verbatim, so it must already be a valid,
+        # trusted Cypher label expression.
+        query = f"""
+            UNWIND $rows AS row
+            MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}})
+            SET n:{labels}
+            SET n += row.props
+        """
+        with self.get_session(database) as session:
+            session.run(query, {"rows": rows}).consume()
+
+    def write_relationships(
+        self,
+        database: str,
+        rel_type: str,
+        provider_id: str,
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert `rows` as `rel_type` relationships in a single query.
+
+        Start and end nodes are looked up by element id among nodes that carry
+        both `PROVIDER_RESOURCE_LABEL` and the provider's label. The
+        relationship is merged on its own `provider_element_id` and gets
+        `row.props` merged into its properties. An empty `rows` is a no-op.
+        """
+        if not rows:
+            return
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_ELEMENT_ID_PROPERTY,
+            PROVIDER_RESOURCE_LABEL,
+            get_provider_label,
+        )
+
+        provider_label = get_provider_label(provider_id)
+        # A row whose start or end node is not matched creates no relationship
+        query = f"""
+            UNWIND $rows AS row
+            MATCH (s:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.start_element_id}})
+            MATCH (t:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.end_element_id}})
+            MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(t)
+            SET r += row.props
+        """
+        with self.get_session(database) as session:
+            session.run(query, {"rows": rows}).consume()
+
+    # For compatibility with test harnesses that patch the concrete driver
+    def get_driver(self) -> neo4j.Driver:
+        """Public accessor for the driver; creates it if needed."""
+        return self._get_driver()
+
+
+# Helper for tests / external callers that want a read-only session specifically
+def get_read_session(
+    sink: Neo4jSink, database: str
+) -> AbstractContextManager[RetryableSession]:
+    """Open a session on `database` whose default access mode is `READ_ACCESS`."""
+    return sink.get_session(database, default_access_mode=neo4j.READ_ACCESS)
@@ -0,0 +1,524 @@
+"""AWS Neptune sink implementation.
+
+Dual Bolt drivers: one against the writer endpoint for workers, one against
+the reader endpoint for the API read path. If `NEPTUNE_READER_ENDPOINT` is
+unset the reader falls back to the writer driver so single-node clusters work.
+
+Neptune is single-database. The `database` argument on the SinkDatabase
+protocol is ignored; tenant / provider isolation is enforced by labels that
+the sync step already writes on every node (see tasks/jobs/attack_paths/sync.py).
+
+SigV4 auth lives at the bottom of this file as `neptune_auth_provider`. The
+neo4j driver invokes the returned callable on each token refresh.
+"""
+
+import atexit
+import datetime
+import json
+import logging
+import threading
+import time
+from collections.abc import Callable, Iterator
+from contextlib import contextmanager
+from typing import Any
+from urllib.parse import urlsplit
+
+import neo4j
+import neo4j.exceptions
+from api.attack_paths.retryable_session import RetryableSession
+from api.attack_paths.sink.base import SinkDatabase
+from botocore.auth import SigV4Auth
+from botocore.awsrequest import AWSRequest
+from botocore.session import Session as BotoSession
+from config.env import env
+from django.conf import settings
+from neo4j.auth_management import AuthManagers, ExpiringAuth
+
+logging.getLogger("neo4j").setLevel(logging.ERROR)
+logging.getLogger("neo4j").propagate = False
+
+logger = logging.getLogger(__name__)
+
+SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
+    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
+)
+READ_QUERY_TIMEOUT_SECONDS = env.int(
+    "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
+)
+# Neptune serverless cold-start can be >30s; give the driver room
+CONN_ACQUISITION_TIMEOUT = env.int("NEPTUNE_CONN_ACQUISITION_TIMEOUT", default=60)
+# TCP connect timeout, ordered below the acquisition timeout so an unreachable
+# endpoint can't pin a request or the readiness probe longer than this. Kept
+# generous: cold-start delays query execution, not the socket connect.
+CONNECTION_TIMEOUT = env.int("NEPTUNE_CONNECTION_TIMEOUT", default=10)
+# Roll connections hourly so SigV4 rotations and cert refreshes don't strand long-lived pool entries
+MAX_CONNECTION_LIFETIME = env.int("NEPTUNE_MAX_CONNECTION_LIFETIME", default=3600)
+MAX_CONNECTION_POOL_SIZE = env.int("NEPTUNE_MAX_CONNECTION_POOL_SIZE", default=50)
+
+READ_EXCEPTION_CODES = [
+    "Neo.ClientError.Statement.AccessMode",
+    "Neo.ClientError.Procedure.ProcedureNotFound",
+]
+CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
+
+# Refresh 60s before the 5-minute SigV4 window closes
+SIGV4_TOKEN_LIFETIME_MINUTES = 4
+
+
+class NeptuneSink(SinkDatabase):
+    """Neptune-backed sink. Single database; isolation is label-based."""
+
+    def __init__(self) -> None:
+        # Both drivers are built lazily by `init()`; `None` means not
+        # initialized or closed
+        self._writer: neo4j.Driver | None = None
+        # Same object as `_writer` when no separate reader endpoint is configured
+        self._reader: neo4j.Driver | None = None
+        # Guards driver creation and teardown
+        self._lock = threading.Lock()
+        # Ensures `close` is registered with `atexit` at most once per instance
+        self._atexit_registered = False
+
+    # Config
+
+    def _config(self) -> dict:
+        """Return the `neptune` entry of `settings.DATABASES`."""
+        return settings.DATABASES["neptune"]
+
+    def _bolt_uri(self, endpoint: str, port: str) -> str:
+        """Return the `bolt+s://` URI for `endpoint` and `port`."""
+        return f"bolt+s://{endpoint}:{port}"
+
+    def _https_url(self, endpoint: str, port: str) -> str:
+        """Return the `https://` URL for `endpoint` and `port`.
+
+        `_build_driver` passes this URL to `neptune_auth_provider`.
+        """
+        return f"https://{endpoint}:{port}"
+
+    def _build_driver(self, endpoint: str) -> neo4j.Driver:
+        """Create a Bolt driver for `endpoint` using bearer auth.
+
+        The auth tokens come from `neptune_auth_provider`, called with the
+        configured region and the endpoint's HTTPS URL.
+
+        Raises:
+            RuntimeError: If `endpoint` or the configured region is empty.
+        """
+        cfg = self._config()
+        port = cfg["PORT"]
+        region = cfg["REGION"]
+        if not endpoint or not region:
+            raise RuntimeError(
+                "NEPTUNE_WRITER_ENDPOINT and AWS_REGION must be set when "
+                "ATTACK_PATHS_SINK_DATABASE=neptune"
+            )
+        return neo4j.GraphDatabase.driver(
+            self._bolt_uri(endpoint, port),
+            auth=AuthManagers.bearer(
+                neptune_auth_provider(region, self._https_url(endpoint, port))
+            ),
+            keep_alive=True,
+            max_connection_lifetime=MAX_CONNECTION_LIFETIME,
+            connection_timeout=CONNECTION_TIMEOUT,
+            connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
+            max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
+            # NOTE(review): presumably turns off the driver's own
+            # managed-transaction retries in favor of `RetryableSession` - confirm
+            max_transaction_retry_time=0,
+        )
+
+    # Lifecycle
+
+    def init(self) -> None:
+        if self._writer is not None:
+            return
+        with self._lock:
+            if self._writer is None:
+                cfg = self._config()
+                writer_endpoint = cfg["WRITER_ENDPOINT"]
+                reader_endpoint = cfg["READER_ENDPOINT"] or writer_endpoint
+
+                # Eager connectivity checks are best-effort
+                # A Neptune that is down at boot must not crash the process, same degradation model as Postgres
+                # Drivers reconnect lazily on first use
+                # /health/ready surfaces the outage until it recovers
+                self._writer = self._build_driver(writer_endpoint)
+                self._verify_best_effort(self._writer, "writer")
+
+                if reader_endpoint == writer_endpoint:
+                    self._reader = self._writer
+
+                else:
+                    self._reader = self._build_driver(reader_endpoint)
+                    self._verify_best_effort(self._reader, "reader")
+
+                if not self._atexit_registered:
+                    atexit.register(self.close)
+                    self._atexit_registered = True
+
+    def close(self) -> None:
+        with self._lock:
+            # `Driver.close()` is idempotent, so closing the same driver twice
+            # (when reader aliases writer on single-endpoint configs) is safe
+            for driver in (self._reader, self._writer):
+                if driver is None:
+                    continue
+                try:
+                    driver.close()
+                except Exception:  # pragma: no cover - best-effort
+                    pass
+            self._writer = None
+            self._reader = None
+
+    # Sessions
+
+    def _get_writer(self) -> neo4j.Driver:
+        """Return the writer driver, initializing the sink if needed."""
+        self.init()
+        assert self._writer is not None
+        return self._writer
+
+    def _get_reader(self) -> neo4j.Driver:
+        """Return the reader driver, initializing the sink if needed."""
+        self.init()
+        assert self._reader is not None
+        return self._reader
+
+    @staticmethod
+    def _verify_best_effort(driver: neo4j.Driver, role: str) -> None:
+        """Check `driver` connectivity, logging a warning instead of raising.
+
+        `role` only labels the endpoint in the log message.
+        """
+        try:
+            driver.verify_connectivity()
+
+        except Exception:
+            logger.warning(
+                "Neptune %s endpoint unreachable at init; continuing with a lazily-reconnecting driver",
+                role,
+                exc_info=True,
+            )
+
+    def verify_connectivity(self) -> None:
+        """Check connectivity of the reader driver; its exceptions propagate."""
+        # The API read path uses the reader driver
+        # On single-endpoint clusters it aliases the writer, so this also covers the writer
+        # A writer-only outage is a workers' concern (no HTTP probe there) and deliberately does not fail API readiness
+        self._get_reader().verify_connectivity()
+
+    @contextmanager
+    def get_session(
+        self,
+        database: str | None = None,  # noqa: ARG002 - ignored on Neptune
+        default_access_mode: str | None = None,
+    ) -> Iterator[RetryableSession]:
+        """Yield a `RetryableSession` on the reader or the writer driver.
+
+        `neo4j.READ_ACCESS` selects the reader driver; any other access mode,
+        including `None`, selects the writer. `database` is ignored.
+
+        A `neo4j.exceptions.Neo4jError` raised while the session is in use is
+        translated, in this order:
+
+        - in read mode, a code listed in `READ_EXCEPTION_CODES` becomes
+          `WriteQueryNotAllowedException`;
+        - a code starting with `CLIENT_STATEMENT_EXCEPTION_PREFIX` becomes
+          `ClientStatementException`;
+        - anything else becomes `GraphDatabaseQueryException`.
+
+        The session wrapper is always closed on exit.
+        """
+        from api.attack_paths.database import (
+            ClientStatementException,
+            GraphDatabaseQueryException,
+            WriteQueryNotAllowedException,
+        )
+
+        driver = (
+            self._get_reader()
+            if default_access_mode == neo4j.READ_ACCESS
+            else self._get_writer()
+        )
+
+        session_wrapper: RetryableSession | None = None
+        try:
+            session_wrapper = RetryableSession(
+                session_factory=lambda: driver.session(
+                    default_access_mode=default_access_mode
+                ),
+                max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
+            )
+            yield session_wrapper
+
+        except neo4j.exceptions.Neo4jError as exc:
+            if (
+                default_access_mode == neo4j.READ_ACCESS
+                and exc.code
+                and exc.code in READ_EXCEPTION_CODES
+            ):
+                # Always reported with the first read-exception code,
+                # whichever of the listed codes was actually raised
+                raise WriteQueryNotAllowedException(
+                    message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
+                )
+
+            message = exc.message if exc.message is not None else str(exc)
+            if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
+                raise ClientStatementException(message=message, code=exc.code)
+            raise GraphDatabaseQueryException(message=message, code=exc.code)
+
+        finally:
+            if session_wrapper is not None:
+                session_wrapper.close()
+
+    # Operations
+
+    def execute_read_query(
+        self,
+        database: str,  # noqa: ARG002 - ignored on Neptune
+        cypher: str,
+        parameters: dict[str, Any] | None = None,
+    ) -> neo4j.graph.Graph:
+        with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:
+
+            def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
+                result = tx.run(
+                    cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
+                )
+                return result.graph()
+
+            return session.execute_read(_run)
+
+    def create_database(self, database: str) -> None:  # noqa: ARG002
+        """Do nothing; `database` is ignored."""
+        # Neptune clusters are single-database; there is nothing to create.
+        return None
+
+    def drop_database(self, database: str) -> None:  # noqa: ARG002
+        """Do nothing; `database` is ignored."""
+        # Neptune clusters are single-database; there is nothing to drop.
+        return None
+
+    def drop_subgraph(self, database: str, provider_id: str) -> int:  # noqa: ARG002
+        """Delete a provider's subgraph in two bounded phases.
+
+        Neptune write transactions are capped at ~2 minutes. A naive
+        `DETACH DELETE` on a label-scanned batch grows unbounded with graph
+        density (one node can drag thousands of relationships into the same
+        transaction). Instead:
+
+        1. Delete relationships incident to provider nodes, one fixed-size
+           batch per transaction.
+        2. Delete the now-orphaned nodes, one fixed-size batch per transaction.
+
+        Each transaction does work proportional to `batch_size`, never to the
+        graph's branching factor.
+
+        Args:
+            database: Accepted for interface compatibility; not used here.
+            provider_id: Provider whose label (from `get_provider_label`)
+                selects the nodes and relationships to delete.
+
+        Returns:
+            The number of nodes deleted. The relationship total is only logged.
+        """
+        from tasks.jobs.attack_paths.config import (
+            BATCH_SIZE,
+            PROVIDER_RESOURCE_LABEL,
+            get_provider_label,
+        )
+
+        provider_label = get_provider_label(provider_id)
+        deleted_relationships = 0
+        relationship_batches = 0
+        node_batches = 0
+        # Single start timestamp shared by every `elapsed=` value logged below.
+        drop_t0 = time.perf_counter()
+
+        logger.info(
+            "Dropping provider graph from Neptune sink "
+            "(provider=%s, provider_label=%s)",
+            provider_id,
+            provider_label,
+        )
+
+        logger.info(
+            "Opening Neptune writer session for provider graph drop (provider=%s)",
+            provider_id,
+        )
+        with self.get_session() as session:
+            logger.info(
+                "Opened Neptune writer session for provider graph drop (provider=%s)",
+                provider_id,
+            )
+            # Phase 1: delete relationships touching provider-labelled nodes,
+            # at most `BATCH_SIZE` per query, until a pass deletes nothing.
+            while True:
+                next_batch = relationship_batches + 1
+                logger.info(
+                    "Deleting relationship batch from Neptune sink "
+                    "(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
+                    provider_id,
+                    next_batch,
+                    deleted_relationships,
+                    time.perf_counter() - drop_t0,
+                )
+                result = session.run(
+                    f"""
+                    MATCH (:`{provider_label}`)-[r]-()
+                    WITH DISTINCT r LIMIT $batch_size
+                    DELETE r
+                    RETURN COUNT(r) AS deleted_rels_count
+                    """,
+                    {"batch_size": BATCH_SIZE},
+                )
+                record = result.single()
+                # A missing record or a null count is treated as zero deletions.
+                deleted_rels = (record["deleted_rels_count"] if record else 0) or 0
+                if deleted_rels == 0:
+                    break
+                relationship_batches += 1
+                deleted_relationships += deleted_rels
+                logger.info(
+                    "Deleted relationship batch from Neptune sink "
+                    "(provider=%s, batch=%s, deleted_rels=%s, total_rels=%s, "
+                    "elapsed=%.3fs)",
+                    provider_id,
+                    relationship_batches,
+                    deleted_rels,
+                    deleted_relationships,
+                    time.perf_counter() - drop_t0,
+                )
+
+            # Phase 2: delete the provider's nodes with a plain `DELETE` (no
+            # `DETACH`), again at most `BATCH_SIZE` per query, until a pass
+            # deletes nothing.
+            deleted_nodes = 0
+            while True:
+                next_batch = node_batches + 1
+                logger.info(
+                    "Deleting node batch from Neptune sink "
+                    "(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
+                    provider_id,
+                    next_batch,
+                    deleted_nodes,
+                    time.perf_counter() - drop_t0,
+                )
+                result = session.run(
+                    f"""
+                    MATCH (n:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}`)
+                    WITH n LIMIT $batch_size
+                    DELETE n
+                    RETURN COUNT(n) AS deleted_nodes_count
+                    """,
+                    {"batch_size": BATCH_SIZE},
+                )
+                record = result.single()
+                # A missing record or a null count is treated as zero deletions.
+                deleted = (record["deleted_nodes_count"] if record else 0) or 0
+                if deleted == 0:
+                    break
+                node_batches += 1
+                deleted_nodes += deleted
+                logger.info(
+                    "Deleted node batch from Neptune sink "
+                    "(provider=%s, batch=%s, deleted_nodes=%s, total_nodes=%s, "
+                    "elapsed=%.3fs)",
+                    provider_id,
+                    node_batches,
+                    deleted,
+                    deleted_nodes,
+                    time.perf_counter() - drop_t0,
+                )
+
+        logger.info(
+            "Finished dropping provider graph from Neptune sink "
+            "(provider=%s, relationship_batches=%s, deleted_rels=%s, "
+            "node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
+            provider_id,
+            relationship_batches,
+            deleted_relationships,
+            node_batches,
+            deleted_nodes,
+            time.perf_counter() - drop_t0,
+        )
+        return deleted_nodes
+
+    def has_provider_data(self, database: str, provider_id: str) -> bool:  # noqa: ARG002
+        """Report whether the graph holds at least one node for the provider.
+
+        Args:
+            database: Accepted for interface compatibility; not used here.
+            provider_id: Provider whose label (from `get_provider_label`) is
+                looked up.
+
+        Returns:
+            `True` when the probe query yields a record, `False` otherwise.
+        """
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_RESOURCE_LABEL,
+            get_provider_label,
+        )
+
+        provider_label = get_provider_label(provider_id)
+        # Both labels are backtick-quoted, matching the node patterns used by
+        # `drop_subgraph` and `write_nodes`.
+        query = (
+            f"MATCH (n:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}`) "
+            "RETURN 1 LIMIT 1"
+        )
+        with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:
+            result = session.run(query)
+            return result.single() is not None
+
+    def clear_cache(self, database: str) -> None:  # noqa: ARG002
+        """No-op: Neptune has no user-facing cache-clear procedure."""
+
+    # Sync write path
+
+    def ensure_sync_indexes(self, database: str) -> None:  # noqa: ARG002
+        """No-op: no additional index is needed or supported on Neptune.
+
+        Neptune routes node and relationship lookups through `~id`, which is
+        the cluster's primary key.
+        """
+
+    def write_nodes(
+        self,
+        database: str,  # noqa: ARG002
+        labels: str,
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert a batch of nodes, keyed on the Neptune system `~id`.
+
+        Args:
+            database: Accepted for interface compatibility; not used here.
+            labels: Label expression interpolated verbatim after `SET n:`.
+                It is not escaped here, so the caller must supply a valid
+                Cypher label expression.
+            rows: Batch of node rows. The query reads `provider_element_id`
+                and `props` from each row. An empty batch is a no-op.
+        """
+        if not rows:
+            return
+        from tasks.jobs.attack_paths.config import (
+            PROVIDER_ELEMENT_ID_PROPERTY,
+            PROVIDER_RESOURCE_LABEL,
+        )
+
+        # MERGE on `~id` is the documented and engine-optimized idempotent
+        # upsert pattern for Neptune openCypher. The label inside the MERGE
+        # matters: Neptune assigns a default `vertex` label to any node
+        # created without an explicit one, so we pin `_ProviderResource`
+        # (which every synced node carries anyway) at MERGE-time. Additional
+        # labels are added after
+        #
+        # We also write `_provider_element_id` as a regular property so
+        # non-sync code (drop_subgraph, query helpers) keeps a stable contract
+        # that doesn't know about `~id`
+        query = f"""
+            UNWIND $rows AS row
+            MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`~id`: row.provider_element_id}})
+            SET n:{labels}
+            SET n += row.props
+            SET n.`{PROVIDER_ELEMENT_ID_PROPERTY}` = row.provider_element_id
+        """
+        with self.get_session() as session:
+            # `consume()` exhausts the result before the session closes.
+            session.run(query, {"rows": rows}).consume()
+
+    def write_relationships(
+        self,
+        database: str,  # noqa: ARG002
+        rel_type: str,
+        provider_id: str,  # noqa: ARG002 - encoded in start/end `~id` already
+        rows: list[dict[str, Any]],
+    ) -> None:
+        """Upsert a batch of relationships between already-written nodes.
+
+        Args:
+            database: Accepted for interface compatibility; not used here.
+            rel_type: Relationship type, interpolated backtick-quoted into the
+                MERGE pattern.
+            provider_id: Accepted for interface compatibility; not used here.
+            rows: Batch of relationship rows. The query reads
+                `start_element_id`, `end_element_id`, `provider_element_id`
+                and `props` from each row. An empty batch is a no-op.
+        """
+        if not rows:
+            return
+        from tasks.jobs.attack_paths.config import PROVIDER_ELEMENT_ID_PROPERTY
+
+        # `id(n) = $value` is Neptune's parameterized fast path; both endpoint
+        # MATCHes resolve in O(1) via the system `~id`, so per-row work stays
+        # bounded regardless of batch size
+        query = f"""
+            UNWIND $rows AS row
+            MATCH (s) WHERE id(s) = row.start_element_id
+            MATCH (e) WHERE id(e) = row.end_element_id
+            MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(e)
+            SET r += row.props
+        """
+        with self.get_session() as session:
+            # `consume()` exhausts the result before the session closes.
+            session.run(query, {"rows": rows}).consume()
+
+    # Test helpers
+
+    def get_writer(self) -> neo4j.Driver:
+        """Return the driver produced by `self._get_writer()` (test helper)."""
+        return self._get_writer()
+
+    def get_reader(self) -> neo4j.Driver:
+        """Return the driver produced by `self._get_reader()` (test helper)."""
+        return self._get_reader()
+
+
+# SigV4 auth provider
+
+
+class _NeptuneAuthToken(neo4j.Auth):
+    """Neo4j Auth backed by a SigV4-signed GET to `/opencypher`.
+
+    The signed headers are serialized to JSON and passed as the credentials of
+    a `basic` auth token.
+    """
+
+    def __init__(self, region: str, url: str) -> None:
+        """Sign a GET request for `<url>/opencypher` and wrap it as an auth token.
+
+        Args:
+            region: AWS region used for the SigV4 signature.
+            url: Base endpoint URL (scheme, host and port). A trailing slash
+                is tolerated.
+
+        Raises:
+            RuntimeError: If the boto credential chain resolves no credentials.
+        """
+        session = BotoSession()
+        credentials = session.get_credentials()
+        if credentials is None:
+            raise RuntimeError(
+                "No AWS credentials available for Neptune SigV4 signing. "
+                "Ensure the boto3 credential chain can resolve."
+            )
+        credentials = credentials.get_frozen_credentials()
+
+        # Strip trailing slashes so the signed path is always `/opencypher`
+        # and never `//opencypher`.
+        base_url = url.rstrip("/")
+        request = AWSRequest(method="GET", url=base_url + "/opencypher")
+        # SigV4 canonical Host must carry the real `host:port`
+        # Neptune runs on a non-default port (8182), so `.hostname` would drop it and break signing
+        request.headers.add_header("Host", urlsplit(base_url).netloc)
+        SigV4Auth(credentials, "neptune-db", region).add_auth(request)
+
+        # Only headers actually present on the signed request are forwarded.
+        auth_obj = {
+            header: request.headers[header]
+            for header in (
+                "Authorization",
+                "X-Amz-Date",
+                "X-Amz-Security-Token",
+                "Host",
+            )
+            if header in request.headers
+        }
+        auth_obj["HttpMethod"] = "GET"
+
+        super().__init__("basic", "username", json.dumps(auth_obj))
+
+
+def neptune_auth_provider(region: str, https_url: str) -> Callable[[], ExpiringAuth]:
+    """Build the zero-argument callable the neo4j driver uses to refresh credentials.
+
+    Each call signs a fresh `_NeptuneAuthToken` and pairs it with an expiry
+    `SIGV4_TOKEN_LIFETIME_MINUTES` minutes from the current UTC time.
+    """
+
+    def _provider() -> ExpiringAuth:
+        lifetime = datetime.timedelta(minutes=SIGV4_TOKEN_LIFETIME_MINUTES)
+        auth = _NeptuneAuthToken(region, https_url)
+        expiry = datetime.datetime.now(datetime.UTC) + lifetime
+        return ExpiringAuth(auth=auth, expires_at=expiry.timestamp())
+
+    return _provider
@@ -1,12 +1,11 @@
 import logging
-
-from typing import Any, Iterable
+from collections.abc import Iterable
+from typing import Any

 import neo4j
-
-from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
-
-from api.attack_paths import database as graph_database, AttackPathsQueryDefinition
+from api.attack_paths import AttackPathsQueryDefinition
+from api.attack_paths import database as graph_database
+from api.attack_paths import sink as sink_module
 from api.attack_paths.cypher_sanitizer import (
    inject_provider_label,
    validate_custom_query,
@@ -16,7 +15,10 @@ from api.attack_paths.queries.schema import (
    RAW_SCHEMA_URL,
    get_cartography_schema_query,
 )
+from api.models import AttackPathsScan
 from config.custom_logging import BackendLogger
+from config.env import env
+from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
 from tasks.jobs.attack_paths.config import (
    INTERNAL_LABELS,
    INTERNAL_PROPERTIES,
@@ -27,6 +29,10 @@ from tasks.jobs.attack_paths.config import (
 logger = logging.getLogger(BackendLogger.API)


+def _custom_query_timeout_ms() -> int:
+    """Return the read-query timeout in milliseconds.
+
+    Reads `ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS` (default 30) on every call.
+    """
+    timeout_seconds = env.int("ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30)
+    return timeout_seconds * 1000
+
+
 # Predefined query helpers


@@ -103,13 +109,13 @@ def execute_query(
    definition: AttackPathsQueryDefinition,
    parameters: dict[str, Any],
    provider_id: str,
+    scan: AttackPathsScan,
 ) -> dict[str, Any]:
    try:
-        graph = graph_database.execute_read_query(
-            database=database_name,
-            cypher=definition.cypher,
-            parameters=parameters,
-        )
+        # TODO: drop after Neptune cutover
+        # Route reads by the scan row's recorded sink, not by current settings.
+        backend = sink_module.get_backend_for_scan(scan)
+        graph = backend.execute_read_query(database_name, definition.cypher, parameters)
        return _serialize_graph(graph, provider_id)

    except graph_database.WriteQueryNotAllowedException:
@@ -143,22 +149,31 @@ def execute_custom_query(
    database_name: str,
    cypher: str,
    provider_id: str,
+    scan: AttackPathsScan,
 ) -> dict[str, Any]:
    # Defense-in-depth for custom queries:
-    # 1. neo4j.READ_ACCESS — prevents mutations at the driver level
-    # 2. inject_provider_label() — regex-based label injection scopes node patterns
-    # 3. _serialize_graph() — post-query filter drops nodes without the provider label
+    # 1. `neo4j.READ_ACCESS` — prevents mutations at the driver level
+    # 2. `inject_provider_label()` — regex-based label injection scopes node patterns
+    # 3. `_serialize_graph()` — post-query filter drops nodes without the provider label
+    # 4. `USING QUERY:TIMEOUTMILLISECONDS` on Neptune — server-side runaway cutoff
    #
    # Layer 2 is best-effort (regex can't fully parse Cypher);
    # layer 3 is the safety net that guarantees provider isolation.
    validate_custom_query(cypher)
    cypher = inject_provider_label(cypher, provider_id)

+    # TODO: drop after Neptune cutover
+    backend = sink_module.get_backend_for_scan(scan)
+
+    # Neptune enforces a cluster-level query timeout; prepending the hint
+    # makes the limit explicit and matches the client-side read timeout.
+    # Applies only when the scan's graph lives in Neptune.
+    if getattr(scan, "sink_backend", None) == "neptune":
+        timeout_ms = _custom_query_timeout_ms()
+        cypher = f"USING QUERY:TIMEOUTMILLISECONDS {timeout_ms}\n{cypher}"
+
    try:
-        graph = graph_database.execute_read_query(
-            database=database_name,
-            cypher=cypher,
-        )
+        graph = backend.execute_read_query(database_name, cypher, None)
        serialized = _serialize_graph(graph, provider_id)
        return _truncate_graph(serialized)

@@ -181,10 +196,11 @@ def execute_custom_query(


 def get_cartography_schema(
-    database_name: str, provider_id: str
+    database_name: str, provider_id: str, scan: AttackPathsScan
 ) -> dict[str, str] | None:
    try:
-        with graph_database.get_session(
+        backend = sink_module.get_backend_for_scan(scan)
+        with backend.get_session(
            database_name, default_access_mode=neo4j.READ_ACCESS
        ) as session:
            result = session.run(get_cartography_schema_query(provider_id))
@@ -1,18 +1,19 @@
-from typing import Optional, Tuple
+from math import isfinite
 from uuid import UUID

+from api.db_router import MainRouter
+from api.models import TenantAPIKey, TenantAPIKeyManager
 from cryptography.fernet import InvalidToken
+from django.core.exceptions import ObjectDoesNotExist
 from django.utils import timezone
 from drf_simple_apikey.backends import APIKeyAuthentication as BaseAPIKeyAuth
 from drf_simple_apikey.crypto import get_crypto
+from drf_simple_apikey.settings import package_settings
 from rest_framework.authentication import BaseAuthentication
 from rest_framework.exceptions import AuthenticationFailed
 from rest_framework.request import Request
 from rest_framework_simplejwt.authentication import JWTAuthentication

-from api.db_router import MainRouter
-from api.models import TenantAPIKey, TenantAPIKeyManager
-

 class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
    model = TenantAPIKey
@@ -23,18 +24,49 @@ class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
     def _authenticate_credentials(self, request, key):
         """
         Override to use admin connection, bypassing RLS during authentication.
-        Delegates to parent after temporarily routing model queries to admin DB.
+
+        Decrypts the key, validates the payload shape and expiry, loads the
+        key row through `MainRouter.admin_db`, then applies the revocation and
+        IP allow/deny checks. Returns `(api_key.entity, key)`; every failed
+        check raises `AuthenticationFailed`.
         """
-        # Temporarily point the model's manager to admin database
-        original_objects = self.model.objects
-        self.model.objects = self.model.objects.using(MainRouter.admin_db)
+        try:
+            payload = self.key_crypto.decrypt(key)
+        except ValueError:
+            raise AuthenticationFailed("Invalid API Key.")
+
+        if not isinstance(payload, dict):
+            raise AuthenticationFailed("Invalid API Key.")
+
+        payload_pk = payload.get("_pk")
+        payload_exp = payload.get("_exp")
+        # `bool` is a subclass of `int`, so it is rejected explicitly;
+        # `isfinite` rules out NaN and infinite expiry values.
+        if (
+            not isinstance(payload_pk, str)
+            or isinstance(payload_exp, bool)
+            or not isinstance(payload_exp, (int, float))
+            or not isfinite(payload_exp)
+        ):
+            raise AuthenticationFailed("Invalid API Key.")

         try:
-            # Call parent method which will now use admin database
-            return super()._authenticate_credentials(request, key)
-        finally:
-            # Restore original manager
-            self.model.objects = original_objects
+            api_key_pk = UUID(payload_pk)
+        except ValueError:
+            raise AuthenticationFailed("Invalid API Key.")
+
+        # Expiry is checked before the database lookup.
+        if payload_exp < timezone.now().timestamp():
+            raise AuthenticationFailed("API Key has already expired.")
+
+        try:
+            api_key = self.model.objects.using(MainRouter.admin_db).get(id=api_key_pk)
+        except ObjectDoesNotExist:
+            raise AuthenticationFailed("No entity matching this api key.")
+
+        if api_key.revoked:
+            raise AuthenticationFailed("This API Key has been revoked.")
+
+        client_ip = request.META.get(package_settings.IP_ADDRESS_HEADER)
+        if api_key.blacklisted_ips and client_ip in api_key.blacklisted_ips:
+            raise AuthenticationFailed("Access denied from blacklisted IP.")
+
+        # A missing client IP (`None`) is never in the whitelist, so it is
+        # rejected whenever a whitelist is configured.
+        if api_key.whitelisted_ips and client_ip not in api_key.whitelisted_ips:
+            raise AuthenticationFailed("Access restricted to specific IP addresses.")
+
+        return api_key.entity, key

    def authenticate(self, request: Request):
        prefixed_key = self.get_key(request)
@@ -81,7 +113,7 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):
    jwt_auth = JWTAuthentication()
    api_key_auth = TenantAPIKeyAuthentication()

-    def authenticate(self, request: Request) -> Optional[Tuple[object, dict]]:
+    def authenticate(self, request: Request) -> tuple[object, dict] | None:
        auth_header = request.headers.get("Authorization", "")

        # Prioritize JWT authentication if both are present
@@ -93,3 +125,30 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):

        # Default fallback
        return self.jwt_auth.authenticate(request)
+
+
+class SSEAuthentication(CombinedJWTOrAPIKeyAuthentication):
+    """JWT/API-Key authentication that additionally accepts `?access_token=<jwt>`.
+
+    The browser `EventSource` constructor takes only a URL and
+    `withCredentials`, so it cannot send an `Authorization` header. SSE
+    endpoints therefore also accept a JWT in the `access_token` query
+    parameter, the bearer-token parameter name from RFC 6750 Section 2.3.
+    """
+
+    def authenticate(self, request: Request):
+        """Authenticate via header when present, else via the query token.
+
+        The query parameter is only consulted when no `Authorization` header
+        is sent. With neither a header nor a query token, the parent class
+        handles the request.
+        """
+        header_present = bool(request.headers.get("Authorization", ""))
+        query_token = (
+            None if header_present else request.query_params.get("access_token")
+        )
+        if not query_token:
+            # Header-based auth, or nothing supplied at all: defer to the
+            # combined JWT/API-key path of the parent class.
+            return super().authenticate(request)
+
+        token = self.jwt_auth.get_validated_token(query_token)
+        return self.jwt_auth.get_user(token), token
@@ -1,3 +1,9 @@
+from api.authentication import CombinedJWTOrAPIKeyAuthentication
+from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
+from api.db_utils import POSTGRES_USER_VAR, rls_transaction
+from api.filters import CustomDjangoFilterBackend
+from api.models import Role, UserRoleRelationship
+from api.rbac.permissions import HasPermissions
 from django.conf import settings
 from django.db import transaction
 from rest_framework import permissions
@@ -8,13 +14,6 @@ from rest_framework.response import Response
 from rest_framework_json_api import filters
 from rest_framework_json_api.views import ModelViewSet

-from api.authentication import CombinedJWTOrAPIKeyAuthentication
-from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
-from api.db_utils import POSTGRES_USER_VAR, rls_transaction
-from api.filters import CustomDjangoFilterBackend
-from api.models import Role, UserRoleRelationship
-from api.rbac.permissions import HasPermissions
-

 class BaseViewSet(ModelViewSet):
    authentication_classes = [CombinedJWTOrAPIKeyAuthentication]
@@ -112,14 +112,14 @@ def get_compliance_frameworks(provider_type: Provider.ProviderChoices) -> list[s
    """List compliance framework identifiers available for `provider_type`.

    Includes both per-provider frameworks and universal top-level frameworks
-    (e.g. ``dora``, ``csa_ccm_4.0``).
+    (e.g. ``dora_2022_2554``, ``csa_ccm_4.0``).

    Args:
        provider_type (Provider.ProviderChoices): The cloud provider type
            (e.g., "aws", "azure", "gcp", "m365").

    Returns:
-        list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora").
+        list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora_2022_2554").
    """
    global AVAILABLE_COMPLIANCE_FRAMEWORKS
    if provider_type not in AVAILABLE_COMPLIANCE_FRAMEWORKS:
@@ -352,7 +352,7 @@ def generate_compliance_overview_template(
                total_requirements += 1
                provider_check_list = list(requirement.checks.get(provider_type, []))
                total_checks = len(provider_check_list)
-                checks_dict = {check: None for check in provider_check_list}
+                checks_dict = dict.fromkeys(provider_check_list)

                req_status_val = "MANUAL" if total_checks == 0 else "PASS"

@@ -3,8 +3,14 @@ import secrets
 import time
 import uuid
 from contextlib import contextmanager
-from datetime import datetime, timedelta, timezone
+from datetime import UTC, datetime, timedelta

+from api.db_router import (
+    READ_REPLICA_ALIAS,
+    get_read_db_alias,
+    reset_read_db_alias,
+    set_read_db_alias,
+)
 from celery.utils.log import get_task_logger
 from config.env import env
 from django.conf import settings
@@ -22,13 +28,6 @@ from psycopg2 import sql as psycopg2_sql
 from psycopg2.extensions import AsIs, new_type, register_adapter, register_type
 from rest_framework_json_api.serializers import ValidationError

-from api.db_router import (
-    READ_REPLICA_ALIAS,
-    get_read_db_alias,
-    reset_read_db_alias,
-    set_read_db_alias,
-)
-
 logger = get_task_logger(__name__)

 DB_USER = settings.DATABASES["default"]["USER"] if not settings.TESTING else "test"
@@ -170,7 +169,7 @@ def one_week_from_now():
    """
    Return a datetime object with a date one week from now.
    """
-    return datetime.now(timezone.utc) + timedelta(days=7)
+    return datetime.now(UTC) + timedelta(days=7)


 def generate_random_token(length: int = 14, symbols: str | None = None) -> str:
@@ -405,10 +404,10 @@ def _should_create_index_on_partition(
            # Unknown month abbreviation, include it to be safe
            return True

-        partition_date = datetime(year, month, 1, tzinfo=timezone.utc)
+        partition_date = datetime(year, month, 1, tzinfo=UTC)

        # Get current month start
-        now = datetime.now(timezone.utc)
+        now = datetime.now(UTC)
        current_month_start = now.replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )
@@ -1,14 +1,13 @@
 import uuid
 from functools import wraps

-from django.core.exceptions import ObjectDoesNotExist
-from django.db import DatabaseError, connection, transaction
-from rest_framework_json_api.serializers import ValidationError
-
 from api.db_router import READ_REPLICA_ALIAS
 from api.db_utils import POSTGRES_TENANT_VAR, SET_CONFIG_QUERY, rls_transaction
 from api.exceptions import ProviderDeletedException
 from api.models import Provider, Scan
+from django.core.exceptions import ObjectDoesNotExist
+from django.db import DatabaseError, connection, transaction
+from rest_framework_json_api.serializers import ValidationError


 def set_tenant(func=None, *, keep_tenant=False):
@@ -1,19 +1,4 @@
-from datetime import date, datetime, timedelta, timezone
-
-from dateutil.parser import parse
-from django.conf import settings
-from django.db.models import F, Q
-from django_filters.rest_framework import (
-    BaseInFilter,
-    BooleanFilter,
-    CharFilter,
-    ChoiceFilter,
-    DateFilter,
-    FilterSet,
-    UUIDFilter,
-)
-from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
-from rest_framework_json_api.serializers import ValidationError
+from datetime import UTC, date, datetime, timedelta

 from api.constants import SEVERITY_ORDER
 from api.db_utils import (
@@ -68,6 +53,20 @@ from api.uuid_utils import (
    uuid7_start,
 )
 from api.v1.serializers import TaskBase
+from dateutil.parser import parse
+from django.conf import settings
+from django.db.models import F, Q
+from django_filters.rest_framework import (
+    BaseInFilter,
+    BooleanFilter,
+    CharFilter,
+    ChoiceFilter,
+    DateFilter,
+    FilterSet,
+    UUIDFilter,
+)
+from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
+from rest_framework_json_api.serializers import ValidationError


 class CustomDjangoFilterBackend(DjangoFilterBackend):
@@ -102,7 +101,7 @@ class BaseProviderFilter(FilterSet):
    """
    Abstract base filter for models with direct FK to Provider.

-    Provides standard provider_id and provider_type filters.
+    Provides standard provider_id, provider_type, and provider_groups filters.
    Subclasses must define Meta.model.
    """

@@ -116,6 +115,16 @@ class BaseProviderFilter(FilterSet):
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )

    class Meta:
        abstract = True
@@ -126,7 +135,7 @@ class BaseScanProviderFilter(FilterSet):
    """
    Abstract base filter for models with FK to Scan (and Scan has FK to Provider).

-    Provides standard provider_id and provider_type filters via scan relationship.
+    Provides standard provider_id, provider_type, and provider_groups filters via scan relationship.
    Subclasses must define Meta.model.
    """

@@ -140,6 +149,16 @@ class BaseScanProviderFilter(FilterSet):
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
+    provider_groups = UUIDFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )

    class Meta:
        abstract = True
@@ -160,6 +179,16 @@ class CommonFindingFilters(FilterSet):
    provider_type__in = ChoiceInFilter(
        choices=Provider.ProviderChoices.choices, field_name="scan__provider__provider"
    )
+    provider_groups = UUIDFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    provider_uid = CharFilter(field_name="scan__provider__uid", lookup_expr="exact")
    provider_uid__in = CharInFilter(field_name="scan__provider__uid", lookup_expr="in")
    provider_uid__icontains = CharFilter(
@@ -370,6 +399,12 @@ class ProviderFilter(FilterSet):
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
+    provider_groups = UUIDFilter(
+        field_name="provider_groups__id", lookup_expr="exact", distinct=True
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider_groups__id", lookup_expr="in", distinct=True
+    )

    class Meta:
        model = Provider
@@ -395,6 +430,16 @@ class ProviderRelationshipFilterSet(FilterSet):
    provider_type__in = ChoiceInFilter(
        choices=Provider.ProviderChoices.choices, field_name="provider__provider"
    )
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    provider_uid = CharFilter(field_name="provider__uid", lookup_expr="exact")
    provider_uid__in = CharInFilter(field_name="provider__uid", lookup_expr="in")
    provider_uid__icontains = CharFilter(
@@ -552,12 +597,12 @@ class ResourceFilter(ProviderRelationshipFilterSet):
        gte_date = (
            parse(self.data.get("updated_at__gte")).date()
            if self.data.get("updated_at__gte")
-            else datetime.now(timezone.utc).date()
+            else datetime.now(UTC).date()
        )
        lte_date = (
            parse(self.data.get("updated_at__lte")).date()
            if self.data.get("updated_at__lte")
-            else datetime.now(timezone.utc).date()
+            else datetime.now(UTC).date()
        )

        if abs(lte_date - gte_date) > timedelta(
@@ -702,9 +747,9 @@ class FindingFilter(CommonFindingFilters):
        lte_date = cleaned.get("inserted_at__lte") or exact_date

        if gte_date is None:
-            gte_date = datetime.now(timezone.utc).date()
+            gte_date = datetime.now(UTC).date()
        if lte_date is None:
-            lte_date = datetime.now(timezone.utc).date()
+            lte_date = datetime.now(UTC).date()

        if abs(lte_date - gte_date) > timedelta(
            days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -798,7 +843,7 @@ class FindingFilter(CommonFindingFilters):
    def maybe_date_to_datetime(value):
        dt = value
        if isinstance(value, date):
-            dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
+            dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
        return dt


@@ -887,9 +932,9 @@ class FindingGroupFilter(CommonFindingFilters):
        lte_date = cleaned.get("inserted_at__lte") or exact_date

        if gte_date is None:
-            gte_date = datetime.now(timezone.utc).date()
+            gte_date = datetime.now(UTC).date()
        if lte_date is None:
-            lte_date = datetime.now(timezone.utc).date()
+            lte_date = datetime.now(UTC).date()

        if abs(lte_date - gte_date) > timedelta(
            days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -931,7 +976,7 @@ class FindingGroupFilter(CommonFindingFilters):
        """Convert date to datetime if needed."""
        dt = value
        if isinstance(value, date):
-            dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
+            dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
        return dt


@@ -1001,6 +1046,16 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
        field_name="provider__provider", choices=Provider.ProviderChoices.choices
    )
    provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )

    class Meta:
        model = FindingGroupDailySummary
@@ -1035,9 +1090,9 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
        lte_date = cleaned.get("inserted_at__lte") or exact_date

        if gte_date is None:
-            gte_date = datetime.now(timezone.utc).date()
+            gte_date = datetime.now(UTC).date()
        if lte_date is None:
-            lte_date = datetime.now(timezone.utc).date()
+            lte_date = datetime.now(UTC).date()

        if abs(lte_date - gte_date) > timedelta(
            days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -1076,7 +1131,7 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
    def _maybe_date_to_datetime(value):
        dt = value
        if isinstance(value, date):
-            dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
+            dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
        return dt


@@ -1101,6 +1156,16 @@ class LatestFindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
        field_name="provider__provider", choices=Provider.ProviderChoices.choices
    )
    provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )

    class Meta:
        model = FindingGroupDailySummary
@@ -1280,12 +1345,19 @@ class RoleFilter(FilterSet):
        }


-class ComplianceOverviewFilter(FilterSet):
+class ComplianceOverviewFilter(BaseScanProviderFilter):
+    """
+    Keep provider filters in the schema while runtime filtering resolves scans first.
+
+    Compliance overview provider filters are applied to the latest completed scans
+    in the viewset, then this filterset handles the remaining compliance fields.
+    """
+
    inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
-    scan_id = UUIDFilter(field_name="scan_id", required=True)
+    scan_id = UUIDFilter(field_name="scan_id")
    region = CharFilter(field_name="region")

-    class Meta:
+    class Meta(BaseScanProviderFilter.Meta):
        model = ComplianceRequirementOverview
        fields = {
            "inserted_at": ["date", "gte", "lte"],
@@ -1306,6 +1378,16 @@ class ScanSummaryFilter(FilterSet):
    provider_type__in = ChoiceInFilter(
        field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
    )
+    provider_groups = UUIDFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    region = CharFilter(field_name="region")

    class Meta:
@@ -1329,6 +1411,16 @@ class DailySeveritySummaryFilter(FilterSet):
    provider_type__in = ChoiceInFilter(
        field_name="provider__provider", choices=Provider.ProviderChoices.choices
    )
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    date_from = DateFilter(method="filter_noop")
    date_to = DateFilter(method="filter_noop")

@@ -1585,6 +1677,16 @@ class ThreatScoreSnapshotFilter(FilterSet):
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
+    provider_groups = UUIDFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    compliance_id = CharFilter(field_name="compliance_id", lookup_expr="exact")
    compliance_id__in = CharInFilter(field_name="compliance_id", lookup_expr="in")

@@ -1628,6 +1730,16 @@ class ResourceGroupOverviewFilter(FilterSet):
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
+    provider_groups = UUIDFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="exact",
+        distinct=True,
+    )
+    provider_groups__in = UUIDInFilter(
+        field_name="scan__provider__provider_groups__id",
+        lookup_expr="in",
+        distinct=True,
+    )
    resource_group = CharFilter(field_name="resource_group", lookup_expr="exact")
    resource_group__in = CharInFilter(field_name="resource_group", lookup_expr="in")

@@ -2,8 +2,9 @@
 Format (draft-inadarei-api-health-check-06).

 Liveness reports only process status. Readiness verifies that PostgreSQL,
-Valkey and Neo4j are reachable and returns per-dependency detail when any
-of them is unreachable.
+Valkey and the attack-paths graph store (Neo4j or Neptune, per
+``ATTACK_PATHS_SINK_DATABASE``) are reachable and returns per-dependency
+detail when any of them is unreachable.
 """

 from __future__ import annotations
@@ -11,8 +12,10 @@ from __future__ import annotations
 import logging
 import threading
 import time
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import TimeoutError as FuturesTimeoutError
 from contextlib import suppress
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from typing import Any

 import redis
@@ -37,9 +40,28 @@ STATUS_FAIL = "fail"
 STATUS_WARN = "warn"

 # Short socket timeout so a stuck Valkey cannot stall the probe.
-# Neo4j inherits its driver-level ``connection_acquisition_timeout``.
 VALKEY_PROBE_TIMEOUT_SECONDS = 2

+# Probe-scoped budget for the graph database.
+# ``Driver.verify_connectivity()`` takes no timeout; its only bound is the
+# driver-level ``connection_acquisition_timeout`` (60s on Neptune). The
+# probe needs its own budget, independent of the workload driver, so a
+# graph-database outage cannot pin a worker thread (and the readiness lock)
+# for a minute.
+GRAPH_DB_PROBE_TIMEOUT_SECONDS = 5
+
+# Bounded pool that enforces ``GRAPH_DB_PROBE_TIMEOUT_SECONDS``. If the
+# graph database is unreachable the probe call blocks until the driver's
+# own acquisition timeout fires; we abandon the future after the budget and
+# report ``fail``. Orphaned tasks are capped by ``max_workers`` plus the 3s
+# readiness cache plus the per-IP throttle, so they cannot pile up: worst
+# case during a graph-database outage is every readiness call failing fast
+# in ``GRAPH_DB_PROBE_TIMEOUT_SECONDS`` with at most 2 background threads
+# stuck for <= the driver acquisition timeout.
+_graph_db_probe_executor = ThreadPoolExecutor(
+    max_workers=2, thread_name_prefix="health-graph-db-probe"
+)
+
 # Brief cache window so high-frequency probes (ALB target groups, scrapers)
 # do not stampede the actual dependency checks.
 CACHE_CONTROL_HEADER = "max-age=3, must-revalidate"
@@ -62,11 +84,7 @@ class HealthJSONRenderer(JSONRenderer):


 def _now_iso() -> str:
-    return (
-        datetime.now(timezone.utc)
-        .isoformat(timespec="milliseconds")
-        .replace("+00:00", "Z")
-    )
+    return datetime.now(UTC).isoformat(timespec="milliseconds").replace("+00:00", "Z")


 def _measure(name: str, check_fn) -> tuple[dict[str, Any], float]:
@@ -113,11 +131,24 @@ def _probe_valkey() -> None:
            client.close()


-def _probe_neo4j() -> None:
-    # Lazy import: avoids pulling attack_paths into the boot import graph.
-    from api.attack_paths.database import get_driver
+def _graph_db_component_id() -> str:
+    """Return the active graph database name for the ``componentId`` field.
+
+    The value is read from ``settings.ATTACK_PATHS_SINK_DATABASE`` on every
+    call and normalized by stripping surrounding whitespace and lowercasing.
+    """
+    return settings.ATTACK_PATHS_SINK_DATABASE.strip().lower()

-    get_driver().verify_connectivity()
+
+def _probe_graph_db() -> None:
+    """Verify graph-database connectivity within the probe's own time budget.
+
+    Submits ``verify_connectivity`` to ``_graph_db_probe_executor`` and waits
+    at most ``GRAPH_DB_PROBE_TIMEOUT_SECONDS`` for it to finish. Any exception
+    raised by the connectivity check itself propagates to the caller through
+    ``future.result()``.
+
+    Raises:
+        TimeoutError: If the check does not complete within the budget.
+    """
+    # Lazy import: avoids pulling attack_paths into the boot import graph
+    from api.attack_paths.database import verify_connectivity
+
+    future = _graph_db_probe_executor.submit(verify_connectivity)
+    try:
+        future.result(timeout=GRAPH_DB_PROBE_TIMEOUT_SECONDS)
+    except FuturesTimeoutError as exc:
+        # NOTE(review): on Python 3.11+ ``concurrent.futures.TimeoutError`` is
+        # the builtin ``TimeoutError``, so a ``TimeoutError`` raised inside
+        # ``verify_connectivity`` itself would presumably also land here and be
+        # reported as a budget overrun - confirm whether that matters.
+        # Do not wait for the abandoned task; it ends when the driver's own acquisition timeout fires
+        # ``cancel()`` only has an effect if the task has not started running.
+        future.cancel()
+        raise TimeoutError(
+            f"graph-db probe exceeded {GRAPH_DB_PROBE_TIMEOUT_SECONDS}s"
+        ) from exc


 def _build_check_entry(
@@ -180,14 +211,18 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
        ):
            return snapshot[1], snapshot[2]

+        graph_db_component_id = _graph_db_component_id()
+
        postgres_result, postgres_ms = _measure("postgres", _probe_postgres)
        valkey_result, valkey_ms = _measure("valkey", _probe_valkey)
-        neo4j_result, neo4j_ms = _measure("neo4j", _probe_neo4j)
+        graph_db_result, graph_db_ms = _measure(graph_db_component_id, _probe_graph_db)

        entries = [
            _build_check_entry("postgres", "datastore", postgres_result, postgres_ms),
            _build_check_entry("valkey", "datastore", valkey_result, valkey_ms),
-            _build_check_entry("neo4j", "datastore", neo4j_result, neo4j_ms),
+            _build_check_entry(
+                graph_db_component_id, "datastore", graph_db_result, graph_db_ms
+            ),
        ]
        overall = _aggregate_status(entries)

@@ -195,7 +230,7 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
        payload["checks"] = {
            "postgres:responseTime": [entries[0]],
            "valkey:responseTime": [entries[1]],
-            "neo4j:responseTime": [entries[2]],
+            "graphdb:responseTime": [entries[2]],
        }

        http_status = (
@@ -237,10 +272,10 @@ class LivenessView(APIView):
 class ReadinessView(APIView):
    """Readiness probe.

-    Returns 200 when PostgreSQL, Valkey and Neo4j all respond, or 503 with
-    per-dependency detail when any of them is unreachable. Per-IP throttle
-    plus the short in-process result cache cap the real dependency hits
-    regardless of inbound traffic shape.
+    Returns 200 when PostgreSQL, Valkey and the attack-paths graph store
+    all respond, or 503 with per-dependency detail when any of them is
+    unreachable. Per-IP throttle plus the short in-process result cache cap
+    the real dependency hits regardless of inbound traffic shape.
    """

    authentication_classes: list = []
@@ -1,11 +1,8 @@
 import random
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from math import ceil
 from uuid import uuid4

-from django.core.management.base import BaseCommand
-from tqdm import tqdm
-
 from api.db_utils import rls_transaction
 from api.models import (
    Finding,
@@ -16,7 +13,9 @@ from api.models import (
    Scan,
    StatusChoices,
 )
+from django.core.management.base import BaseCommand
 from prowler.lib.check.models import CheckMetadata
+from tqdm import tqdm


 class Command(BaseCommand):
@@ -116,7 +115,7 @@ class Command(BaseCommand):
                trigger="manual",
                state="executing",
                progress=0,
-                started_at=datetime.now(timezone.utc),
+                started_at=datetime.now(UTC),
            )
        scan_state = "completed"

@@ -272,10 +271,8 @@ class Command(BaseCommand):
            self.stdout.write(self.style.ERROR(f"Failed to populate test data: {e}"))
            scan_state = "failed"
        finally:
-            scan.completed_at = datetime.now(timezone.utc)
-            scan.duration = int(
-                (datetime.now(timezone.utc) - scan.started_at).total_seconds()
-            )
+            scan.completed_at = datetime.now(UTC)
+            scan.duration = int((datetime.now(UTC) - scan.started_at).total_seconds())
            scan.progress = 100
            scan.state = scan_state
            scan.unique_resource_count = num_resources
@@ -1,5 +1,4 @@
 from django.core.management.base import BaseCommand
-
 from tasks.jobs.orphan_recovery import reconcile_orphans


@@ -2,6 +2,31 @@ import logging
 import time

 from config.custom_logging import BackendLogger
+from django.core.handlers.asgi import ASGIRequest
+from django.db import connections
+
+
+class CloseDBConnectionsMiddleware:
+    """
+    Release stale DB connections at the end of each ASGI request.
+
+    Under the ASGI worker, connections opened by sync views are not released
+    by Django's normal request-boundary cleanup, so they accumulate idle until
+    Postgres runs out of slots. Only ASGI requests are handled; the sync WSGI
+    test client manages its own connections and must be left alone.
+
+    The cleanup calls ``close_if_unusable_or_obsolete()`` on every connection
+    that is already initialized and not inside an atomic block.
+    NOTE(review): that method presumably closes only connections that hit an
+    error or outlived ``CONN_MAX_AGE``, so healthy persistent connections
+    stay open - confirm against the Django version in use.
+    """
+
+    def __init__(self, get_response):
+        # Next middleware or view in the chain.
+        self.get_response = get_response
+
+    def __call__(self, request):
+        try:
+            return self.get_response(request)
+        finally:
+            # ``finally`` runs the cleanup even when the view raises.
+            if isinstance(request, ASGIRequest):
+                for conn in connections.all(initialized_only=True):
+                    # Leave connections with an open atomic block untouched.
+                    if not conn.in_atomic_block:
+                        conn.close_if_unusable_or_obsolete()


 def extract_auth_info(request) -> dict:
@@ -1,26 +1,13 @@
 import uuid
 from functools import partial

+import api.rls
 import django.contrib.auth.models
 import django.contrib.postgres.indexes
 import django.contrib.postgres.search
 import django.core.validators
 import django.db.models.deletion
 import django.utils.timezone
-from django.conf import settings
-from django.db import migrations, models
-from psqlextra.backend.migrations.operations.add_default_partition import (
-    PostgresAddDefaultPartition,
-)
-from psqlextra.backend.migrations.operations.create_partitioned_model import (
-    PostgresCreatePartitionedModel,
-)
-from psqlextra.manager.manager import PostgresManager
-from psqlextra.models.partitioned import PostgresPartitionedModel
-from psqlextra.types import PostgresPartitioningMethod
-from uuid6 import uuid7
-
-import api.rls
 from api.db_utils import (
    DB_PROWLER_PASSWORD,
    DB_PROWLER_USER,
@@ -53,6 +40,18 @@ from api.models import (
    StateChoices,
    StatusChoices,
 )
+from django.conf import settings
+from django.db import migrations, models
+from psqlextra.backend.migrations.operations.add_default_partition import (
+    PostgresAddDefaultPartition,
+)
+from psqlextra.backend.migrations.operations.create_partitioned_model import (
+    PostgresCreatePartitionedModel,
+)
+from psqlextra.manager.manager import PostgresManager
+from psqlextra.models.partitioned import PostgresPartitionedModel
+from psqlextra.types import PostgresPartitioningMethod
+from uuid6 import uuid7

 DB_NAME = settings.DATABASES["default"]["NAME"]

@@ -1,8 +1,7 @@
+from api.db_utils import DB_PROWLER_USER
 from django.conf import settings
 from django.db import migrations

-from api.db_utils import DB_PROWLER_USER
-
 DB_NAME = settings.DATABASES["default"]["NAME"]


@@ -2,12 +2,11 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 from django.conf import settings
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -1,6 +1,5 @@
-from django.db import migrations
-
 from api.db_router import MainRouter
+from django.db import migrations


 def create_admin_role(apps, schema_editor):
@@ -1,12 +1,11 @@
 import json
-from datetime import datetime, timedelta, timezone
+from datetime import UTC, datetime, timedelta

 import django.db.models.deletion
-from django.db import migrations, models
-from django_celery_beat.models import PeriodicTask
-
 from api.db_utils import rls_transaction
 from api.models import Scan, StateChoices
+from django.db import migrations, models
+from django_celery_beat.models import PeriodicTask


 def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
@@ -17,11 +16,11 @@ def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
        tenant_id = task_kwargs["tenant_id"]
        provider_id = task_kwargs["provider_id"]

-        current_time = datetime.now(timezone.utc)
+        current_time = datetime.now(UTC)
        scheduled_time_today = datetime.combine(
            current_time.date(),
            daily_scheduled_scan_task.start_time.time(),
-            tzinfo=timezone.utc,
+            tzinfo=UTC,
        )

        if current_time < scheduled_time_today:
@@ -2,10 +2,9 @@

 from functools import partial

-from django.db import migrations
-
 from api.db_utils import IntegrationTypeEnum, PostgresEnumMigration, register_enum
 from api.models import Integration
+from django.db import migrations

 IntegrationTypeEnumMigration = PostgresEnumMigration(
    enum_name="integration_type",
@@ -2,12 +2,11 @@

 import uuid

-import django.db.models.deletion
-from django.db import migrations, models
-
 import api.db_utils
 import api.rls
+import django.db.models.deletion
 from api.rls import RowLevelSecurityConstraint
+from django.db import migrations, models


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 # Generated by Django 5.1.5 on 2025-03-25 11:29

-from django.db import migrations, models
-
 import api.db_utils
+from django.db import migrations, models


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 # Generated by Django 5.1.7 on 2025-04-16 08:47

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,12 +2,11 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 import uuid6
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -1,8 +1,7 @@
 from functools import partial

-from django.db import migrations
-
 from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 from functools import partial

-from django.db import migrations
-
 from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,12 +2,11 @@

 import uuid

-import django.db.models.deletion
-from django.db import migrations, models
-
 import api.db_utils
 import api.rls
+import django.db.models.deletion
 from api.rls import RowLevelSecurityConstraint
+from django.db import migrations, models


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 from functools import partial

-from django.db import migrations
-
 from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,12 +2,11 @@

 import uuid

+import api.rls
 import django.core.validators
 import django.db.models.deletion
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -2,13 +2,12 @@

 import uuid

+import api.db_utils
+import api.rls
 import django.db.models.deletion
 from django.conf import settings
 from django.db import migrations, models

-import api.db_utils
-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -2,10 +2,9 @@

 from functools import partial

-from django.db import migrations
-
 from api.db_utils import PostgresEnumMigration, ProcessorTypeEnum, register_enum
 from api.models import Processor
+from django.db import migrations

 ProcessorTypeEnumMigration = PostgresEnumMigration(
    enum_name="processor_type",
@@ -2,12 +2,11 @@

 import uuid

-import django.db.models.deletion
-from django.db import migrations, models
-
 import api.db_utils
 import api.rls
+import django.db.models.deletion
 from api.rls import RowLevelSecurityConstraint
+from django.db import migrations, models


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 from functools import partial

-from django.db import migrations
-
 from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 from functools import partial

-from django.db import migrations
-
 from api.db_utils import create_index_on_partitions, drop_index_on_partitions
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 # Generated by Django 5.1.7 on 2025-07-09 14:44

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,15 +2,14 @@

 import uuid

+import api.db_utils
+import api.rls
 import django.core.validators
 import django.db.models.deletion
 import drf_simple_apikey.models
 from django.conf import settings
 from django.db import migrations, models

-import api.db_utils
-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -4,15 +4,14 @@ import json
 import logging
 import uuid

+import api.rls
 import django.db.models.deletion
+from api.db_router import MainRouter
 from config.custom_logging import BackendLogger
 from cryptography.fernet import Fernet
 from django.conf import settings
 from django.db import migrations, models

-import api.rls
-from api.db_router import MainRouter
-
 logger = logging.getLogger(BackendLogger.API)


@@ -1,8 +1,7 @@
 # Generated by Django 5.1.7 on 2025-10-14 00:00

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,14 +2,13 @@

 import uuid

+import api.rls
 import django.contrib.postgres.fields
 import django.core.validators
 import django.db.models.deletion
 from django.conf import settings
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -1,8 +1,7 @@
 # Generated by Django 5.1.10 on 2025-09-09 09:25

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 # Generated by Django 5.1.13 on 2025-11-05 08:37

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -2,11 +2,10 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -2,11 +2,10 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -2,11 +2,10 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -2,11 +2,10 @@

 import uuid

+import api.rls
 import django.db.models.deletion
 from django.db import migrations, models

-import api.rls
-

 class Migration(migrations.Migration):
    dependencies = [
@@ -1,10 +1,9 @@
 # Generated by Django 5.1.14 on 2025-12-10

-from django.db import migrations
-from tasks.tasks import backfill_daily_severity_summaries_task
-
 from api.db_router import MainRouter
 from api.rls import Tenant
+from django.db import migrations
+from tasks.tasks import backfill_daily_severity_summaries_task


 def trigger_backfill_task(apps, schema_editor):
@@ -1,10 +1,9 @@
 import uuid

-import django.db.models.deletion
-from django.db import migrations, models
-
 import api.db_utils
 import api.rls
+import django.db.models.deletion
+from django.db import migrations, models


 class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
 # Generated by Django migration for Alibaba Cloud provider support

-from django.db import migrations
-
 import api.db_utils
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -1,10 +1,9 @@
 import uuid

-import django.db.models.deletion
-from django.db import migrations, models
-
 import api.db_utils
 import api.rls
+import django.db.models.deletion
+from django.db import migrations, models


 class Migration(migrations.Migration):
--- a/Show More
+++ b/Show More