Merge branch 'master' of github.com:prowler-cloud/prowler into pr11569-coderabbit-fix

This commit is contained in:
Hugo P.Brito
2026-06-30 16:05:15 +01:00
1245 changed files with 106147 additions and 12120 deletions
+17 -5
View File
@@ -6,14 +6,20 @@
PROWLER_UI_VERSION="stable"
AUTH_URL=http://localhost:3000
API_BASE_URL=http://prowler-api:8080/api/v1
# deprecated, use UI_API_BASE_URL
NEXT_PUBLIC_API_BASE_URL=${API_BASE_URL}
UI_API_BASE_URL=${API_BASE_URL}
# deprecated, use UI_API_DOCS_URL
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
UI_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
AUTH_TRUST_HOST=true
UI_PORT=3000
# openssl rand -base64 32
AUTH_SECRET="N/c6mnaS5+SWq81+819OrzQZlmx1Vxtp/orjttJSmw8="
# Google Tag Manager ID
# Google Tag Manager ID (empty/unset ⇒ GTM not loaded, zero egress)
# deprecated, use UI_GOOGLE_TAG_MANAGER_ID
NEXT_PUBLIC_GOOGLE_TAG_MANAGER_ID=""
UI_GOOGLE_TAG_MANAGER_ID=""
#### MCP Server ####
PROWLER_MCP_VERSION=stable
@@ -139,13 +145,19 @@ DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute
# Sentry settings
SENTRY_ENVIRONMENT=local
# Sentry for the web app (server + browser). Empty/unset UI_SENTRY_DSN ⇒
# Sentry disabled, zero egress. SENTRY_RELEASE (unprefixed) feeds the web app's
# server/edge SDKs.
UI_SENTRY_DSN=
UI_SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
# Reserved runtime public config (registered now; no UI consumer yet)
# POSTHOG_KEY=
# POSTHOG_HOST=
# REO_DEV_CLIENT_ID=
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.31.0
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.32.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
-1
View File
@@ -1,7 +1,6 @@
# SDK
/* @prowler-cloud/detection-remediation
/prowler/ @prowler-cloud/detection-remediation
/prowler/compliance/ @prowler-cloud/compliance
/tests/ @prowler-cloud/detection-remediation
/dashboard/ @prowler-cloud/detection-remediation
/docs/ @prowler-cloud/detection-remediation
+3 -3
View File
@@ -1,5 +1,5 @@
name: 'OSV-Scanner'
description: 'Install osv-scanner and scan a lockfile, failing on HIGH/CRITICAL/UNKNOWN severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
description: 'Install osv-scanner and scan a lockfile, failing on CRITICAL severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
author: 'Prowler'
inputs:
@@ -7,9 +7,9 @@ inputs:
description: 'Path to the lockfile to scan, relative to the repository root (e.g. uv.lock, api/uv.lock, ui/pnpm-lock.yaml).'
required: true
severity-levels:
description: 'Comma-separated severity levels that fail the scan. Default: HIGH,CRITICAL,UNKNOWN.'
description: 'Comma-separated severity levels that fail the scan. Default: CRITICAL.'
required: false
default: 'HIGH,CRITICAL,UNKNOWN'
default: 'CRITICAL'
version:
description: 'osv-scanner release tag to install. When overriding, you MUST also override binary-sha256.'
required: false
+20 -2
View File
@@ -43,8 +43,17 @@ runs:
if: github.repository_owner == 'prowler-cloud' && github.repository != 'prowler-cloud/prowler'
shell: bash
working-directory: ${{ inputs.working-directory }}
env:
GITHUB_TOKEN: ${{ github.token }}
run: |
LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
-H "Authorization: Bearer ${GITHUB_TOKEN}" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
| jq -er '.sha') || {
echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
exit 1
}
echo "Latest commit hash: $LATEST_COMMIT"
sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
echo "Updated uv.lock entry:"
@@ -54,8 +63,17 @@ runs:
if: github.event_name == 'push' && github.ref == 'refs/heads/master' && github.repository == 'prowler-cloud/prowler'
shell: bash
working-directory: ${{ inputs.working-directory }}
env:
GITHUB_TOKEN: ${{ github.token }}
run: |
LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
-H "Authorization: Bearer ${GITHUB_TOKEN}" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
| jq -er '.sha') || {
echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
exit 1
}
echo "Latest commit hash: $LATEST_COMMIT"
sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
echo "Updated uv.lock entry:"
+2 -2
View File
@@ -63,7 +63,7 @@ runs:
exit-code: '0'
scanners: 'vuln'
timeout: '5m'
version: 'v0.69.2'
version: 'v0.71.2'
- name: Run Trivy vulnerability scan (SARIF)
if: inputs.upload-sarif == 'true' && github.event_name == 'push'
@@ -76,7 +76,7 @@ runs:
exit-code: '0'
scanners: 'vuln'
timeout: '5m'
version: 'v0.69.2'
version: 'v0.71.2'
- name: Upload Trivy results to GitHub Security tab
if: inputs.upload-sarif == 'true' && github.event_name == 'push'
+5
View File
@@ -77,6 +77,11 @@ provider/okta:
- any-glob-to-any-file: "prowler/providers/okta/**"
- any-glob-to-any-file: "tests/providers/okta/**"
provider/linode:
- changed-files:
- any-glob-to-any-file: "prowler/providers/linode/**"
- any-glob-to-any-file: "tests/providers/linode/**"
github_actions:
- changed-files:
- any-glob-to-any-file: ".github/workflows/*"
+2 -3
View File
@@ -6,8 +6,7 @@
# - .github/workflows/api-security.yml, sdk-security.yml, ui-security.yml
#
# Severity levels (comma-separated) are read from OSV_SEVERITY_LEVELS.
# Default: HIGH,CRITICAL,UNKNOWN — preserves prior .safety-policy.yml policy
# (ignore-cvss-severity-below: 7 + ignore-cvss-unknown-severity: False).
# Default: CRITICAL — only CVSS >= 9.0 findings fail the scan.
# osv-scanner has no native CVSS threshold (google/osv-scanner#1400, closed
# not-planned). Severity is derived from $group.max_severity (numeric CVSS
# score string) which osv-scanner emits per group.
@@ -33,7 +32,7 @@ set -euo pipefail
ROOT="$(git rev-parse --show-toplevel)"
CONFIG="${ROOT}/osv-scanner.toml"
SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-HIGH,CRITICAL,UNKNOWN}"
SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-CRITICAL}"
for bin in osv-scanner jq; do
if ! command -v "${bin}" >/dev/null 2>&1; then
@@ -272,27 +272,3 @@ jobs:
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
step-outcome: ${{ steps.outcome.outputs.outcome }}
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
trigger-deployment:
needs: [setup, container-build-push]
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
with:
egress-policy: block
allowed-endpoints: >
api.github.com:443
- name: Trigger API deployment
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: api-prowler-deployment
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
+1 -4
View File
@@ -12,9 +12,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'api/**'
- '.github/workflows/api-container-checks.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -134,5 +131,5 @@ jobs:
with:
image-name: ${{ env.IMAGE_NAME }}
image-tag: ${{ github.sha }}
fail-on-critical: 'false'
fail-on-critical: 'true'
severity: 'CRITICAL'
-7
View File
@@ -16,13 +16,6 @@ on:
branches:
- "master"
- "v5.*"
paths:
- 'api/**'
- '.github/workflows/api-tests.yml'
- '.github/workflows/api-security.yml'
- '.github/actions/setup-python-uv/**'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
+5 -4
View File
@@ -29,10 +29,11 @@ jobs:
with:
# We can't block as Trufflehog needs to verify secrets against vendors
egress-policy: audit
# allowed-endpoints: >
# github.com:443
# ghcr.io:443
# pkg-containers.githubusercontent.com:443
allowed-endpoints: >
github.com:443
ghcr.io:443
pkg-containers.githubusercontent.com:443
www.formbucket.com:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -263,27 +263,3 @@ jobs:
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
step-outcome: ${{ steps.outcome.outputs.outcome }}
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
trigger-deployment:
needs: [setup, container-build-push]
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
with:
egress-policy: block
allowed-endpoints: >
api.github.com:443
- name: Trigger MCP deployment
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: mcp-prowler-deployment
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
+1 -4
View File
@@ -12,9 +12,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'mcp_server/**'
- '.github/workflows/mcp-container-checks.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -127,5 +124,5 @@ jobs:
with:
image-name: ${{ env.IMAGE_NAME }}
image-tag: ${{ github.sha }}
fail-on-critical: 'false'
fail-on-critical: 'true'
severity: 'CRITICAL'
-7
View File
@@ -15,12 +15,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'mcp_server/pyproject.toml'
- 'mcp_server/uv.lock'
- '.github/workflows/mcp-security.yml'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -30,7 +24,6 @@ permissions: {}
jobs:
mcp-security-scans:
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
+1
View File
@@ -29,6 +29,7 @@ jobs:
- '3.10'
- '3.11'
- '3.12'
- '3.13'
steps:
- name: Harden Runner
+7 -24
View File
@@ -15,12 +15,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'prowler/**'
- 'Dockerfile*'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/sdk-container-checks.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -111,25 +105,14 @@ jobs:
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: ./**
files: |
prowler/**
Dockerfile*
pyproject.toml
uv.lock
.github/workflows/sdk-container-checks.yml
files_ignore: |
.github/**
prowler/CHANGELOG.md
docs/**
permissions/**
api/**
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
.env
docker-compose*
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Set up Docker Buildx
@@ -153,5 +136,5 @@ jobs:
with:
image-name: ${{ env.IMAGE_NAME }}
image-tag: ${{ github.sha }}
fail-on-critical: 'false'
fail-on-critical: 'true'
severity: 'CRITICAL'
+9 -28
View File
@@ -19,16 +19,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'prowler/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/sdk-tests.yml'
- '.github/workflows/sdk-security.yml'
- '.github/actions/setup-python-uv/**'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -71,27 +61,18 @@ jobs:
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files:
./**
files: |
prowler/**
tests/**
pyproject.toml
uv.lock
.github/workflows/sdk-tests.yml
.github/workflows/sdk-security.yml
.github/actions/setup-python-uv/**
.github/actions/osv-scanner/**
.github/scripts/osv-scan.sh
files_ignore: |
.github/**
prowler/CHANGELOG.md
docs/**
permissions/**
api/**
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
.env
docker-compose*
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Setup Python with uv
+29 -4
View File
@@ -29,6 +29,7 @@ jobs:
- '3.10'
- '3.11'
- '3.12'
- '3.13'
steps:
- name: Harden Runner
@@ -540,7 +541,7 @@ jobs:
with:
flags: prowler-py${{ matrix.python-version }}-vercel
files: ./vercel_coverage.xml
# Scaleway Provider
- name: Check if Scaleway files changed
if: steps.check-changes.outputs.any_changed == 'true'
@@ -588,7 +589,31 @@ jobs:
with:
flags: prowler-py${{ matrix.python-version }}-stackit
files: ./stackit_coverage.xml
# Linode Provider
- name: Check if Linode files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-linode
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: |
./prowler/**/linode/**
./tests/**/linode/**
./uv.lock
- name: Run Linode tests
if: steps.changed-linode.outputs.any_changed == 'true'
run: uv run pytest -n auto --cov=./prowler/providers/linode --cov-report=xml:linode_coverage.xml tests/providers/linode
- name: Upload Linode coverage to Codecov
if: steps.changed-linode.outputs.any_changed == 'true'
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: prowler-py${{ matrix.python-version }}-linode
files: ./linode_coverage.xml
# External Provider (dynamic loading)
- name: Check if External Provider files changed
if: steps.check-changes.outputs.any_changed == 'true'
@@ -608,14 +633,14 @@ jobs:
- name: Upload External Provider coverage to Codecov
if: steps.changed-external.outputs.any_changed == 'true'
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: prowler-py${{ matrix.python-version }}-external
files: ./external_coverage.xml
# Lib
- name: Check if Lib files changed
if: steps.check-changes.outputs.any_changed == 'true'
@@ -32,9 +32,6 @@ env:
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-ui
# Build args
NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
permissions: {}
jobs:
@@ -146,7 +143,6 @@ jobs:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ (github.event_name == 'release' || github.event_name == 'workflow_dispatch') && format('v{0}', env.RELEASE_TAG) || needs.setup.outputs.short-sha }}
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
push: true
platforms: ${{ matrix.platform }}
tags: |
@@ -262,27 +258,3 @@ jobs:
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
step-outcome: ${{ steps.outcome.outputs.outcome }}
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
trigger-deployment:
needs: [setup, container-build-push]
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
with:
egress-policy: block
allowed-endpoints: >
api.github.com:443
- name: Trigger UI deployment
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: ui-prowler-deployment
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
+1 -4
View File
@@ -12,9 +12,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'ui/**'
- '.github/workflows/ui-container-checks.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -132,5 +129,5 @@ jobs:
with:
image-name: ${{ env.IMAGE_NAME }}
image-tag: ${{ github.sha }}
fail-on-critical: 'false'
fail-on-critical: 'true'
severity: 'CRITICAL'
+11 -2
View File
@@ -81,7 +81,8 @@ jobs:
AUTH_SECRET: 'fallback-ci-secret-for-testing'
AUTH_TRUST_HOST: true
NEXTAUTH_URL: 'http://localhost:3000'
NEXT_PUBLIC_API_BASE_URL: 'http://localhost:8080/api/v1'
AUTH_URL: 'http://localhost:3000'
UI_API_BASE_URL: 'http://localhost:8080/api/v1'
E2E_ADMIN_USER: ${{ secrets.E2E_ADMIN_USER }}
E2E_ADMIN_PASSWORD: ${{ secrets.E2E_ADMIN_PASSWORD }}
E2E_AWS_PROVIDER_ACCOUNT_ID: ${{ secrets.E2E_AWS_PROVIDER_ACCOUNT_ID }}
@@ -118,6 +119,14 @@ jobs:
E2E_ALIBABACLOUD_ACCESS_KEY_ID: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_ID }}
E2E_ALIBABACLOUD_ACCESS_KEY_SECRET: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_SECRET }}
E2E_ALIBABACLOUD_ROLE_ARN: ${{ secrets.E2E_ALIBABACLOUD_ROLE_ARN }}
E2E_OKTA_DOMAIN: ${{ secrets.E2E_OKTA_DOMAIN }}
E2E_OKTA_CLIENT_ID: ${{ secrets.E2E_OKTA_CLIENT_ID }}
E2E_OKTA_BASE64_PRIVATE_KEY: ${{ secrets.E2E_OKTA_BASE64_PRIVATE_KEY }}
E2E_GOOGLEWORKSPACE_CUSTOMER_ID: ${{ secrets.E2E_GOOGLEWORKSPACE_CUSTOMER_ID }}
E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON: ${{ secrets.E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON }}
E2E_GOOGLEWORKSPACE_DELEGATED_USER: ${{ secrets.E2E_GOOGLEWORKSPACE_DELEGATED_USER }}
E2E_VERCEL_TEAM_ID: ${{ secrets.E2E_VERCEL_TEAM_ID }}
E2E_VERCEL_API_TOKEN: ${{ secrets.E2E_VERCEL_API_TOKEN }}
# Pass E2E paths from impact analysis
E2E_TEST_PATHS: ${{ needs.impact-analysis.outputs.ui-e2e }}
RUN_ALL_TESTS: ${{ needs.impact-analysis.outputs.run-all }}
@@ -198,7 +207,7 @@ jobs:
timeout=150
elapsed=0
while [ $elapsed -lt $timeout ]; do
if curl -s ${NEXT_PUBLIC_API_BASE_URL}/docs >/dev/null 2>&1; then
if curl -s ${UI_API_BASE_URL}/docs >/dev/null 2>&1; then
echo "Prowler API is ready!"
exit 0
fi
-7
View File
@@ -15,12 +15,6 @@ on:
branches:
- 'master'
- 'v5.*'
paths:
- 'ui/package.json'
- 'ui/pnpm-lock.yaml'
- '.github/workflows/ui-security.yml'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -30,7 +24,6 @@ permissions: {}
jobs:
ui-security-scans:
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
+4
View File
@@ -131,6 +131,10 @@ jobs:
if: steps.check-changes.outputs.any_changed == 'true'
run: pnpm run healthcheck
- name: Check product-tour alignment
if: steps.check-changes.outputs.any_changed == 'true'
run: pnpm run tour:check
- name: Run pnpm audit
if: steps.check-changes.outputs.any_changed == 'true'
run: pnpm run audit
+4
View File
@@ -169,3 +169,7 @@ GEMINI.md
# Claude Code
.claude/*
# Docker
docker-compose.override.yml
docker-compose-dev.override.yml
+36 -9
View File
@@ -7,6 +7,10 @@
# P50 — dependency validation
default_install_hook_types: [pre-commit]
# Hooks run on commit only by default.
# NOTE: default_stages does NOT override a hook's manifest stages, so fixers shipping pre-push in their
# manifest need an explicit stages: ["pre-commit"] below to stay off push.
default_stages: [pre-commit]
repos:
## GENERAL (prek built-in — no external repo needed)
@@ -21,13 +25,16 @@ repos:
- id: check-json
priority: 10
- id: end-of-file-fixer
stages: ["pre-commit"]
priority: 0
- id: trailing-whitespace
stages: ["pre-commit"]
priority: 0
- id: no-commit-to-branch
priority: 10
- id: pretty-format-json
args: ["--autofix", --no-sort-keys, --no-ensure-ascii]
stages: ["pre-commit"]
priority: 10
## TOML
@@ -82,6 +89,7 @@ repos:
name: "SDK - isort"
files: { glob: ["{prowler,tests,dashboard,util,scripts}/**/*.py"] }
args: ["--profile", "black"]
stages: ["pre-commit"]
priority: 20
- repo: https://github.com/psf/black
@@ -102,17 +110,36 @@ repos:
priority: 30
## PYTHON — API + MCP Server (ruff)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.11
# Run ruff through `uv run` against each project so prek uses the exact ruff
# version pinned in that project's uv.lock — the same version GitHub Actions
# runs via `uv run ruff`. This removes the drift between the local hooks and
# CI. api/ and mcp_server/ are separate uv projects, so they need separate
# hooks (each `uv run --project` resolves its own pinned ruff + config).
- repo: local
hooks:
- id: ruff
name: "API + MCP - ruff check"
files: { glob: ["{api,mcp_server}/**/*.py"] }
args: ["--fix"]
- id: ruff-check-api
name: "API - ruff check"
entry: uv run --project ./api ruff check --fix
language: system
files: { glob: ["api/**/*.py"] }
priority: 30
- id: ruff-format
name: "API + MCP - ruff format"
files: { glob: ["{api,mcp_server}/**/*.py"] }
- id: ruff-format-api
name: "API - ruff format"
entry: uv run --project ./api ruff format
language: system
files: { glob: ["api/**/*.py"] }
priority: 20
- id: ruff-check-mcp
name: "MCP - ruff check"
entry: uv run --project ./mcp_server ruff check --fix
language: system
files: { glob: ["mcp_server/**/*.py"] }
priority: 30
- id: ruff-format-mcp
name: "MCP - ruff format"
entry: uv run --project ./mcp_server ruff format
language: system
files: { glob: ["mcp_server/**/*.py"] }
priority: 20
## PYTHON — uv (API + SDK)
+98
View File
@@ -0,0 +1,98 @@
# Trivy ignore file for the prowlercloud/prowler SDK container image; the final section covers the Prowler API image.
# Each entry below documents (a) the affected package and why it ships in the
# image, (b) why the CVE is not exploitable in Prowler's runtime, and (c) the
# upstream fix status. Entries carry an expiry date so that each one is automatically forced back up for re-review.
# Entries are scoped per-package so suppressions cannot drift onto unrelated
# packages that may be assigned the same CVE in the future.
#
# Scanned by: .github/actions/trivy-scan via .github/workflows/sdk-container-checks.yml
# CVE-2026-42496 — perl-archive-tar path traversal via crafted symlinks.
# CVE-2026-8376 — perl heap buffer overflow when compiling regex.
# Packages: perl, perl-base, perl-modules-5.36, libperl5.36.
# Why ignored: perl-base is part of Debian's "Essential: yes" set; it cannot be
# removed without breaking dpkg. The Prowler SDK does not invoke perl at runtime;
# neither vulnerable code path (Archive::Tar parsing or regex compilation of
# attacker-controlled input) is reachable from Prowler. No Debian bookworm fix
# is available yet.
CVE-2026-42496 pkg:perl exp:2026-07-15
CVE-2026-42496 pkg:perl-base exp:2026-07-15
CVE-2026-42496 pkg:perl-modules-5.36 exp:2026-07-15
CVE-2026-42496 pkg:libperl5.36 exp:2026-07-15
CVE-2026-8376 pkg:perl exp:2026-07-15
CVE-2026-8376 pkg:perl-base exp:2026-07-15
CVE-2026-8376 pkg:perl-modules-5.36 exp:2026-07-15
CVE-2026-8376 pkg:libperl5.36 exp:2026-07-15
# CVE-2025-7458 — SQLite integer overflow.
# Package: libsqlite3-0.
# Why ignored: transitive dependency of CPython's stdlib sqlite3 module. The
# Prowler SDK does not open user-supplied SQLite databases; SQLite usage is
# internal and bounded. No Debian bookworm fix is available.
CVE-2025-7458 pkg:libsqlite3-0 exp:2026-07-15
# CVE-2026-43185 — Linux kernel ksmbd signedness bug.
# Package: linux-libc-dev.
# Why ignored: linux-libc-dev ships kernel headers for build-time compilation,
# not a running kernel. Containers execute against the host kernel, so these
# headers are inert at runtime. The upstream fix landed in kernel 7.0-rc2 and
# has not been backported to Debian's 6.1 LTS line.
CVE-2026-43185 pkg:linux-libc-dev exp:2026-07-15
# CVE-2023-45853 — zlib MiniZip integer overflow / heap overflow in
# zipOpenNewFileInZip4_64.
# Packages: zlib1g, zlib1g-dev.
# Why ignored: Debian Security Tracker status for bookworm is <ignored>, with
# the published rationale "contrib/minizip not built and src:zlib not producing
# binary packages" — i.e. the vulnerable symbol is not present in the libz.so
# shipped by Debian. This is a genuine not-affected case, not an unpatched one. Upstream fix is in
# zlib 1.3.1, available in Debian trixie (13); migrating the base image would
# clear it fully.
# Ref: https://security-tracker.debian.org/tracker/CVE-2023-45853
CVE-2023-45853 pkg:zlib1g exp:2026-07-15
CVE-2023-45853 pkg:zlib1g-dev exp:2026-07-15
# CVE-2026-55200 — libssh2 out-of-bounds write in ssh2_transport_read() due to
# an unchecked packet_length field in transport.c (heap corruption, possible RCE).
# Package: libssh2-1.
# Why ignored: libssh2-1 is pulled in only as a transitive dependency of libcurl4
# (installed in the SDK Dockerfile for the networking/PowerShell stack). The
# vulnerable path is reached exclusively when libssh2 acts as an SSH/SCP/SFTP
# client parsing transport packets from a server. Prowler never uses libcurl's
# SSH/SCP/SFTP transports; it talks to cloud provider HTTPS endpoints only, so the
# affected code is unreachable at runtime. Fixed upstream in libssh2 commit
# 97acf3df (PR #2052); no Debian bookworm fix is available yet.
# Ref: https://security-tracker.debian.org/tracker/CVE-2026-55200
CVE-2026-55200 pkg:libssh2-1 exp:2026-07-15
# --- API container image (api/Dockerfile) ---
# The entries below are specific to the Prowler API image, which ships
# PowerShell and additional build tooling on top of the same bookworm base.
# CVE-2026-7210 — CPython/Expat hash-flooding denial of service in
# `xml.parsers.expat` and `xml.etree.ElementTree`.
# Packages: the Debian system Python 3.11 (python3.11*, libpython3.11*).
# Why ignored: the API runs under the Python 3.12 interpreter shipped in its
# `.venv`; the system `python3.11` is only present because `python3-dev` is
# pulled in to compile native extensions (xmlsec, lxml) and is never executed
# at runtime. The vulnerable path requires parsing attacker-controlled XML with
# the affected interpreter, which Prowler does not do with the system Python.
# Full mitigation also needs libexpat >= 2.8.0; no Debian bookworm fix yet.
CVE-2026-7210 pkg:python3.11 exp:2026-07-15
CVE-2026-7210 pkg:python3.11-dev exp:2026-07-15
CVE-2026-7210 pkg:python3.11-minimal exp:2026-07-15
CVE-2026-7210 pkg:libpython3.11 exp:2026-07-15
CVE-2026-7210 pkg:libpython3.11-dev exp:2026-07-15
CVE-2026-7210 pkg:libpython3.11-minimal exp:2026-07-15
CVE-2026-7210 pkg:libpython3.11-stdlib exp:2026-07-15
# CVE-2026-33278 — Unbound DNSSEC validator use-after-free (DoS, possible RCE).
# CVE-2026-42960 — Unbound DNS cache poisoning via promiscuous additional records.
# Package: libunbound8.
# Why ignored: libunbound8 is a transitive apt dependency of the TLS/networking
# stack (GnuTLS DANE support); only the shared library ships in the image. Both
# vulnerabilities require operating a live Unbound recursive DNSSEC validator
# that processes attacker-influenced DNS responses. Prowler never starts an
# Unbound resolver, so neither code path is reachable. No Debian bookworm fix yet.
CVE-2026-33278 pkg:libunbound8 exp:2026-07-15
CVE-2026-42960 pkg:libunbound8 exp:2026-07-15
+6
View File
@@ -51,6 +51,7 @@ Use these skills for detailed patterns on-demand:
| `django-migration-psql` | Django migration best practices for PostgreSQL | [SKILL.md](skills/django-migration-psql/SKILL.md) |
| `postgresql-indexing` | PostgreSQL indexing, EXPLAIN, monitoring, maintenance | [SKILL.md](skills/postgresql-indexing/SKILL.md) |
| `prowler-attack-paths-query` | Create Attack Paths openCypher queries | [SKILL.md](skills/prowler-attack-paths-query/SKILL.md) |
| `prowler-tour` | Keep product-tour definitions aligned with the UI | [SKILL.md](skills/prowler-tour/SKILL.md) |
| `gh-aw` | GitHub Agentic Workflows (gh-aw) | [SKILL.md](skills/gh-aw/SKILL.md) |
| `skill-creator` | Create new AI agent skills | [SKILL.md](skills/skill-creator/SKILL.md) |
@@ -67,10 +68,12 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Adding new providers | `prowler-provider` |
| Adding privilege escalation detection queries | `prowler-attack-paths-query` |
| Adding services to existing providers | `prowler-provider` |
| Adding, updating, or removing a tour definition (*.tour.ts) | `prowler-tour` |
| After creating/modifying a skill | `skill-sync` |
| App Router / Server Actions | `nextjs-16` |
| Auditing check-to-requirement mappings as a cloud auditor | `prowler-compliance` |
| Building AI chat features | `ai-sdk-5` |
| Changing button labels or section headings on a tour-covered page | `prowler-tour` |
| Committing changes | `prowler-commit` |
| Configuring MCP servers in agentic workflows | `gh-aw` |
| Create PR that requires changelog entry | `prowler-changelog` |
@@ -89,6 +92,7 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Creating/updating compliance frameworks | `prowler-compliance` |
| Debug why a GitHub Actions job is failing | `prowler-ci` |
| Debugging gh-aw compilation errors | `gh-aw` |
| Editing a UI file containing data-tour-id attributes | `prowler-tour` |
| Fill .github/pull_request_template.md (Context/Description/Steps to review/Checklist) | `prowler-pr` |
| Fixing bug | `tdd` |
| Fixing compliance JSON bugs (duplicate IDs, empty Section, stale refs) | `prowler-compliance` |
@@ -105,6 +109,8 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Modifying gh-aw workflow frontmatter or safe-outputs | `gh-aw` |
| Refactoring code | `tdd` |
| Regenerate AGENTS.md Auto-invoke tables (sync.sh) | `skill-sync` |
| Renaming or removing a data-tour-id attribute value | `prowler-tour` |
| Restructuring routes or layouts covered by a tour | `prowler-tour` |
| Review PR requirements: template, title conventions, changelog gate | `prowler-pr` |
| Review changelog format and conventions | `prowler-changelog` |
| Reviewing JSON:API compliance | `jsonapi` |
+14 -2
View File
@@ -1,4 +1,4 @@
FROM python:3.12.11-slim-bookworm@sha256:519591d6871b7bc437060736b9f7456b8731f1499a57e22e6c285135ae657bf7 AS build
FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build
LABEL maintainer="https://github.com/prowler-cloud/prowler"
LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
@@ -6,7 +6,7 @@ LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
ARG POWERSHELL_VERSION=7.5.0
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
ARG TRIVY_VERSION=0.70.0
ARG TRIVY_VERSION=0.71.2
ENV TRIVY_VERSION=${TRIVY_VERSION}
ARG ZIZMOR_VERSION=1.24.1
@@ -95,6 +95,18 @@ RUN uv sync --locked --compile-bytecode && \
# Install PowerShell modules
RUN .venv/bin/python prowler/providers/m365/lib/powershell/m365_powershell.py
USER root
# Remove build-only packages from the final image after Python dependencies are installed.
RUN apt-get purge -y --auto-remove \
build-essential \
pkg-config \
libzstd-dev \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
USER prowler
# Remove deprecated dash dependencies
RUN pip uninstall dash-html-components -y && \
pip uninstall dash-core-components -y
+63 -11
View File
@@ -1,5 +1,34 @@
.DEFAULT_GOAL:=help
DEV_LOCAL := ./scripts/development/dev-local.sh
.PHONY: dev dev-setup dev-attach dev-launch dev-stop dev-clean dev-wipe dev-status
##@ Local Development
dev: ## Start local API, worker, and database logs
$(DEV_LOCAL) all
dev-setup: ## Bootstrap local dependencies, migrations, and fixtures
$(DEV_LOCAL) setup
dev-attach: ## Attach to the local tmux development session
$(DEV_LOCAL) attach
dev-launch: ## Start the local stack on fixed ports and attach
$(DEV_LOCAL) launch
dev-stop: ## Stop the local tmux session and containers
$(DEV_LOCAL) kill
dev-clean: ## Remove stopped local development containers
$(DEV_LOCAL) clean
dev-wipe: ## Stop everything and delete local development data
$(DEV_LOCAL) wipe
dev-status: ## Show local development container status
$(DEV_LOCAL) status
##@ Testing
test: ## Test with pytest
rm -rf .coverage && \
@@ -16,18 +45,41 @@ coverage-html: ## Show Test Coverage
coverage html && \
open htmlcov/index.html
##@ Linting
format: ## Format Code
@echo "Running black..."
black .
##@ Code Quality
# `make` is the single entrypoint and mirrors CI exactly (uv run + same flags):
# SDK (prowler/, util/) -> flake8 + black + pylint
# API & MCP server -> ruff (rules live in each project's pyproject.toml)
# `format` applies fixes (incl. ruff's import/upgrade autofixes); `lint` only
# verifies and is what CI gates on.
.PHONY: format format-sdk format-api format-mcp lint lint-sdk lint-api lint-mcp
lint: ## Lint Code
@echo "Running flake8..."
flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib
@echo "Running black... "
black --check .
@echo "Running pylint..."
pylint --disable=W,C,R,E -j 0 prowler util
format: format-sdk format-api format-mcp ## Format & autofix all components (SDK, API, MCP)
lint: lint-sdk lint-api lint-mcp ## Lint all components (SDK, API, MCP) — mirrors CI
format-sdk: ## Format SDK code (black)
uv run black --exclude "\.venv|api|ui|skills|mcp_server" .
lint-sdk: ## Lint SDK code (flake8, black --check, pylint)
uv run flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib,ui,api,skills,mcp_server
uv run black --exclude "\.venv|api|ui|skills|mcp_server" --check .
uv run pylint --disable=W,C,R,E -j 0 -rn -sn prowler/
format-api: ## Format & autofix API code (ruff)
cd api && uv run ruff check . --exclude contrib --fix
cd api && uv run ruff format . --exclude contrib
lint-api: ## Lint API code (ruff check + format --check)
cd api && uv run ruff check . --exclude contrib
cd api && uv run ruff format --check . --exclude contrib
format-mcp: ## Format & autofix MCP server code (ruff)
cd mcp_server && uv run ruff check . --fix
cd mcp_server && uv run ruff format .
lint-mcp: ## Lint MCP server code (ruff check + format --check)
cd mcp_server && uv run ruff check .
cd mcp_server && uv run ruff format --check .
##@ PyPI
pypi-clean: ## Delete the distribution files
+45 -25
View File
@@ -83,16 +83,35 @@ prowler dashboard
## Attack Paths
Attack Paths automatically extends every completed AWS scan with a Neo4j graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan and therefore requires:
Attack Paths automatically extends every completed AWS scan with a graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan.
- An accessible Neo4j instance (the Docker Compose files already ships a `neo4j` service).
- The following environment variables so Django and Celery can connect:
Two graph backends are supported as the long-lived sink:
| Variable | Description | Default |
| --- | --- | --- |
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
- **Neo4j** (default; the Docker Compose files already ship a `neo4j` service).
- **Amazon Neptune** (cloud-managed; opt-in).
Select the sink with `ATTACK_PATHS_SINK_DATABASE` (`neo4j` or `neptune`; default `neo4j`).
> Note: Cartography ingestion always uses a temporary Neo4j database, regardless of the configured sink. The `NEO4J_*` variables below must remain set even when `ATTACK_PATHS_SINK_DATABASE=neptune`.
### Neo4j sink
| Variable | Description | Default |
| --- | --- | --- |
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
### Neptune sink
| Variable | Description | Default |
| --- | --- | --- |
| `NEPTUNE_WRITER_ENDPOINT` | Bolt host for the Neptune writer instance. Required when sink is `neptune`. | _empty_ |
| `NEPTUNE_READER_ENDPOINT` | Optional reader endpoint for read-only queries. Falls back to the writer when unset. | _empty_ |
| `NEPTUNE_PORT` | Bolt port exposed by Neptune. | `8182` |
| `AWS_REGION` | Region the Neptune cluster lives in. Required when sink is `neptune`. | _empty_ |
Neptune authenticates with SigV4 using the standard boto3 credential chain. The worker's IAM role (or `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) supplies the credentials. There is no Neptune password variable.
Every AWS provider scan will enqueue an Attack Paths ingestion job automatically. Other cloud providers will be added in future iterations.
@@ -104,26 +123,27 @@ Every AWS provider scan will enqueue an Attack Paths ingestion job automatically
| Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) | Support | Interface |
|---|---|---|---|---|---|---|
| AWS | 600 | 84 | 44 | 18 | Official | UI, API, CLI |
| Azure | 167 | 22 | 19 | 16 | Official | UI, API, CLI |
| GCP | 102 | 18 | 17 | 12 | Official | UI, API, CLI |
| Kubernetes | 83 | 7 | 7 | 11 | Official | UI, API, CLI |
| GitHub | 24 | 3 | 1 | 5 | Official | UI, API, CLI |
| M365 | 102 | 10 | 4 | 10 | Official | UI, API, CLI |
| OCI | 51 | 14 | 4 | 10 | Official | UI, API, CLI |
| Alibaba Cloud | 63 | 9 | 4 | 9 | Official | UI, API, CLI |
| Cloudflare | 29 | 3 | 0 | 5 | Official | UI, API, CLI |
| AWS | 615 | 86 | 47 | 19 | Official | UI, API, CLI |
| Azure | 190 | 22 | 21 | 16 | Official | UI, API, CLI |
| GCP | 109 | 20 | 19 | 12 | Official | UI, API, CLI |
| Kubernetes | 90 | 7 | 8 | 11 | Official | UI, API, CLI |
| GitHub | 24 | 3 | 2 | 5 | Official | UI, API, CLI |
| M365 | 109 | 10 | 6 | 10 | Official | UI, API, CLI |
| OCI | 52 | 14 | 5 | 10 | Official | UI, API, CLI |
| Alibaba Cloud | 63 | 9 | 6 | 9 | Official | UI, API, CLI |
| Cloudflare | 29 | 3 | 2 | 5 | Official | UI, API, CLI |
| IaC | [See `trivy` docs.](https://trivy.dev/latest/docs/coverage/iac/) | N/A | N/A | N/A | Official | UI, API, CLI |
| MongoDB Atlas | 10 | 3 | 0 | 8 | Official | UI, API, CLI |
| MongoDB Atlas | 10 | 3 | 1 | 8 | Official | UI, API, CLI |
| LLM | [See `promptfoo` docs.](https://www.promptfoo.dev/docs/red-team/plugins/) | N/A | N/A | N/A | Official | CLI |
| Image | N/A | N/A | N/A | N/A | Official | CLI, API |
| Google Workspace | 39 | 5 | 2 | 5 | Official | UI, API, CLI |
| OpenStack | 34 | 5 | 0 | 9 | Official | UI, API, CLI |
| Vercel | 26 | 6 | 0 | 8 | Official | UI, API, CLI |
| Okta | 1 | 1 | 0 | 1 | Official | CLI |
| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 0 | 1 | Unofficial | CLI |
| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 0 | 3 | Unofficial | CLI |
| NHN | 6 | 2 | 1 | 0 | Unofficial | CLI |
| Google Workspace | 65 | 11 | 3 | 6 | Official | UI, API, CLI |
| OpenStack | 34 | 5 | 1 | 9 | Official | UI, API, CLI |
| Vercel | 26 | 6 | 1 | 8 | Official | UI, API, CLI |
| Okta | 29 | 8 | 2 | 2 | Official | UI, API, CLI |
| Linode [Contact us](https://prowler.com/contact) | 10 | 3 | 1 | 4 | Unofficial | CLI |
| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 1 | 1 | Unofficial | CLI |
| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 1 | 3 | Unofficial | CLI |
| NHN | 6 | 2 | 2 | 0 | Unofficial | CLI |
> [!Note]
> The numbers in the table are updated periodically.
+3
View File
@@ -24,6 +24,9 @@ DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute
# Decide whether to allow Django manage database table partitions
DJANGO_MANAGE_DB_PARTITIONS=[True|False]
DJANGO_CELERY_DEADLOCK_ATTEMPTS=5
# Optional: bound Celery's prefork pool size. Unset → Celery uses os.cpu_count().
# Useful on Kubernetes nodes with many CPUs where unbounded prefork balloons memory.
# DJANGO_CELERY_WORKER_CONCURRENCY=4
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
+70
View File
@@ -2,6 +2,76 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.33.0] (Prowler UNRELEASED)
### 🔄 Changed
- Attack Paths: AWS Neptune is now supported as a persistent sink database, selectable via `ATTACK_PATHS_SINK_DATABASE=neptune` (default `neo4j`). Cartography (bumped to 0.138.1) keeps its per-scan ingest database on Neo4j [(#11524)](https://github.com/prowler-cloud/prowler/pull/11524)
- Attack Paths: Scan task now checks the ingest Neo4j database and configured graph sink before starting graph ingestion [(#11743)](https://github.com/prowler-cloud/prowler/pull/11743)
---
## [1.32.2] (Prowler UNRELEASED)
### 🐞 Fixed
- `scan-perform` no longer reports an error when a provider is deleted during a running scan [(#11696)](https://github.com/prowler-cloud/prowler/pull/11696)
---
## [1.32.1] (Prowler v5.31.1)
### 🐞 Fixed
- API key auth no longer mutates `TenantAPIKey.objects` during admin DB lookups [(#11686)](https://github.com/prowler-cloud/prowler/pull/11686)
---
## [1.32.0] (Prowler v5.31.0)
### 🚀 Added
- Provider group filters for API endpoints that support cloud provider filtering, including exact and `__in` variants [(#11573)](https://github.com/prowler-cloud/prowler/pull/11573)
- Provider filters for `GET /api/v1/compliance-overviews`, `/metadata`, and `/requirements`, using latest completed scans per matching provider [(#11587)](https://github.com/prowler-cloud/prowler/pull/11587)
- Server-Sent Events (SSE) infrastructure for the API: a base viewset, a tenant-aware channel manager, and channel-name helpers backed by `django-eventstream` over Valkey Pub/Sub and served through the Gunicorn ASGI worker, so feature endpoints can stream events to clients over a single long-lived connection [(#11556)](https://github.com/prowler-cloud/prowler/pull/11556)
- `DJANGO_CELERY_WORKER_CONCURRENCY` to configure Celery worker concurrency. Leave it unset to keep Celery's default behaviour [(#11075)](https://github.com/prowler-cloud/prowler/pull/11075)
### 🔄 Changed
- Gunicorn worker timeout raised from the 30s default to 120s, so long-running requests are no longer killed prematurely [(#11631)](https://github.com/prowler-cloud/prowler/pull/11631)
- Sentry now drops ASGI's `RequestAborted` errors from health-check probe disconnects on `/health/live` [(#11632)](https://github.com/prowler-cloud/prowler/pull/11632)
- Gunicorn keep-alive timeout now exceeds the load balancer idle timeout, stopping 502s from reused connections [(#11647)](https://github.com/prowler-cloud/prowler/pull/11647)
- API runs under the Uvicorn worker so keep-alive outlives the load balancer idle timeout, fixing Gunicorn's intermittent 502s [(#11663)](https://github.com/prowler-cloud/prowler/pull/11663)
- SAML logins no longer wipe a user's roles when the IdP does not send the `userType` attribute; existing roles are kept, and when `userType` names a role that does not exist it is now created with read-only access (visibility over all providers, no management permissions) instead of no permissions at all [(#11520)](https://github.com/prowler-cloud/prowler/pull/11520)
### 🐞 Fixed
- Database connections no longer leak under the ASGI worker, which previously exhausted the read replica's connection slots and caused 500s on read endpoints [(#11640)](https://github.com/prowler-cloud/prowler/pull/11640)
### 🔐 Security
- `aiohttp` to 3.14.0 and `idna` to 3.15, patching known CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
- Container base image to `python:3.12.13-slim-bookworm` and `trivy` to 0.71.0, patching OS and Go module CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
- `trivy` binary bumped to 0.71.0 patching embedded `golang.org/x/crypto`, `golang.org/x/net`, and Go `stdlib` CVEs [(#11592)](https://github.com/prowler-cloud/prowler/pull/11592)
---
## [1.31.3] (Prowler v5.30.3)
### 🔐 Security
- SAML logins now link to an existing account only when the asserted email domain matches the ACS endpoint and the user is already a member of that domain's tenant, fixing a cross-tenant account takeover [(GHSA-h8m9-jgf8-vwvp)](https://github.com/prowler-cloud/prowler/security/advisories/GHSA-h8m9-jgf8-vwvp)
---
## [1.31.2] (Prowler v5.30.2)
### 🔄 Changed
- `scan-compliance-overviews` task now streams the findings aggregation and the requirement-row writes so it runs faster and its peak memory no longer grows with the number of regions and frameworks [(#11591)](https://github.com/prowler-cloud/prowler/pull/11591)
---
## [1.31.1] (Prowler v5.30.1)
### 🐞 Fixed
+19 -2
View File
@@ -1,11 +1,11 @@
FROM python:3.12.10-slim-bookworm@sha256:fd95fa221297a88e1cf49c55ec1828edd7c5a428187e67b5d1805692d11588db AS build
FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build
LABEL maintainer="https://github.com/prowler-cloud/api"
ARG POWERSHELL_VERSION=7.5.0
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
ARG TRIVY_VERSION=0.70.0
ARG TRIVY_VERSION=0.71.2
ENV TRIVY_VERSION=${TRIVY_VERSION}
ARG ZIZMOR_VERSION=1.24.1
@@ -102,6 +102,23 @@ RUN uv sync --locked --no-install-project && \
RUN .venv/bin/python .venv/lib/python3.12/site-packages/prowler/providers/m365/lib/powershell/m365_powershell.py
USER root
# Remove build-only packages from the final image after Python dependencies are installed.
RUN apt-get purge -y --auto-remove \
gcc \
g++ \
make \
libxml2-dev \
libxmlsec1-dev \
pkg-config \
libtool \
libxslt1-dev \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
USER prowler
COPY --chown=prowler:prowler src/backend/ ./backend/
COPY --chown=prowler:prowler docker-entrypoint.sh ./docker-entrypoint.sh
+36
View File
@@ -196,6 +196,42 @@ python -m celery -A config.celery worker -l info -E
The Celery worker does not detect and reload changes in the code, so you need to restart it manually when you make changes.
### Makefile-Assisted Local Deployment
This method is an additional local development workflow. It does not replace the manual local deployment or the Docker deployment described in this guide.
PostgreSQL, Valkey, and Neo4j run with Docker Compose, while Django and the Celery worker run natively through `uv`. Additionally, this workflow creates a `tmux` session with panes for the API, worker, and PostgreSQL logs.
Before using this method, ensure `docker compose`, `tmux`, and `uv` are installed.
This workflow is designed for macOS and should also work on Linux when Docker, `tmux`, and `uv` are available. Windows requires script changes before it can be supported.
From the repository root, run:
```console
make dev
```
The API will be available at:
```console
http://localhost:8080/api/v1
```
Use these commands to manage the local stack:
```console
make dev-setup # Bootstrap dependencies, migrations, and fixtures
make dev-attach # Attach to the tmux session
make dev-launch # Start the stack on fixed ports and attach
make dev-stop # Stop the tmux session and containers
make dev-clean # Remove stopped development containers
make dev-wipe # Stop everything and delete local development data
make dev-status # Show development container status
```
This workflow does not start the UI. Start it separately from the `ui/` directory when needed.
### Docker deployment
This method requires `docker` and `docker compose`.
+9 -3
View File
@@ -21,13 +21,19 @@ apply_fixtures() {
}
start_dev_server() {
echo "Starting the development server..."
exec uv run python manage.py runserver 0.0.0.0:"${DJANGO_PORT:-8080}"
echo "Starting the development server (Gunicorn ASGI, debug + reload)..."
# Same server/worker as prod (config.asgi via the native `asgi` worker), so
# SSE streams run on the event loop exactly as they do in production. DEBUG is
# on so guniconf's `reload = DEBUG` hot-reloads edited code (and flips
# `preload_app` off so reload actually takes).
export DJANGO_DEBUG="${DJANGO_DEBUG:-True}"
export DJANGO_BIND_ADDRESS="${DJANGO_BIND_ADDRESS:-0.0.0.0}"
exec uv run gunicorn -c config/guniconf.py config.asgi:application
}
start_prod_server() {
echo "Starting the Gunicorn server..."
exec uv run gunicorn -c config/guniconf.py config.wsgi:application
exec uv run gunicorn -c config/guniconf.py config.asgi:application
}
resolve_worker_hostname() {
+1
View File
@@ -65,6 +65,7 @@ All settings have safe defaults; override via environment variables.
| Env var | Default | Purpose |
| --- | --- | --- |
| `DJANGO_CELERY_WORKER_PREFETCH_MULTIPLIER` | `1` | Tasks reserved per worker process. |
| `DJANGO_CELERY_WORKER_CONCURRENCY` | unset | Optional Celery prefork pool size. When unset, Celery uses its CPU-based default. Set this on worker containers to bound idle memory on hosts with many CPUs. |
| `DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT` | `60` | Seconds the worker drains/re-queues on `SIGTERM` before force-kill. |
| `DJANGO_CELERY_TASK_TIME_LIMIT` | `21600` (6h) | Hard limit for most tasks; connection checks are capped at 120s. |
| `DJANGO_CELERY_TASK_SOFT_TIME_LIMIT` | hard - 600 | Soft limit; raises `SoftTimeLimitExceeded` for cleanup. |
+47 -19
View File
@@ -14,7 +14,7 @@ dev = [
"pytest-env==1.1.3",
"pytest-randomly==3.15.0",
"pytest-xdist==3.6.1",
"ruff==0.5.0",
"ruff==0.15.11",
"tqdm==4.67.1",
"vulture==2.14",
"prek==0.3.9"
@@ -41,7 +41,9 @@ dependencies = [
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"defusedxml==0.7.1",
"gunicorn==23.0.0",
"django-eventstream==5.3.3",
"gunicorn==26.0.0",
"uvloop==0.22.1",
"lxml==6.1.0",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
@@ -56,11 +58,12 @@ dependencies = [
"matplotlib (==3.10.8)",
"reportlab (==4.4.10)",
"neo4j (==6.1.0)",
"cartography (==0.135.0)",
"cartography (==0.138.1)",
"gevent (==25.9.1)",
"werkzeug (==3.1.7)",
"sqlparse (==0.5.5)",
"fonttools (==4.62.1)"
"fonttools (==4.62.1)",
"uvicorn-worker (==0.4.0)",
]
description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
@@ -68,7 +71,24 @@ name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.32.0"
version = "1.33.0"
# Shared ruff baseline (kept in sync with mcp_server/pyproject.toml).
# target-version tracks this project's lowest supported Python.
[tool.ruff]
src = ["src"]
target-version = "py311"
[tool.ruff.lint]
# Defaults (E4/E7/E9, F) plus import sorting, modern-syntax upgrades, and
# comprehension lints — all mechanically auto-fixable. flake8-bugbear (B) is a
# good next step but needs manual cleanup (e.g. B904 raise-from), so it is left
# out of the shared baseline for now.
extend-select = [
"I", # isort — import ordering (prek's isort hook covers only the SDK)
"UP", # pyupgrade — modern syntax for the min supported Python
"C4" # flake8-comprehensions
]
[tool.uv]
# Transitive pins matching master to avoid silent drift; bump deliberately.
@@ -79,7 +99,7 @@ constraint-dependencies = [
"aiobotocore==2.25.1",
"aiofiles==24.1.0",
"aiohappyeyeballs==2.6.1",
"aiohttp==3.13.5",
"aiohttp==3.14.0",
"aioitertools==0.13.0",
"aiosignal==1.4.0",
"alibabacloud-actiontrail20200706==2.4.1",
@@ -124,9 +144,8 @@ constraint-dependencies = [
"astroid==3.2.4",
"async-timeout==5.0.1",
"attrs==25.4.0",
"authlib==1.6.9",
"authlib==1.6.12",
"autopep8==2.3.2",
"awsipranges==0.3.3",
"azure-cli-core==2.83.0",
"azure-cli-telemetry==1.1.0",
"azure-common==1.1.28",
@@ -174,7 +193,7 @@ constraint-dependencies = [
"blinker==1.9.0",
"boto3==1.40.61",
"botocore==1.40.61",
"cartography==0.135.0",
"cartography==0.138.1",
"celery==5.6.2",
"certifi==2026.1.4",
"cffi==2.0.0",
@@ -199,7 +218,6 @@ constraint-dependencies = [
"debugpy==1.8.20",
"decorator==5.2.1",
"defusedxml==0.7.1",
"detect-secrets==1.5.0",
"dill==0.4.1",
"distro==1.9.0",
"dj-rest-auth==7.0.1",
@@ -209,6 +227,7 @@ constraint-dependencies = [
"django-celery-results==2.6.0",
"django-cors-headers==4.4.0",
"django-environ==0.11.2",
"django-eventstream==5.3.3",
"django-filter==24.3",
"django-guid==3.5.0",
"django-postgres-extra==2.0.9",
@@ -253,7 +272,7 @@ constraint-dependencies = [
"grpc-google-iam-v1==0.14.3",
"grpcio==1.76.0",
"grpcio-status==1.76.0",
"gunicorn==23.0.0",
"gunicorn==26.0.0",
"h11==0.16.0",
"h2==4.3.0",
"hpack==4.1.0",
@@ -262,8 +281,8 @@ constraint-dependencies = [
"httpx==0.28.1",
"humanfriendly==10.0",
"hyperframe==6.1.0",
"iamdata==0.1.202602021",
"idna==3.11",
"iamdata==0.1.202605131",
"idna==3.15",
"importlib-metadata==8.7.1",
"inflection==0.5.1",
"iniconfig==2.3.0",
@@ -281,6 +300,7 @@ constraint-dependencies = [
"jsonschema==4.23.0",
"jsonschema-specifications==2025.9.1",
"keystoneauth1==5.13.0",
"kingfisher-bin==1.104.0",
"kiwisolver==1.4.9",
"knack==0.11.0",
"kombu==5.6.2",
@@ -315,7 +335,7 @@ constraint-dependencies = [
"neo4j==6.1.0",
"nest-asyncio==1.6.0",
"nltk==3.9.4",
"numpy==2.0.2",
"numpy==2.2.6",
"oauthlib==3.3.1",
"oci==2.169.0",
"openai==1.109.1",
@@ -344,7 +364,7 @@ constraint-dependencies = [
"psutil==7.2.2",
"psycopg2-binary==2.9.9",
"py-deviceid==0.1.1",
"py-iam-expand==0.1.0",
"py-iam-expand==0.3.0",
"py-ocsf-models==0.8.1",
"pyasn1==0.6.3",
"pyasn1-modules==0.4.2",
@@ -390,7 +410,7 @@ constraint-dependencies = [
"rpds-py==0.30.0",
"rsa==4.9.1",
"ruamel-yaml==0.19.1",
"ruff==0.5.0",
"ruff==0.15.11",
"s3transfer==0.14.0",
"scaleway==2.10.3",
"scaleway-core==2.10.3",
@@ -420,12 +440,14 @@ constraint-dependencies = [
"uritemplate==4.2.0",
"urllib3==2.7.0",
"uuid6==2024.7.10",
"uvicorn==0.49.0",
"uvloop==0.22.1",
"vine==5.1.0",
"vulture==2.14",
"wcwidth==0.5.3",
"websocket-client==1.9.0",
"werkzeug==3.1.7",
"workos==6.0.4",
"workos==6.0.8",
"wrapt==1.17.3",
"xlsxwriter==3.2.9",
"xmlsec==1.3.17",
@@ -436,8 +458,13 @@ constraint-dependencies = [
"zope-interface==8.2",
"zstd==1.5.7.3"
]
# prowler@master needs okta==3.4.2; cartography 0.135.0 declares okta<1.0.0 for an
# integration prowler does not import.
# prowler@master needs okta==3.4.2, but cartography 0.138.1 requires okta<1.0.0.
# Attack Paths does not ingest Okta today, so override the Cartography
# dependency to the Prowler pin.
#
# prowler@master needs azure-mgmt-containerservice==34.1.0, but cartography
# 0.138.1 requires azure-mgmt-containerservice>=41.0.0. Attack Paths does not
# ingest Azure today, so override the Cartography dependency to the Prowler pin.
#
# prowler@master hard-pins microsoft-kiota-abstractions==1.9.2 in [project.dependencies].
# The microsoft-kiota-http security bump to 1.9.9 (GHSA-7j59-v9qr-6fq9) requires
@@ -453,6 +480,7 @@ constraint-dependencies = [
# that request pyjwt[crypto] and leave cryptography (needed for RS256) only transitive.
override-dependencies = [
"okta==3.4.2",
"azure-mgmt-containerservice==34.1.0",
"microsoft-kiota-abstractions==1.9.9",
"dulwich==1.2.5",
"pyjwt[crypto]==2.13.0"
+44 -3
View File
@@ -1,9 +1,15 @@
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import rls_transaction
from api.models import Membership, Role, Tenant, User, UserRoleRelationship
from api.models import (
Membership,
Role,
SAMLConfiguration,
Tenant,
User,
UserRoleRelationship,
)
from django.db import transaction
class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
@@ -18,7 +24,42 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
# Link existing accounts with the same email address
email = sociallogin.account.extra_data.get("email")
if sociallogin.provider.id == "saml":
# For SAML, the asserted NameID email cannot be trusted on its own:
# any tenant can claim any email domain in its SAML configuration. To
# prevent cross-tenant account takeover (GHSA-h8m9-jgf8-vwvp), only link
# the incoming SAML session to an existing account when (1) the email
# domain matches the tenant whose ACS endpoint is being used and (2) the
# existing user is already a member of that tenant.
email = sociallogin.user.email
if not email:
return
domain = email.rsplit("@", 1)[-1].lower()
resolver_match = getattr(request, "resolver_match", None)
organization_slug = (
(resolver_match.kwargs or {}).get("organization_slug", "")
if resolver_match
else ""
).lower()
# The ACS endpoint is scoped per email domain; reject mismatches so an
# attacker cannot replay an assertion through another tenant's endpoint.
if organization_slug != domain:
return
try:
saml_config = SAMLConfiguration.objects.using(MainRouter.admin_db).get(
email_domain=domain
)
except SAMLConfiguration.DoesNotExist:
return
existing_user = self.get_user_by_email(email)
if existing_user and existing_user.is_member_of_tenant(
str(saml_config.tenant_id)
):
sociallogin.connect(request, existing_user)
return
if email:
existing_user = self.get_user_by_email(email)
if existing_user:
+6 -9
View File
@@ -1,14 +1,12 @@
import logging
import os
import sys
from pathlib import Path
from django.apps import AppConfig
from django.conf import settings
from config.custom_logging import BackendLogger
from config.env import env
from django.apps import AppConfig
from django.conf import settings
logger = logging.getLogger(BackendLogger.API)
@@ -30,8 +28,10 @@ class ApiConfig(AppConfig):
name = "api"
def ready(self):
from api import schema_extensions # noqa: F401
from api import signals # noqa: F401
from api import (
schema_extensions, # noqa: F401
signals, # noqa: F401
)
# Generate required cryptographic keys if not present, but only if:
# `"manage.py" not in sys.argv[0]`: If an external server (e.g., Gunicorn) is running the app
@@ -42,9 +42,6 @@ class ApiConfig(AppConfig):
):
self._ensure_crypto_keys()
# Neo4j driver is created lazily on first use (see api.attack_paths.database).
# App init never contacts Neo4j, so a Neo4j outage cannot block API startup.
def _ensure_crypto_keys(self):
"""
Orchestrator method that ensures all required cryptographic keys are present.
@@ -5,7 +5,6 @@ from api.attack_paths.queries import (
get_query_by_id,
)
__all__ = [
"AttackPathsQueryDefinition",
"AttackPathsQueryParameterDefinition",
@@ -4,10 +4,10 @@ Cypher sanitizer for custom (user-supplied) Attack Paths queries.
Two responsibilities:
1. **Validation** - reject queries containing SSRF or dangerous procedure
patterns (defense-in-depth; the primary control is ``neo4j.READ_ACCESS``).
patterns (defense-in-depth; the primary control is `neo4j.READ_ACCESS`).
2. **Provider-scoped label injection** - inject a dynamic
``_Provider_{uuid}`` label into every node pattern so the database can
`_Provider_{uuid}` label into every node pattern so the database can
use its native label index for provider isolation.
Label-injection pipeline:
@@ -22,18 +22,16 @@ Label-injection pipeline:
import re
from rest_framework.exceptions import ValidationError
from tasks.jobs.attack_paths.config import get_provider_label
# Step 1 - String / comment protection
# Single combined regex: strings first, then line comments.
# Single combined regex: strings first, then line comments
# The regex engine finds the leftmost match, so a string like 'https://prowler.com'
# is consumed as a string before the // inside it can match as a comment.
# is consumed as a string before the // inside it can match as a comment
_PROTECTED_RE = re.compile(r"'(?:[^'\\]|\\.)*'|\"(?:[^\"\\]|\\.)*\"|//[^\n]*")
# Step 2 - Clause splitting
# OPTIONAL MATCH must come before MATCH to avoid partial matching.
# `OPTIONAL MATCH` must come before `MATCH` to avoid partial matching
_CLAUSE_RE = re.compile(
r"\b(OPTIONAL\s+MATCH|MATCH|WHERE|RETURN|WITH|ORDER\s+BY"
r"|SKIP|LIMIT|UNION|UNWIND|CALL)\b",
@@ -41,10 +39,10 @@ _CLAUSE_RE = re.compile(
)
# Pass A - Labeled node patterns (all segments)
# Matches node patterns that have at least one :Label.
# (?<!\w)\( - open paren NOT preceded by a word char (excludes function calls).
# Group 1: optional variable + one or more :Label
# Group 2: optional {properties} + closing paren
# Matches node patterns that have at least one `:Label`
# `(?<!\w)\(` - open paren NOT preceded by a word char, excludes function calls
# Group 1: optional variable + one or more `:Label`
# Group 2: optional `{`properties`}` + closing paren
_LABELED_NODE_RE = re.compile(
r"(?<!\w)\("
r"("
@@ -57,9 +55,9 @@ _LABELED_NODE_RE = re.compile(
r")"
)
# Pass B - Bare node patterns (MATCH segments only)
# Matches (identifier) or (identifier {properties}) without any :Label.
# Only applied in MATCH/OPTIONAL MATCH segments.
# Pass B - Bare node patterns (`MATCH` segments only)
# Matches (identifier) or (identifier {properties}) without any `:Label`
# Only applied in `MATCH` / `OPTIONAL MATCH` segments
_BARE_NODE_RE = re.compile(
r"(?<!\w)\(" r"(\s*[a-zA-Z_]\w*)" r"(\s*(?:\{[^}]*\})?)" r"\s*\)"
)
@@ -98,6 +96,11 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
node pattern.
"""
label = get_provider_label(provider_id)
return inject_label(cypher, label)
def inject_label(cypher: str, label: str) -> str:
"""Rewrite a Cypher query to append a label to every node pattern."""
# Step 1: Protect strings and comments (single pass, leftmost-first)
protected: list[str] = []
@@ -136,9 +139,7 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
return work
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
# Patterns that indicate SSRF or dangerous procedure calls
# Defense-in-depth layer - the primary control is `neo4j.READ_ACCESS`
+195 -253
View File
@@ -1,263 +1,32 @@
import atexit
import logging
import threading
"""Backwards-compatible facade over the ingest and sink modules.
from contextlib import contextmanager
from typing import Any, Iterator
Historically this module owned a single Neo4j driver used for both the
cartography temp database and the per-tenant sink database. The port to AWS
Neptune split those roles: the cartography ingest (temp) database is always
Neo4j and lives in `api.attack_paths.ingest`; the sink is configurable
(Neo4j or Neptune) and lives in `api.attack_paths.sink`. This shim preserves
the public API that `tasks/` and `api/v1/views.py` already depend on, and
dispatches to the right module by database-name prefix.
A database name starting with `db-tmp-scan-` is a cartography temp DB and
routes to ingest. Everything else routes to the configured sink.
"""
from contextlib import AbstractContextManager
from typing import Any
from uuid import UUID
import neo4j
import neo4j.exceptions
import neo4j # noqa: F401 - kept for tests that patch api.attack_paths.database.neo4j
from api.attack_paths import ingest
from api.attack_paths import sink as sink_module
from config.env import env
from django.conf import settings
from api.attack_paths.retryable_session import RetryableSession
from tasks.jobs.attack_paths.config import (
BATCH_SIZE,
PROVIDER_RESOURCE_LABEL,
get_provider_label,
from django.conf import (
settings, # noqa: F401 - kept for tests that patch ...database.settings
)
# Without this Celery goes crazy with Neo4j logging
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
"ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
)
READ_QUERY_TIMEOUT_SECONDS = env.int(
"ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
)
MAX_CUSTOM_QUERY_NODES = env.int("ATTACK_PATHS_MAX_CUSTOM_QUERY_NODES", default=250)
# Shorter than CONN_ACQUISITION_TIMEOUT — the driver requires acquisition to be
# the longer of the two (it may include opening a new connection).
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
READ_EXCEPTION_CODES = [
"Neo.ClientError.Statement.AccessMode",
"Neo.ClientError.Procedure.ProcedureNotFound",
]
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
# Module-level process-wide driver singleton
_driver: neo4j.Driver | None = None
_lock = threading.Lock()
# Base Neo4j functions
# Build the Bolt URI for Neo4j from the HOST and PORT values of the "neo4j"
# entry in settings.DATABASES. Returns a string of the form "bolt://<host>:<port>".
def get_uri() -> str:
host = settings.DATABASES["neo4j"]["HOST"]
port = settings.DATABASES["neo4j"]["PORT"]
return f"bolt://{host}:{port}"
def init_driver() -> neo4j.Driver:
    """Return the process-wide Neo4j driver, creating it on first use.

    The driver is published in `_driver` only after `verify_connectivity()`
    succeeds. If the probe fails, the freshly built driver is closed and the
    exception propagates, so no unverified driver is left behind.
    """
    global _driver

    if _driver is None:
        with _lock:
            # Re-check under the lock: another thread may have won the race.
            if _driver is None:
                bolt_uri = get_uri()
                neo4j_settings = settings.DATABASES["neo4j"]
                candidate = neo4j.GraphDatabase.driver(
                    bolt_uri,
                    auth=(neo4j_settings["USER"], neo4j_settings["PASSWORD"]),
                    keep_alive=True,
                    max_connection_lifetime=7200,
                    connection_timeout=CONNECTION_TIMEOUT,
                    connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
                    max_connection_pool_size=50,
                )
                try:
                    candidate.verify_connectivity()
                except Exception:
                    # Close on failure so repeated probes during an outage
                    # cannot leak connection pools.
                    candidate.close()
                    raise
                _driver = candidate
                # Registered each time a driver is published from this branch.
                atexit.register(close_driver)

    return _driver
def get_driver() -> neo4j.Driver:
    """Return the shared driver; thin alias over `init_driver()`."""
    shared_driver = init_driver()
    return shared_driver
def close_driver() -> None:  # TODO: Use it
    """Close the shared driver, if any, and reset the module singleton."""
    global _driver

    with _lock:
        if _driver is None:
            return
        try:
            _driver.close()
        finally:
            # Forget the driver even when close() raises.
            _driver = None
@contextmanager
def get_session(
    database: str | None = None, default_access_mode: str | None = None
) -> Iterator[RetryableSession]:
    """Yield a `RetryableSession` and translate Neo4j errors.

    `neo4j.exceptions.Neo4jError` raised while the session is in use is
    re-raised as one of the module's exceptions:

    - `WriteQueryNotAllowedException` when the session was opened with
      `neo4j.READ_ACCESS` and the error code is in `READ_EXCEPTION_CODES`;
    - `ClientStatementException` when the code starts with
      `CLIENT_STATEMENT_EXCEPTION_PREFIX`;
    - `GraphDatabaseQueryException` otherwise.

    The session wrapper is always closed on exit.
    """

    def _open_session():
        return get_driver().session(
            database=database, default_access_mode=default_access_mode
        )

    wrapped: RetryableSession | None = None
    try:
        wrapped = RetryableSession(
            session_factory=_open_session,
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield wrapped
    except neo4j.exceptions.Neo4jError as exc:
        read_only = default_access_mode == neo4j.READ_ACCESS
        if read_only and exc.code and exc.code in READ_EXCEPTION_CODES:
            raise WriteQueryNotAllowedException(
                message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
            )

        message = str(exc) if exc.message is None else exc.message
        if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
            raise ClientStatementException(message=message, code=exc.code)
        raise GraphDatabaseQueryException(message=message, code=exc.code)
    finally:
        if wrapped is not None:
            wrapped.close()
def execute_read_query(
    database: str,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> neo4j.graph.Graph:
    """Run `cypher` in a read transaction and return the result as a graph.

    The session is opened with `neo4j.READ_ACCESS` and the statement runs
    with a timeout of `READ_QUERY_TIMEOUT_SECONDS`.
    """

    def _collect_graph(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
        return tx.run(
            cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
        ).graph()

    with get_session(database, default_access_mode=neo4j.READ_ACCESS) as read_session:
        return read_session.execute_read(_collect_graph)
def create_database(database: str) -> None:
    """Create `database` if it does not exist, using a session with no database selected."""
    with get_session() as admin_session:
        admin_session.run(
            "CREATE DATABASE $database IF NOT EXISTS",
            {"database": database},
        )
def drop_database(database: str) -> None:
    """Drop `database` and its data if it exists, using a session with no database selected."""
    statement = "DROP DATABASE `{}` IF EXISTS DESTROY DATA".format(database)
    with get_session() as admin_session:
        admin_session.run(statement)
def drop_subgraph(database: str, provider_id: str) -> int:
    """
    Remove every node belonging to a provider from the tenant database.

    Relationships are deleted first and nodes second, each in batches of
    `BATCH_SIZE` (rather than one `DETACH DELETE`), so a dense provider graph
    cannot exceed Neo4j's transaction memory limit.

    Returns the number of deleted nodes, or 0 if the database doesn't exist.
    """
    provider_label = get_provider_label(provider_id)
    delete_relationships = f"""
                    MATCH (:`{provider_label}`)-[r]-()
                    WITH DISTINCT r LIMIT $batch_size
                    DELETE r
                    RETURN COUNT(r) AS deleted_rels_count
                    """
    delete_nodes = f"""
                    MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
                    WITH n LIMIT $batch_size
                    DELETE n
                    RETURN COUNT(n) AS deleted_nodes_count
                    """
    total_deleted_nodes = 0

    try:
        with get_session(database) as session:
            # Phase 1: keep deleting relationship batches until none are left.
            while True:
                removed = (
                    session.run(delete_relationships, {"batch_size": BATCH_SIZE})
                    .single()
                    .get("deleted_rels_count", 0)
                )
                if removed <= 0:
                    break

            # Phase 2: the nodes are now relationship-free; delete them in batches.
            while True:
                removed = (
                    session.run(delete_nodes, {"batch_size": BATCH_SIZE})
                    .single()
                    .get("deleted_nodes_count", 0)
                )
                total_deleted_nodes += removed
                if removed <= 0:
                    break
    except GraphDatabaseQueryException as exc:
        if exc.code != "Neo.ClientError.Database.DatabaseNotFound":
            raise
        return 0

    return total_deleted_nodes
def has_provider_data(database: str, provider_id: str) -> bool:
    """
    Tell whether at least one ProviderResource node exists for this provider.

    Returns `False` if the database doesn't exist.
    """
    label = get_provider_label(provider_id)
    probe = "MATCH (n:{}:`{}`) RETURN 1 LIMIT 1".format(PROVIDER_RESOURCE_LABEL, label)

    try:
        with get_session(database, default_access_mode=neo4j.READ_ACCESS) as read_session:
            first_row = read_session.run(probe).single()
            return first_row is not None
    except GraphDatabaseQueryException as exc:
        if exc.code != "Neo.ClientError.Database.DatabaseNotFound":
            raise
        return False
def clear_cache(database: str) -> None:
    """Clear Neo4j's query caches for `database`; failures are logged, not raised."""
    try:
        with get_session(database) as cache_session:
            cache_session.run("CALL db.clearQueryCaches()")
    except GraphDatabaseQueryException as exc:
        logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
# Neo4j functions related to Prowler + Cartography
def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
prefix = "tmp-scan" if temporary else "tenant"
return f"db-{prefix}-{str(entity_id).lower()}"
# Name prefix produced by get_database_name(..., temporary=True); database
# names starting with it are routed to the ingest module.
TEMP_DB_PREFIX = "db-tmp-scan-"
# Exceptions
@@ -272,7 +41,6 @@ class GraphDatabaseQueryException(Exception):
def __str__(self) -> str:
if self.code:
return f"{self.code}: {self.message}"
return self.message
@@ -282,3 +50,177 @@ class WriteQueryNotAllowedException(GraphDatabaseQueryException):
class ClientStatementException(GraphDatabaseQueryException):
pass
# Routing
def _is_ingest_database(database: str | None) -> bool:
    """Return True when `database` is a non-empty name starting with `TEMP_DB_PREFIX`."""
    if not database:
        return False
    return database.startswith(TEMP_DB_PREFIX)
# Driver lifecycle
def init_driver() -> Any:
    """Initialize the configured sink backend and return what `sink_module.init()` returns.

    The ingest driver is not touched here: the ingest module creates it on
    demand the first time one of its operations asks for the driver.
    """
    active_sink = sink_module.init()
    return active_sink
def close_driver() -> None:
    """Close the sink backends first, then the ingest driver."""
    for shutdown in (sink_module.close, ingest.close_driver):
        shutdown()
def get_driver() -> neo4j.Driver:
    """Return the driver handle of the active sink backend.

    The backend's `get_driver()` is used when it has one, otherwise its
    `get_writer()`. Kept for tests and legacy call-sites; prefer
    `get_session` for new code.

    Raises:
        RuntimeError: if the backend exposes neither accessor.
    """
    backend = sink_module.get_backend()
    # Neo4jSink exposes get_driver(); NeptuneSink exposes get_writer()
    for accessor_name in ("get_driver", "get_writer"):
        if hasattr(backend, accessor_name):
            return getattr(backend, accessor_name)()
    raise RuntimeError("Active sink backend does not expose a driver handle")
def verify_connectivity() -> None:
    """Readiness-probe entry point: raise if the active sink cannot be reached.

    Delegates to the active backend's own `verify_connectivity()`, so the
    check stays backend-agnostic.
    """
    active_backend = sink_module.get_backend()
    active_backend.verify_connectivity()
def verify_scan_databases_available() -> None:
    """Check both graph databases an Attack Paths scan needs.

    Probes the ingest Neo4j driver and then the sink driver. Both probes
    always run; if any fails, a single `RuntimeError` listing every failure
    is raised, chained from the first one.
    """
    failures: list[tuple[str, Exception]] = []

    try:
        ingest.get_driver().verify_connectivity()
    except Exception as ingest_exc:
        failures.append((f"ingest Neo4j: {ingest_exc}", ingest_exc))

    try:
        get_driver().verify_connectivity()
    except Exception as sink_exc:
        failures.append(
            (f"sink {settings.ATTACK_PATHS_SINK_DATABASE}: {sink_exc}", sink_exc)
        )

    if not failures:
        return

    summary = "; ".join(description for description, _ in failures)
    raise RuntimeError(
        "Attack Paths graph database unavailable before scan start: " + summary
    ) from failures[0][1]
def get_uri() -> str:
    """Return the sink URI. Retained for backwards compatibility.

    Returns a `bolt+s://` URI built from the Neptune writer endpoint when the
    configured sink is Neptune, otherwise a `bolt://` URI built from the
    Neo4j host and port.
    """
    # Compare case-insensitively: the sink factory lower-cases this setting
    # before resolving the backend, so e.g. "Neptune" must pick Neptune here too.
    if str(settings.ATTACK_PATHS_SINK_DATABASE).lower() == "neptune":
        cfg = settings.DATABASES["neptune"]
        return f"bolt+s://{cfg['WRITER_ENDPOINT']}:{cfg['PORT']}"
    cfg = settings.DATABASES["neo4j"]
    return f"bolt://{cfg['HOST']}:{cfg['PORT']}"
def get_ingest_uri() -> str:
    """Return the URI of the cartography temp (ingest) database.

    Always delegates to the ingest module, whatever sink is configured.
    """
    ingest_uri = ingest.get_uri()
    return ingest_uri
# Session API
def get_session(
    database: str | None = None,
    default_access_mode: str | None = None,
) -> AbstractContextManager:
    """Open a session on whichever backend owns `database`.

    Routing:
    - names starting with `db-tmp-scan-` go to ingest;
    - no name at all also goes to ingest (CREATE / DROP DATABASE admin ops);
    - every other name goes to the active sink.
    """
    routes_to_ingest = database is None or _is_ingest_database(database)
    provider = ingest if routes_to_ingest else sink_module.get_backend()
    return provider.get_session(
        database=database, default_access_mode=default_access_mode
    )
def execute_read_query(
    database: str,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> neo4j.graph.Graph:
    """Run a read-only query on the active sink and return its result."""
    active_backend = sink_module.get_backend()
    return active_backend.execute_read_query(database, cypher, parameters)
def create_database(database: str) -> None:
    """Create a database on the backend that owns its name.

    Temp DBs (`db-tmp-scan-*`) are created through ingest; every other name
    is handed to the active sink backend. On the Neo4j sink that lands on
    the same Neo4j cluster; on the Neptune sink tenant DB creates are no-ops.
    """
    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
    owner.create_database(database)
def drop_database(database: str) -> None:
    """Drop a database, using the same routing as `create_database`."""
    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
    owner.drop_database(database)
def drop_subgraph(database: str, provider_id: str) -> int:
    """Delete a provider's subgraph via the active sink; returns the sink's count."""
    active_backend = sink_module.get_backend()
    return active_backend.drop_subgraph(database, provider_id)
def has_provider_data(database: str, provider_id: str) -> bool:
    """Ask the active sink whether it holds data for this provider."""
    active_backend = sink_module.get_backend()
    return active_backend.has_provider_data(database, provider_id)
def clear_cache(database: str) -> None:
    """Clear the query cache on the backend that owns `database`.

    Temp DBs go through ingest; any other name goes to the active sink.
    """
    owner = ingest if _is_ingest_database(database) else sink_module.get_backend()
    owner.clear_cache(database)
# Name helper
def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
prefix = "tmp-scan" if temporary else "tenant"
return f"db-{prefix}-{str(entity_id).lower()}"
@@ -0,0 +1,29 @@
"""Cartography ingest layer.
Public surface for the per-scan Neo4j temp database driver. Implementation
lives in `api.attack_paths.ingest.driver`.
"""
from api.attack_paths.ingest.driver import (
clear_cache,
close_driver,
create_database,
drop_database,
get_driver,
get_session,
get_uri,
init_driver,
run_cypher,
)
__all__ = [
"clear_cache",
"close_driver",
"create_database",
"drop_database",
"get_driver",
"get_session",
"get_uri",
"init_driver",
"run_cypher",
]
@@ -0,0 +1,187 @@
"""Cartography ingest driver: per-scan throw-away Neo4j database.
Cartography writes each scan's graph into a throw-away Neo4j database named
`db-tmp-scan-{scan_uuid}`. This is always Neo4j, regardless of the configured
sink: Neptune is single-database and cannot host per-scan throw-away
databases. This module owns the Neo4j driver used for those temp DBs and the
admin ops they need (CREATE / DROP DATABASE).
"""
import atexit
import logging
import threading
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Any
import neo4j
import neo4j.exceptions
from api.attack_paths.retryable_session import RetryableSession
from config.env import env
from django.conf import settings
# Limit the `neo4j` logger to ERROR and stop it propagating to parent loggers.
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False

# Passed to RetryableSession as `max_retries` by get_session().
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
)
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
# host can't pin a worker on a temp-DB op longer than this.
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
# Both values below are handed to neo4j.GraphDatabase.driver() in init_driver().
MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)

# Process-wide ingest driver; created by init_driver(), reset by close_driver().
_driver: neo4j.Driver | None = None
# Guards creation and teardown of `_driver`.
_lock = threading.Lock()
def _neo4j_config() -> dict:
    """Return the `neo4j` entry of `settings.DATABASES`."""
    databases = settings.DATABASES
    return databases["neo4j"]
def get_uri() -> str:
    """Bolt URI for the Neo4j temp (ingest) database. Always Neo4j.

    Raises:
        RuntimeError: if the configured host or port is empty.
    """
    config = _neo4j_config()
    host, port = config["HOST"], config["PORT"]
    if host and port:
        return f"bolt://{host}:{port}"
    raise RuntimeError(
        "NEO4J_HOST / NEO4J_PORT must be set to use the attack-paths "
        "temp database. Workers require Neo4j env even when the sink is Neptune."
    )
def init_driver() -> neo4j.Driver:
    """Initialize the temp-database Neo4j driver. Idempotent.

    The driver is created once per process (re-created after
    `close_driver()`), under `_lock`. The connectivity check at creation is
    best-effort: a failure is logged and the driver is kept, so an
    unreachable Neo4j does not raise from here.

    Returns:
        The process-wide ingest driver.

    Raises:
        RuntimeError: from `get_uri()` when host or port is not configured.
    """
    global _driver
    if _driver is not None:
        return _driver
    with _lock:
        if _driver is None:
            config = _neo4j_config()
            _driver = neo4j.GraphDatabase.driver(
                get_uri(),
                auth=(config["USER"], config["PASSWORD"]),
                keep_alive=True,
                max_connection_lifetime=MAX_CONNECTION_LIFETIME,
                connection_timeout=CONNECTION_TIMEOUT,
                connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
                max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
            )
            # Best-effort connectivity check: a Neo4j that is down at boot must
            # not crash the worker. The driver reconnects lazily on first use.
            try:
                _driver.verify_connectivity()
            except Exception:
                logging.warning(
                    "Neo4j temp-database unreachable at init; continuing with a "
                    "lazily-reconnecting driver",
                    exc_info=True,
                )
            # This branch runs again after every close_driver(); drop any
            # earlier registration so exit handlers do not pile up across
            # init/close cycles. unregister() is a no-op when nothing is
            # registered.
            atexit.unregister(close_driver)
            atexit.register(close_driver)
    return _driver
def get_driver() -> neo4j.Driver:
    """Return the ingest driver, creating it on first use."""
    ingest_driver = init_driver()
    return ingest_driver
def close_driver() -> None:
    """Close the ingest driver, if one exists, and reset the module singleton."""
    global _driver

    with _lock:
        if _driver is None:
            return
        try:
            _driver.close()
        finally:
            # Forget the driver even when close() raises.
            _driver = None
@contextmanager
def get_session(
    database: str | None = None,
    default_access_mode: str | None = None,
) -> Iterator[RetryableSession]:
    """Yield a session on the Neo4j temp-database cluster.

    Used for temp DB sessions and, when `database` is None, for admin
    operations (CREATE / DROP DATABASE). `neo4j.exceptions.Neo4jError` is
    translated into the exception types defined in
    `api.attack_paths.database`; the session wrapper is always closed on exit.
    """
    # Imported at call time rather than at module import.
    from api.attack_paths.database import (
        ClientStatementException,
        GraphDatabaseQueryException,
        WriteQueryNotAllowedException,
    )

    read_violation_codes = [
        "Neo.ClientError.Statement.AccessMode",
        "Neo.ClientError.Procedure.ProcedureNotFound",
    ]
    statement_error_prefix = "Neo.ClientError.Statement."

    def _open_session():
        return get_driver().session(
            database=database, default_access_mode=default_access_mode
        )

    wrapped: RetryableSession | None = None
    try:
        wrapped = RetryableSession(
            session_factory=_open_session,
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield wrapped
    except neo4j.exceptions.Neo4jError as exc:
        read_only = default_access_mode == neo4j.READ_ACCESS
        if read_only and exc.code and exc.code in read_violation_codes:
            raise WriteQueryNotAllowedException(
                message="Read query not allowed", code=read_violation_codes[0]
            )

        message = str(exc) if exc.message is None else exc.message
        if exc.code and exc.code.startswith(statement_error_prefix):
            raise ClientStatementException(message=message, code=exc.code)
        raise GraphDatabaseQueryException(message=message, code=exc.code)
    finally:
        if wrapped is not None:
            wrapped.close()
def create_database(database: str) -> None:
    """Create `database` on the Neo4j cluster if it is missing. Used for temp scan DBs."""
    statement = "CREATE DATABASE $database IF NOT EXISTS"
    with get_session() as admin_session:
        admin_session.run(statement, {"database": database})
def drop_database(database: str) -> None:
    """Drop `database` and its data from the Neo4j cluster if present. Used for temp scan DBs."""
    statement = "DROP DATABASE `{}` IF EXISTS DESTROY DATA".format(database)
    with get_session() as admin_session:
        admin_session.run(statement)
def clear_cache(database: str) -> None:
    """Clear the query caches of a Neo4j database; failures are logged, not raised."""
    # Imported at call time rather than at module import.
    from api.attack_paths.database import GraphDatabaseQueryException

    try:
        with get_session(database) as cache_session:
            cache_session.run("CALL db.clearQueryCaches()")
    except GraphDatabaseQueryException as exc:
        logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
def run_cypher(
    database: str | None,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> Any:
    """Run one Cypher statement in a short-lived session. Thin helper.

    Opens a session through `get_session(database)`, runs `cypher` with
    `parameters` (an empty dict when none are given) and returns whatever
    `session.run` returned.

    NOTE(review): the session is closed when the `with` block exits, i.e.
    before the caller receives the return value. If `session.run` hands back
    a lazily-consumed driver result, it presumably can no longer be read at
    that point - confirm callers only rely on the statement's side effects.
    """
    with get_session(database) as session:
        return session.run(cypher, parameters or {})
@@ -1,12 +1,11 @@
from api.attack_paths.queries.types import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
)
from api.attack_paths.queries.registry import (
get_queries_for_provider,
get_query_by_id,
)
from api.attack_paths.queries.types import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
)
__all__ = [
"AttackPathsQueryDefinition",
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,13 +1,14 @@
from api.attack_paths.queries.types import AttackPathsQueryDefinition
from api.attack_paths.queries.aws import AWS_QUERIES
# TODO: drop after Neptune cutover
from api.attack_paths.queries.aws_deprecated import AWS_DEPRECATED_QUERIES
from api.attack_paths.queries.types import AttackPathsQueryDefinition
# Query definitions organized by provider
# Query definitions for scans synced with the current schema.
_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
"aws": AWS_QUERIES,
}
# Flat lookup by query ID for O(1) access
_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
definition.id: definition
for definitions in _QUERY_DEFINITIONS.values()
@@ -15,11 +16,45 @@ _QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
}
def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
    """Return the registered attack path queries for `provider`, or an empty list."""
    if provider in _QUERY_DEFINITIONS:
        return _QUERY_DEFINITIONS[provider]
    return []
# TODO: drop after Neptune cutover
#
# Query definitions for pre-cutover scans (`AttackPathsScan.is_migrated=False`)
# whose graph data was written under the previous schema. Both maps expose the
# same query IDs so the API contract is identical regardless of which set is
# routed to.
# Per-provider catalog of the deprecated (pre-cutover) query definitions.
_DEPRECATED_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
    "aws": AWS_DEPRECATED_QUERIES,
}
# Flat lookup by query ID over the deprecated catalog, mirroring _QUERIES_BY_ID.
_DEPRECATED_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
    definition.id: definition
    for definitions in _DEPRECATED_QUERY_DEFINITIONS.values()
    for definition in definitions
}
def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
    """Look up an attack path query by ID; returns None when the ID is unknown."""
    match = _QUERIES_BY_ID.get(query_id)
    return match
def get_queries_for_provider(
    provider: str,
    is_migrated: bool = True,
) -> list[AttackPathsQueryDefinition]:
    """Return the attack path queries registered for `provider`.

    `is_migrated` picks the catalog: truthy for scans synced with the current
    schema, falsy for pre-cutover scans still using the legacy graph shape.
    Unknown providers yield an empty list.
    """
    # TODO: drop the `is_migrated` parameter after Neptune cutover
    if is_migrated:
        return _QUERY_DEFINITIONS.get(provider, [])
    return _DEPRECATED_QUERY_DEFINITIONS.get(provider, [])
def get_query_by_id(
    query_id: str,
    is_migrated: bool = True,
) -> AttackPathsQueryDefinition | None:
    """Look up one attack path query by ID; returns None when the ID is unknown.

    `is_migrated` picks the catalog (see `get_queries_for_provider`).
    """
    # TODO: drop the `is_migrated` parameter after Neptune cutover
    if is_migrated:
        return _QUERIES_BY_ID.get(query_id)
    return _DEPRECATED_QUERIES_BY_ID.get(query_id)
@@ -1,5 +1,4 @@
import logging
from collections.abc import Callable
from typing import Any
@@ -0,0 +1,28 @@
"""Attack-paths sink database layer.
The sink is the persistent store where attack-paths graphs live after a scan
finishes. Currently selectable between Neo4j (OSS / local dev default) and
AWS Neptune (hosted dev/staging/prod). Backend is picked by the
`ATTACK_PATHS_SINK_DATABASE` setting at process init.
This package exposes the public factory API; the implementation lives in
`api.attack_paths.sink.factory`.
"""
from api.attack_paths.sink.factory import (
SinkBackend,
close,
get_backend,
get_backend_for_name,
get_backend_for_scan,
init,
)
__all__ = [
"SinkBackend",
"close",
"get_backend",
"get_backend_for_name",
"get_backend_for_scan",
"init",
]
@@ -0,0 +1,92 @@
"""Protocol every sink backend must implement."""
from contextlib import AbstractContextManager
from typing import Any, Protocol
import neo4j
class SinkDatabase(Protocol):
    """Contract for the persistent attack-paths graph store.

    The `database` argument is an opaque identifier passed through from the
    legacy `database.py` API surface. On Neo4j it is the per-tenant database
    name (e.g. `db-tenant-{uuid}`). On Neptune it is ignored (the cluster
    has a single graph, and isolation is label-based).
    """

    # Prepare the backend for use (the Neo4j sink creates its driver here).
    def init(self) -> None: ...

    # Release whatever driver(s) the backend holds.
    def close(self) -> None: ...

    def verify_connectivity(self) -> None:
        """Raise if the backend the API read path uses is unreachable.

        Neo4j verifies its single driver. Neptune verifies the reader
        driver (the endpoint the API serves reads from); on single-endpoint
        clusters the reader aliases the writer, so that path is covered too.
        Used by the readiness probe; must not block longer than the caller's
        probe budget.
        """
        ...

    # Return a context manager yielding a session for `database`, optionally
    # opened with the given default access mode.
    def get_session(
        self,
        database: str | None = None,
        default_access_mode: str | None = None,
    ) -> AbstractContextManager: ...

    # Run a read-only Cypher query and return the resulting graph.
    def execute_read_query(
        self,
        database: str,
        cypher: str,
        parameters: dict[str, Any] | None = None,
    ) -> neo4j.graph.Graph: ...

    # Create the backing database (the routing layer documents this as a
    # no-op on the Neptune sink).
    def create_database(self, database: str) -> None: ...

    # Drop the backing database.
    def drop_database(self, database: str) -> None: ...

    # Delete a provider's nodes from `database`; returns an int count
    # (presumably the number of deleted nodes - confirm per backend).
    def drop_subgraph(self, database: str, provider_id: str) -> int: ...

    # Report whether the sink holds any data for the provider.
    def has_provider_data(self, database: str, provider_id: str) -> bool: ...

    # Clear cached query state for `database`.
    def clear_cache(self, database: str) -> None: ...

    def ensure_sync_indexes(self, database: str) -> None:
        """Create any index needed for the sync write path.

        Called once at the start of each provider sync; must be idempotent.
        Neo4j creates a `_provider_element_id` index on `_ProviderResource`;
        Neptune is a no-op (its `~id` lookup needs no index).
        """
        ...

    def write_nodes(
        self,
        database: str,
        labels: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert a batch of nodes into the sink.

        `labels` is a pre-rendered Cypher label string ready to drop after
        the node variable (e.g. `` `AWSUser`:`_ProviderResource`:`_Tenant_x` ``).
        Each row carries `provider_element_id` and `props`.
        """
        ...

    def write_relationships(
        self,
        database: str,
        rel_type: str,
        provider_id: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert a batch of relationships into the sink.

        Each row carries `start_element_id`, `end_element_id`,
        `provider_element_id` and `props`. `rel_type` is the relationship
        type (already a valid Cypher identifier).
        """
        ...
@@ -0,0 +1,134 @@
"""Sink backend factory and process-wide handle cache.
Picks the active backend from `settings.ATTACK_PATHS_SINK_DATABASE` at first
use, holds the active backend plus any secondary backends needed to serve
scans written under the previous configuration, and tears them all down on
process shutdown. Imported via `from api.attack_paths import sink as
sink_module`.
"""
import threading
from enum import StrEnum, auto
from api.attack_paths.sink.base import SinkDatabase
from api.models import AttackPathsScan
from django.conf import settings
# Backend names
class SinkBackend(StrEnum):
    """Names of the supported sink backends."""

    # With StrEnum, auto() yields the lower-cased member name, so the values
    # are "neo4j" and "neptune".
    NEO4J = auto()
    NEPTUNE = auto()
# Backend cache
# Active backend for this process; set by init(), cleared by close().
_backend: SinkDatabase | None = None
# Backends other than the configured one, built on demand by
# _build_backend_cached() and keyed by backend name.
_secondary_backends: dict[SinkBackend, SinkDatabase] = {}
# Guards updates to both caches above.
_lock = threading.Lock()
def _resolve_setting() -> SinkBackend:
    """Map `settings.ATTACK_PATHS_SINK_DATABASE` (case-insensitive) to a `SinkBackend`.

    Raises:
        RuntimeError: if the setting does not name a known backend.
    """
    configured = settings.ATTACK_PATHS_SINK_DATABASE.lower()
    try:
        selected = SinkBackend(configured)
    except ValueError:
        allowed = sorted(member.value for member in SinkBackend)
        raise RuntimeError(
            f"ATTACK_PATHS_SINK_DATABASE must be one of {allowed}; got {configured!r}"
        )
    return selected
def _build_backend(name: SinkBackend) -> SinkDatabase:
    """Instantiate (without initializing) the backend class for `name`.

    The backend modules are imported here, at call time, so only the module
    for the requested backend is loaded.

    Raises:
        RuntimeError: if `name` is not a known backend.
    """
    if name is SinkBackend.NEO4J:
        from api.attack_paths.sink.neo4j import Neo4jSink as backend_cls
    elif name is SinkBackend.NEPTUNE:
        from api.attack_paths.sink.neptune import NeptuneSink as backend_cls
    else:
        raise RuntimeError(f"Unknown sink backend {name!r}")
    return backend_cls()
# Lifecycle
def init(name: SinkBackend | str | None = None) -> SinkDatabase:
    """Initialize the configured sink backend. Idempotent.

    `name` overrides the configured backend only on the call that actually
    creates it; once a backend is active it is returned as-is.
    """
    global _backend

    if _backend is None:
        with _lock:
            # Re-check under the lock: another thread may have initialized it.
            if _backend is None:
                selected = SinkBackend(name) if name else _resolve_setting()
                instance = _build_backend(selected)
                instance.init()
                # Publish only after init() returned without raising.
                _backend = instance

    return _backend
def close() -> None:
    """Close the active backend and every cached secondary backend.

    The caches are emptied under `_lock`; the backends are then closed one by
    one. Closing is best-effort: a backend that fails to close is logged and
    the remaining backends are still closed. Nothing is raised to the caller.
    """
    global _backend
    # Local import: this module does not import `logging` at the top level.
    import logging

    with _lock:
        backends = [
            b for b in (_backend, *_secondary_backends.values()) if b is not None
        ]
        _backend = None
        _secondary_backends.clear()
    for backend in backends:
        try:
            backend.close()
        except Exception:  # pragma: no cover - best-effort
            # Keep shutting down the rest, but leave a trace of the failure
            # instead of discarding it silently.
            logging.getLogger(__name__).warning(
                "Failed to close attack-paths sink backend %s",
                type(backend).__name__,
                exc_info=True,
            )
def get_backend() -> SinkDatabase:
    """Return the active sink, initializing it on the first call."""
    active_backend = init()
    return active_backend
# Per-scan routing
def get_backend_for_scan(scan: AttackPathsScan) -> SinkDatabase:
    """Return the backend that stores this scan's graph.

    Uses `scan.sink_backend`; falls back to Neo4j when the attribute is
    missing or is not a string.
    """
    neo4j_name = SinkBackend.NEO4J.value
    backend_name = getattr(scan, "sink_backend", neo4j_name)
    if isinstance(backend_name, str):
        return get_backend_for_name(backend_name)
    return get_backend_for_name(neo4j_name)
def get_backend_for_name(name: SinkBackend | str) -> SinkDatabase:
    """Return the backend named by persisted scan metadata.

    The configured backend is served from the active handle; any other
    backend comes from the secondary cache.
    """
    requested = SinkBackend(name)
    configured = _resolve_setting()
    return get_backend() if requested is configured else _build_backend_cached(requested)
def _build_backend_cached(name: SinkBackend) -> SinkDatabase:
    """Return the secondary backend for `name`, building and initializing it once."""
    # TODO: drop after Neptune cutover
    # Needed only during cutover to serve Neo4j-written scans from a Neptune-
    # configured API pod (and vice versa). Once every scan is on Neptune,
    # `get_backend_for_scan` becomes a one-liner returning `get_backend()`.
    if name not in _secondary_backends:
        with _lock:
            # Re-check under the lock: another thread may have built it.
            if name not in _secondary_backends:
                fresh = _build_backend(name)
                fresh.init()
                _secondary_backends[name] = fresh
    return _secondary_backends[name]
@@ -0,0 +1,454 @@
"""Neo4j sink implementation.
Owns a Neo4j driver independent from the staging driver. On OSS and local dev
this is the only sink; on hosted deployments it runs only as a legacy read
path while phase-1 drains tenant DBs.
"""
import atexit
import logging
import threading
import time
from collections.abc import Iterator
from contextlib import AbstractContextManager, contextmanager
from typing import Any
import neo4j
import neo4j.exceptions
from api.attack_paths.retryable_session import RetryableSession
from api.attack_paths.sink.base import SinkDatabase
from config.env import env
from django.conf import settings
# Limit the `neo4j` logger to ERROR and stop it propagating to parent loggers.
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False
logger = logging.getLogger(__name__)

# Passed to RetryableSession as `max_retries` by Neo4jSink.get_session().
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
)
# Passed as the `timeout` of the transaction run by Neo4jSink.execute_read_query().
READ_QUERY_TIMEOUT_SECONDS = env.int(
    "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
)
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
# host can't pin a request or the readiness probe longer than this.
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
# Both values below are handed to neo4j.GraphDatabase.driver() in Neo4jSink.init().
MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)
# Neo4j error codes that get_session() turns into WriteQueryNotAllowedException
# when the session was opened with READ_ACCESS.
READ_EXCEPTION_CODES = [
    "Neo.ClientError.Statement.AccessMode",
    "Neo.ClientError.Procedure.ProcedureNotFound",
]
# Error codes starting with this prefix are re-raised as ClientStatementException.
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
# Error code Neo4j reports for a database that does not exist.
DATABASE_NOT_FOUND_CODE = "Neo.ClientError.Database.DatabaseNotFound"
class Neo4jSink(SinkDatabase):
"""Neo4j-backed sink. Multi-database cluster; tenant isolation is physical."""
def __init__(self) -> None:
    """Start with no driver; `init()` creates it on first use."""
    # Guards creation and teardown of `_driver`.
    self._lock = threading.Lock()
    self._driver: neo4j.Driver | None = None
    # Ensures `close` is registered with atexit at most once per instance.
    self._atexit_registered = False
# Driver
def _config(self) -> dict:
    """Return the `neo4j` entry of `settings.DATABASES`."""
    databases = settings.DATABASES
    return databases["neo4j"]
def _uri(self) -> str:
    """Build the Bolt URI for the Neo4j sink.

    Raises:
        RuntimeError: if the configured host or port is empty.
    """
    cfg = self._config()
    host, port = cfg["HOST"], cfg["PORT"]
    if host and port:
        return f"bolt://{host}:{port}"
    raise RuntimeError(
        "NEO4J_HOST / NEO4J_PORT must be set when ATTACK_PATHS_SINK_DATABASE=neo4j"
    )
def init(self) -> neo4j.Driver:
    """Create the sink's Neo4j driver on first call and return it. Idempotent.

    The connectivity check at creation is best-effort: a failure is logged
    and the driver is kept, so an unreachable Neo4j does not raise from here.
    """
    if self._driver is None:
        with self._lock:
            # Re-check under the lock: another thread may have created it.
            if self._driver is None:
                cfg = self._config()
                self._driver = neo4j.GraphDatabase.driver(
                    self._uri(),
                    auth=(cfg["USER"], cfg["PASSWORD"]),
                    keep_alive=True,
                    max_connection_lifetime=MAX_CONNECTION_LIFETIME,
                    connection_timeout=CONNECTION_TIMEOUT,
                    connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
                    max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
                )
                # The eager connectivity check is best-effort. A Neo4j that is
                # down at boot must not crash the process (same degradation
                # model as Postgres): the driver reconnects lazily on first
                # use, and /health/ready surfaces the outage until it recovers.
                try:
                    self._driver.verify_connectivity()
                except Exception:
                    logger.warning(
                        "Neo4j sink unreachable at init; continuing with a lazily-reconnecting driver",
                        exc_info=True,
                    )
                # Register the exit handler once, even across close/init cycles.
                if not self._atexit_registered:
                    atexit.register(self.close)
                    self._atexit_registered = True
    return self._driver
def _get_driver(self) -> neo4j.Driver:
    """Return the sink's driver, creating it on first use."""
    sink_driver = self.init()
    return sink_driver
def verify_connectivity(self) -> None:
    """Raise if the sink's Neo4j driver cannot reach the server."""
    sink_driver = self._get_driver()
    sink_driver.verify_connectivity()
def close(self) -> None:
    """Close the sink's driver, if any, and forget it."""
    with self._lock:
        if self._driver is None:
            return
        try:
            self._driver.close()
        finally:
            # Drop the reference even when close() raises.
            self._driver = None
# Sessions
@contextmanager
def get_session(
    self,
    database: str | None = None,
    default_access_mode: str | None = None,
) -> Iterator[RetryableSession]:
    """Yield a `RetryableSession` on the Neo4j sink and translate Neo4j errors.

    `neo4j.exceptions.Neo4jError` is re-raised as
    `WriteQueryNotAllowedException`, `ClientStatementException` or
    `GraphDatabaseQueryException` depending on the access mode and error
    code. The session wrapper is always closed on exit.
    """
    # Imported at call time rather than at module import.
    from api.attack_paths.database import (
        ClientStatementException,
        GraphDatabaseQueryException,
        WriteQueryNotAllowedException,
    )

    def _open_session():
        return self._get_driver().session(
            database=database, default_access_mode=default_access_mode
        )

    wrapped: RetryableSession | None = None
    try:
        wrapped = RetryableSession(
            session_factory=_open_session,
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield wrapped
    except neo4j.exceptions.Neo4jError as exc:
        read_only = default_access_mode == neo4j.READ_ACCESS
        if read_only and exc.code and exc.code in READ_EXCEPTION_CODES:
            raise WriteQueryNotAllowedException(
                message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
            )

        message = str(exc) if exc.message is None else exc.message
        if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
            raise ClientStatementException(message=message, code=exc.code)
        raise GraphDatabaseQueryException(message=message, code=exc.code)
    finally:
        if wrapped is not None:
            wrapped.close()
# Operations
def execute_read_query(
    self,
    database: str,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> neo4j.graph.Graph:
    """Run `cypher` in a read transaction and return the result as a graph.

    The session is opened with `neo4j.READ_ACCESS` and the statement runs
    with a timeout of `READ_QUERY_TIMEOUT_SECONDS`.
    """

    def _collect_graph(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
        return tx.run(
            cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
        ).graph()

    with self.get_session(
        database, default_access_mode=neo4j.READ_ACCESS
    ) as read_session:
        return read_session.execute_read(_collect_graph)
def create_database(self, database: str) -> None:
    """Create `database` if it does not exist, using a session with no database selected."""
    statement = "CREATE DATABASE $database IF NOT EXISTS"
    with self.get_session() as admin_session:
        admin_session.run(statement, {"database": database})
def drop_database(self, database: str) -> None:
    """Drop `database` and its data if it exists, using a session with no database selected."""
    statement = "DROP DATABASE `{}` IF EXISTS DESTROY DATA".format(database)
    with self.get_session() as admin_session:
        admin_session.run(statement)
def drop_subgraph(self, database: str, provider_id: str) -> int:
    """Delete all nodes for a provider from a tenant database, batched.

    Deletes relationships then nodes in batches (not `DETACH DELETE`) so a
    dense provider's graph cannot exceed Neo4j's transaction memory limit.
    Returns 0, after logging the skip, if the database doesn't exist.

    Args:
        database: Tenant database to delete from.
        provider_id: Provider whose label selects the nodes to delete.

    Returns:
        Total number of nodes deleted.

    Raises:
        GraphDatabaseQueryException: For any failure whose code is not
            `DATABASE_NOT_FOUND_CODE`.
    """
    from api.attack_paths.database import GraphDatabaseQueryException
    from tasks.jobs.attack_paths.config import (
        BATCH_SIZE,
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    provider_label = get_provider_label(provider_id)
    deleted_nodes = 0
    deleted_relationships = 0
    relationship_batches = 0
    node_batches = 0
    drop_t0 = time.perf_counter()

    logger.info(
        "Dropping provider graph from Neo4j sink database %s "
        "(provider=%s, provider_label=%s)",
        database,
        provider_id,
        provider_label,
    )

    try:
        logger.info(
            "Opening Neo4j sink session for provider graph drop "
            "(database=%s, provider=%s)",
            database,
            provider_id,
        )
        with self.get_session(database) as session:
            logger.info(
                "Opened Neo4j sink session for provider graph drop "
                "(database=%s, provider=%s)",
                database,
                provider_id,
            )

            # Phase 1: delete relationships incident to provider nodes in
            # batches. The undirected pattern matches an edge between two
            # provider nodes from both ends, so `DISTINCT r` dedupes it to
            # delete a full batch of unique relationships each round.
            deleted_count = 1
            while deleted_count > 0:
                next_batch = relationship_batches + 1
                logger.info(
                    "Deleting relationship batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    next_batch,
                    deleted_relationships,
                    time.perf_counter() - drop_t0,
                )
                result = session.run(
                    f"""
                    MATCH (:`{provider_label}`)-[r]-()
                    WITH DISTINCT r LIMIT $batch_size
                    DELETE r
                    RETURN COUNT(r) AS deleted_rels_count
                    """,
                    {"batch_size": BATCH_SIZE},
                )
                # `single()` can return None when no record comes back;
                # treat that as "nothing deleted" instead of crashing on
                # `None.get`, as the Neptune sink already does.
                record = result.single()
                deleted_count = (
                    record.get("deleted_rels_count", 0) if record is not None else 0
                )
                if deleted_count > 0:
                    relationship_batches += 1
                    deleted_relationships += deleted_count
                    logger.info(
                        "Deleted relationship batch from Neo4j sink database %s "
                        "(provider=%s, batch=%s, deleted_rels=%s, "
                        "total_rels=%s, elapsed=%.3fs)",
                        database,
                        provider_id,
                        relationship_batches,
                        deleted_count,
                        deleted_relationships,
                        time.perf_counter() - drop_t0,
                    )

            # Phase 2: delete the now relationship-free nodes in batches.
            deleted_count = 1
            while deleted_count > 0:
                next_batch = node_batches + 1
                logger.info(
                    "Deleting node batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    next_batch,
                    deleted_nodes,
                    time.perf_counter() - drop_t0,
                )
                result = session.run(
                    f"""
                    MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
                    WITH n LIMIT $batch_size
                    DELETE n
                    RETURN COUNT(n) AS deleted_nodes_count
                    """,
                    {"batch_size": BATCH_SIZE},
                )
                # Same None guard as in phase 1.
                record = result.single()
                deleted_count = (
                    record.get("deleted_nodes_count", 0) if record is not None else 0
                )
                if deleted_count > 0:
                    node_batches += 1
                    deleted_nodes += deleted_count
                    logger.info(
                        "Deleted node batch from Neo4j sink database %s "
                        "(provider=%s, batch=%s, deleted_nodes=%s, "
                        "total_nodes=%s, elapsed=%.3fs)",
                        database,
                        provider_id,
                        node_batches,
                        deleted_count,
                        deleted_nodes,
                        time.perf_counter() - drop_t0,
                    )
    except GraphDatabaseQueryException as exc:
        # A missing database means there is nothing to delete; any other
        # failure is propagated unchanged.
        if exc.code == DATABASE_NOT_FOUND_CODE:
            logger.info(
                "Skipped provider graph drop from Neo4j sink database %s "
                "(provider=%s, reason=database_not_found, elapsed=%.3fs)",
                database,
                provider_id,
                time.perf_counter() - drop_t0,
            )
            return 0
        raise

    logger.info(
        "Finished dropping provider graph from Neo4j sink database %s "
        "(provider=%s, relationship_batches=%s, deleted_rels=%s, "
        "node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
        database,
        provider_id,
        relationship_batches,
        deleted_relationships,
        node_batches,
        deleted_nodes,
        time.perf_counter() - drop_t0,
    )
    return deleted_nodes
def has_provider_data(self, database: str, provider_id: str) -> bool:
    """Report whether `database` holds at least one node for the provider.

    Args:
        database: Database to probe.
        provider_id: Provider whose label is looked up.

    Returns:
        True if a matching node exists; False if none exists or the
        database itself is missing.

    Raises:
        GraphDatabaseQueryException: For any failure whose code is not
            `DATABASE_NOT_FOUND_CODE`.
    """
    from api.attack_paths.database import GraphDatabaseQueryException
    from tasks.jobs.attack_paths.config import (
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    label = get_provider_label(provider_id)
    # `LIMIT 1` keeps this an existence probe rather than a count.
    probe = f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{label}`) RETURN 1 LIMIT 1"

    try:
        with self.get_session(
            database, default_access_mode=neo4j.READ_ACCESS
        ) as read_session:
            first_row = read_session.run(probe).single()
            return first_row is not None
    except GraphDatabaseQueryException as err:
        if err.code != DATABASE_NOT_FOUND_CODE:
            raise
        return False
def clear_cache(self, database: str) -> None:
    """Clear the query caches of `database`; failures are logged, not raised.

    The result is consumed explicitly so a failure reported while the result
    streams is raised inside the session context, where it can reach the
    warning below, instead of being left on an unread result.

    Args:
        database: Database whose query caches are cleared.
    """
    from api.attack_paths.database import GraphDatabaseQueryException

    try:
        with self.get_session(database) as session:
            session.run("CALL db.clearQueryCaches()").consume()
    except GraphDatabaseQueryException as exc:
        # Cache clearing is best-effort: report the failure and carry on.
        logger.warning(
            "Failed to clear query cache for database `%s`: %s", database, exc
        )
# Sync write path
def ensure_sync_indexes(self, database: str) -> None:
    """Ensure the `_provider_element_id` lookup index exists on `_ProviderResource`.

    All synced nodes are labelled `_ProviderResource`, so this one index
    serves node upserts as well as the endpoint MATCHes of the relationship
    sync. Without it the relationship sync falls back to a label scan per
    row, which makes large provider syncs unworkable.

    Args:
        database: Database in which the index is created.
    """
    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
    )

    # `IF NOT EXISTS` makes the statement safe to repeat.
    index_statement = " ".join(
        (
            "CREATE INDEX provider_element_id_idx IF NOT EXISTS",
            f"FOR (n:`{PROVIDER_RESOURCE_LABEL}`)",
            f"ON (n.`{PROVIDER_ELEMENT_ID_PROPERTY}`)",
        )
    )
    with self.get_session(database) as writer_session:
        outcome = writer_session.run(index_statement)
        outcome.consume()
def write_nodes(
    self,
    database: str,
    labels: str,
    rows: list[dict[str, Any]],
) -> None:
    """Upsert a batch of nodes keyed by their provider element id.

    Args:
        database: Database to write to.
        labels: Label expression appended verbatim after `SET n:`.
        rows: Batch of dicts; the query reads `provider_element_id` and
            `props` from each. An empty batch is a no-op.
    """
    if not rows:
        return

    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
    )

    # MERGE on the element id makes the write idempotent; labels and
    # properties are then layered onto the matched or created node.
    upsert_cypher = f"""
        UNWIND $rows AS row
        MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}})
        SET n:{labels}
        SET n += row.props
    """
    with self.get_session(database) as writer_session:
        outcome = writer_session.run(upsert_cypher, {"rows": rows})
        outcome.consume()
def write_relationships(
    self,
    database: str,
    rel_type: str,
    provider_id: str,
    rows: list[dict[str, Any]],
) -> None:
    """Upsert a batch of relationships between already-synced provider nodes.

    Args:
        database: Database to write to.
        rel_type: Relationship type, interpolated as a backtick-quoted name.
        provider_id: Provider whose label scopes both endpoint MATCHes.
        rows: Batch of dicts; the query reads `start_element_id`,
            `end_element_id`, `provider_element_id` and `props` from each.
            An empty batch is a no-op.
    """
    if not rows:
        return

    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    label = get_provider_label(provider_id)
    # Both endpoints are matched by element id within the provider's label;
    # the relationship itself is merged on its own element id.
    upsert_cypher = f"""
        UNWIND $rows AS row
        MATCH (s:`{PROVIDER_RESOURCE_LABEL}`:`{label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.start_element_id}})
        MATCH (t:`{PROVIDER_RESOURCE_LABEL}`:`{label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.end_element_id}})
        MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(t)
        SET r += row.props
    """
    with self.get_session(database) as writer_session:
        outcome = writer_session.run(upsert_cypher, {"rows": rows})
        outcome.consume()
# For compatibility with test harnesses that patch the concrete driver
def get_driver(self) -> neo4j.Driver:
    """Public alias for `_get_driver()`; returns whatever that call returns."""
    driver = self._get_driver()
    return driver
# Helper for tests / external callers that want a reader session specifically
def get_read_session(
    sink: Neo4jSink, database: str
) -> AbstractContextManager[RetryableSession]:
    """Open a session on `database` with `neo4j.READ_ACCESS` as its access mode.

    Args:
        sink: Sink whose `get_session` is used.
        database: Database the session is opened against.

    Returns:
        The context manager produced by `sink.get_session`.
    """
    read_mode = neo4j.READ_ACCESS
    return sink.get_session(database, default_access_mode=read_mode)
@@ -0,0 +1,524 @@
"""AWS Neptune sink implementation.
Dual Bolt drivers: one against the writer endpoint for workers, one against
the reader endpoint for the API read path. If `NEPTUNE_READER_ENDPOINT` is
unset the reader falls back to the writer driver so single-node clusters work.
Neptune is single-database. The `database` argument on the SinkDatabase
protocol is ignored; tenant / provider isolation is enforced by labels that
the sync step already writes on every node (see tasks/jobs/attack_paths/sync.py).
SigV4 auth lives at the bottom of this file as `neptune_auth_provider`. The
neo4j driver invokes the returned callable on each token refresh.
"""
import atexit
import datetime
import json
import logging
import threading
import time
from collections.abc import Callable, Iterator
from contextlib import contextmanager
from typing import Any
from urllib.parse import urlsplit
import neo4j
import neo4j.exceptions
from api.attack_paths.retryable_session import RetryableSession
from api.attack_paths.sink.base import SinkDatabase
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.session import Session as BotoSession
from config.env import env
from django.conf import settings
from neo4j.auth_management import AuthManagers, ExpiringAuth
# Quiet the neo4j driver's own logger: only ERROR and above, and keep its
# records from propagating to ancestor loggers.
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False

logger = logging.getLogger(__name__)

# Passed as `max_retries` to every RetryableSession this module creates
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
    "ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
)
# Passed as the `timeout` of `tx.run` on the read-query path
READ_QUERY_TIMEOUT_SECONDS = env.int(
    "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
)
# Neptune serverless cold-start can be >30s; give the driver room
CONN_ACQUISITION_TIMEOUT = env.int("NEPTUNE_CONN_ACQUISITION_TIMEOUT", default=60)
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
# endpoint can't pin a request or the readiness probe longer than this. Kept
# generous: cold-start delays query execution, not the socket connect.
CONNECTION_TIMEOUT = env.int("NEPTUNE_CONNECTION_TIMEOUT", default=10)
# Roll connections hourly so SigV4 rotations and cert refreshes don't strand long-lived pool entries
MAX_CONNECTION_LIFETIME = env.int("NEPTUNE_MAX_CONNECTION_LIFETIME", default=3600)
# Passed as `max_connection_pool_size` to each driver built by this module
MAX_CONNECTION_POOL_SIZE = env.int("NEPTUNE_MAX_CONNECTION_POOL_SIZE", default=50)

# Error codes that, on a READ_ACCESS session, are reported to callers as
# WriteQueryNotAllowedException (always with the first code in this list)
READ_EXCEPTION_CODES = [
    "Neo.ClientError.Statement.AccessMode",
    "Neo.ClientError.Procedure.ProcedureNotFound",
]
# Error codes with this prefix are re-raised as ClientStatementException
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."

# Refresh 60s before the 5-minute SigV4 window closes
SIGV4_TOKEN_LIFETIME_MINUTES = 4
class NeptuneSink(SinkDatabase):
    """Neptune-backed sink. Single database; isolation is label-based.

    Holds a writer driver and a reader driver, both created lazily by
    `init()`. When no reader endpoint is configured the reader is the same
    driver object as the writer. Every `database` argument is accepted for
    interface compatibility and ignored.
    """

    def __init__(self) -> None:
        self._writer: neo4j.Driver | None = None
        self._reader: neo4j.Driver | None = None
        # Serialises driver creation in `init()` and teardown in `close()`
        self._lock = threading.Lock()
        self._atexit_registered = False

    # Config

    def _config(self) -> dict:
        """Return the `neptune` entry of `settings.DATABASES`."""
        return settings.DATABASES["neptune"]

    def _bolt_uri(self, endpoint: str, port: str) -> str:
        """Build the TLS Bolt URI the driver connects to."""
        return f"bolt+s://{endpoint}:{port}"

    def _https_url(self, endpoint: str, port: str) -> str:
        """Build the HTTPS URL that the SigV4 signature is computed against."""
        return f"https://{endpoint}:{port}"

    def _build_driver(self, endpoint: str) -> neo4j.Driver:
        """Create a SigV4-authenticated Bolt driver for `endpoint`.

        Raises:
            RuntimeError: If `endpoint` or the configured region is empty.
        """
        cfg = self._config()
        port = cfg["PORT"]
        region = cfg["REGION"]
        if not endpoint or not region:
            raise RuntimeError(
                "NEPTUNE_WRITER_ENDPOINT and AWS_REGION must be set when "
                "ATTACK_PATHS_SINK_DATABASE=neptune"
            )

        return neo4j.GraphDatabase.driver(
            self._bolt_uri(endpoint, port),
            auth=AuthManagers.bearer(
                neptune_auth_provider(region, self._https_url(endpoint, port))
            ),
            keep_alive=True,
            max_connection_lifetime=MAX_CONNECTION_LIFETIME,
            connection_timeout=CONNECTION_TIMEOUT,
            connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
            max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
            max_transaction_retry_time=0,
        )

    # Lifecycle

    def init(self) -> None:
        """Create the writer and reader drivers once; later calls are no-ops.

        Both drivers are built and checked in local variables and only then
        published, reader first. The lock-free fast path tests `_writer`
        alone, so `_writer` must never be visible while `_reader` is still
        unset: otherwise a concurrent `_get_reader()` could return from
        `init()` during the writer connectivity check and fail its assert.
        """
        if self._writer is not None:
            return

        with self._lock:
            if self._writer is not None:
                return

            cfg = self._config()
            writer_endpoint = cfg["WRITER_ENDPOINT"]
            reader_endpoint = cfg["READER_ENDPOINT"] or writer_endpoint

            # Eager connectivity checks are best-effort
            # A Neptune that is down at boot must not crash the process, same degradation model as Postgres
            # Drivers reconnect lazily on first use
            # /health/ready surfaces the outage until it recovers
            writer = self._build_driver(writer_endpoint)
            self._verify_best_effort(writer, "writer")

            if reader_endpoint == writer_endpoint:
                reader = writer
            else:
                try:
                    reader = self._build_driver(reader_endpoint)
                except Exception:
                    # Nothing has been published yet: release the writer so
                    # a later init() retries from a clean state instead of
                    # finding a writer without a reader.
                    try:
                        writer.close()
                    except Exception:  # pragma: no cover - best-effort
                        pass
                    raise
                self._verify_best_effort(reader, "reader")

            # Publish the reader before the writer (see docstring).
            self._reader = reader
            self._writer = writer

            if not self._atexit_registered:
                atexit.register(self.close)
                self._atexit_registered = True

    def close(self) -> None:
        """Close both drivers, ignoring close errors, and reset to uninitialised."""
        with self._lock:
            # `Driver.close()` is idempotent, so closing the same driver twice
            # (when reader aliases writer on single-endpoint configs) is safe
            for driver in (self._reader, self._writer):
                if driver is None:
                    continue
                try:
                    driver.close()
                except Exception:  # pragma: no cover - best-effort
                    pass

            self._writer = None
            self._reader = None

    # Sessions

    def _get_writer(self) -> neo4j.Driver:
        """Return the writer driver, initialising the sink if needed."""
        self.init()
        assert self._writer is not None
        return self._writer

    def _get_reader(self) -> neo4j.Driver:
        """Return the reader driver, initialising the sink if needed."""
        self.init()
        assert self._reader is not None
        return self._reader

    @staticmethod
    def _verify_best_effort(driver: neo4j.Driver, role: str) -> None:
        """Check connectivity of `driver`; log a warning instead of raising."""
        try:
            driver.verify_connectivity()
        except Exception:
            logger.warning(
                "Neptune %s endpoint unreachable at init; continuing with a lazily-reconnecting driver",
                role,
                exc_info=True,
            )

    def verify_connectivity(self) -> None:
        """Check connectivity of the reader driver; propagates its errors."""
        # The API read path uses the reader driver
        # On single-endpoint clusters it aliases the writer, so this also covers the writer
        # A writer-only outage is a workers' concern (no HTTP probe there) and deliberately does not fail API readiness
        self._get_reader().verify_connectivity()

    @contextmanager
    def get_session(
        self,
        database: str | None = None,  # noqa: ARG002 - ignored on Neptune
        default_access_mode: str | None = None,
    ) -> Iterator[RetryableSession]:
        """Yield a retrying session on the reader or writer driver.

        `neo4j.READ_ACCESS` selects the reader driver; any other access mode
        selects the writer. `Neo4jError`s raised while the session is in use
        are translated into the project's graph-database exceptions.

        Raises:
            WriteQueryNotAllowedException: On a READ_ACCESS session, when the
                error code is in `READ_EXCEPTION_CODES`.
            ClientStatementException: When the error code starts with
                `CLIENT_STATEMENT_EXCEPTION_PREFIX`.
            GraphDatabaseQueryException: For every other `Neo4jError`.
        """
        from api.attack_paths.database import (
            ClientStatementException,
            GraphDatabaseQueryException,
            WriteQueryNotAllowedException,
        )

        driver = (
            self._get_reader()
            if default_access_mode == neo4j.READ_ACCESS
            else self._get_writer()
        )
        session_wrapper: RetryableSession | None = None

        try:
            session_wrapper = RetryableSession(
                session_factory=lambda: driver.session(
                    default_access_mode=default_access_mode
                ),
                max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
            )
            yield session_wrapper

        except neo4j.exceptions.Neo4jError as exc:
            if (
                default_access_mode == neo4j.READ_ACCESS
                and exc.code
                and exc.code in READ_EXCEPTION_CODES
            ):
                raise WriteQueryNotAllowedException(
                    message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
                )

            message = exc.message if exc.message is not None else str(exc)
            if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
                raise ClientStatementException(message=message, code=exc.code)
            raise GraphDatabaseQueryException(message=message, code=exc.code)

        finally:
            # The wrapper is None only if its construction itself failed.
            if session_wrapper is not None:
                session_wrapper.close()

    # Operations

    def execute_read_query(
        self,
        database: str,  # noqa: ARG002 - ignored on Neptune
        cypher: str,
        parameters: dict[str, Any] | None = None,
    ) -> neo4j.graph.Graph:
        """Run `cypher` in a managed read transaction and return its graph.

        Args:
            database: Ignored on Neptune.
            cypher: Cypher text to execute.
            parameters: Optional query parameters; `None` is sent as `{}`.
        """
        with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:

            def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
                result = tx.run(
                    cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
                )
                return result.graph()

            return session.execute_read(_run)

    def create_database(self, database: str) -> None:  # noqa: ARG002
        # Neptune clusters are single-database; there is nothing to create.
        return None

    def drop_database(self, database: str) -> None:  # noqa: ARG002
        # Neptune clusters are single-database; there is nothing to drop.
        return None

    def drop_subgraph(self, database: str, provider_id: str) -> int:  # noqa: ARG002
        """Delete a provider's subgraph in two bounded phases.

        Neptune write transactions are capped at ~2 minutes. A naive
        `DETACH DELETE` on a label-scanned batch grows unbounded with graph
        density (one node can drag thousands of relationships into the same
        transaction). Instead:

        1. Delete relationships incident to provider nodes, one fixed-size
           batch per transaction.
        2. Delete the now-orphaned nodes, one fixed-size batch per transaction.

        Each transaction does work proportional to `batch_size`, never to the
        graph's branching factor.

        Args:
            database: Ignored on Neptune.
            provider_id: Provider whose label selects what is deleted.

        Returns:
            Total number of nodes deleted.
        """
        from tasks.jobs.attack_paths.config import (
            BATCH_SIZE,
            PROVIDER_RESOURCE_LABEL,
            get_provider_label,
        )

        provider_label = get_provider_label(provider_id)
        deleted_relationships = 0
        relationship_batches = 0
        node_batches = 0
        drop_t0 = time.perf_counter()

        logger.info(
            "Dropping provider graph from Neptune sink "
            "(provider=%s, provider_label=%s)",
            provider_id,
            provider_label,
        )
        logger.info(
            "Opening Neptune writer session for provider graph drop (provider=%s)",
            provider_id,
        )
        with self.get_session() as session:
            logger.info(
                "Opened Neptune writer session for provider graph drop (provider=%s)",
                provider_id,
            )

            # Phase 1: relationships, until a round deletes nothing
            while True:
                next_batch = relationship_batches + 1
                logger.info(
                    "Deleting relationship batch from Neptune sink "
                    "(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
                    provider_id,
                    next_batch,
                    deleted_relationships,
                    time.perf_counter() - drop_t0,
                )
                result = session.run(
                    f"""
                    MATCH (:`{provider_label}`)-[r]-()
                    WITH DISTINCT r LIMIT $batch_size
                    DELETE r
                    RETURN COUNT(r) AS deleted_rels_count
                    """,
                    {"batch_size": BATCH_SIZE},
                )
                record = result.single()
                # A missing record or a null count both mean "nothing deleted"
                deleted_rels = (record["deleted_rels_count"] if record else 0) or 0
                if deleted_rels == 0:
                    break
                relationship_batches += 1
                deleted_relationships += deleted_rels
                logger.info(
                    "Deleted relationship batch from Neptune sink "
                    "(provider=%s, batch=%s, deleted_rels=%s, total_rels=%s, "
                    "elapsed=%.3fs)",
                    provider_id,
                    relationship_batches,
                    deleted_rels,
                    deleted_relationships,
                    time.perf_counter() - drop_t0,
                )

            # Phase 2: the nodes themselves, until a round deletes nothing
            deleted_nodes = 0
            while True:
                next_batch = node_batches + 1
                logger.info(
                    "Deleting node batch from Neptune sink "
                    "(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
                    provider_id,
                    next_batch,
                    deleted_nodes,
                    time.perf_counter() - drop_t0,
                )
                result = session.run(
                    f"""
                    MATCH (n:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}`)
                    WITH n LIMIT $batch_size
                    DELETE n
                    RETURN COUNT(n) AS deleted_nodes_count
                    """,
                    {"batch_size": BATCH_SIZE},
                )
                record = result.single()
                deleted = (record["deleted_nodes_count"] if record else 0) or 0
                if deleted == 0:
                    break
                node_batches += 1
                deleted_nodes += deleted
                logger.info(
                    "Deleted node batch from Neptune sink "
                    "(provider=%s, batch=%s, deleted_nodes=%s, total_nodes=%s, "
                    "elapsed=%.3fs)",
                    provider_id,
                    node_batches,
                    deleted,
                    deleted_nodes,
                    time.perf_counter() - drop_t0,
                )

        logger.info(
            "Finished dropping provider graph from Neptune sink "
            "(provider=%s, relationship_batches=%s, deleted_rels=%s, "
            "node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
            provider_id,
            relationship_batches,
            deleted_relationships,
            node_batches,
            deleted_nodes,
            time.perf_counter() - drop_t0,
        )
        return deleted_nodes

    def has_provider_data(self, database: str, provider_id: str) -> bool:  # noqa: ARG002
        """Report whether at least one node exists for the provider.

        Args:
            database: Ignored on Neptune.
            provider_id: Provider whose label is looked up.
        """
        from tasks.jobs.attack_paths.config import (
            PROVIDER_RESOURCE_LABEL,
            get_provider_label,
        )

        provider_label = get_provider_label(provider_id)
        query = (
            f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
        )

        with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:
            result = session.run(query)
            return result.single() is not None

    def clear_cache(self, database: str) -> None:  # noqa: ARG002
        # Neptune has no user-facing cache-clear procedure; no-op.
        return None

    # Sync write path

    def ensure_sync_indexes(self, database: str) -> None:  # noqa: ARG002
        # Neptune routes node and relationship lookups through `~id`, which is the cluster's primary key
        # No additional index is needed or supported
        return None

    def write_nodes(
        self,
        database: str,  # noqa: ARG002
        labels: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert a batch of nodes keyed by Neptune's `~id`.

        Args:
            database: Ignored on Neptune.
            labels: Label expression appended verbatim after `SET n:`.
            rows: Batch of dicts; the query reads `provider_element_id` and
                `props` from each. An empty batch is a no-op.
        """
        if not rows:
            return

        from tasks.jobs.attack_paths.config import (
            PROVIDER_ELEMENT_ID_PROPERTY,
            PROVIDER_RESOURCE_LABEL,
        )

        # MERGE on `~id` is the documented and engine-optimized idempotent
        # upsert pattern for Neptune openCypher. The label inside the MERGE
        # matters: Neptune assigns a default `vertex` label to any node
        # created without an explicit one, so we pin `_ProviderResource`
        # (which every synced node carries anyway) at MERGE-time. Additional
        # labels are added after
        #
        # We also write `_provider_element_id` as a regular property so
        # non-sync code (drop_subgraph, query helpers) keeps a stable contract
        # that doesn't know about `~id`
        query = f"""
            UNWIND $rows AS row
            MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`~id`: row.provider_element_id}})
            SET n:{labels}
            SET n += row.props
            SET n.`{PROVIDER_ELEMENT_ID_PROPERTY}` = row.provider_element_id
        """
        with self.get_session() as session:
            session.run(query, {"rows": rows}).consume()

    def write_relationships(
        self,
        database: str,  # noqa: ARG002
        rel_type: str,
        provider_id: str,  # noqa: ARG002 - encoded in start/end `~id` already
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert a batch of relationships between nodes looked up by `~id`.

        Args:
            database: Ignored on Neptune.
            rel_type: Relationship type, interpolated as a backtick-quoted name.
            provider_id: Ignored; the endpoints are addressed by id.
            rows: Batch of dicts; the query reads `start_element_id`,
                `end_element_id`, `provider_element_id` and `props` from
                each. An empty batch is a no-op.
        """
        if not rows:
            return

        from tasks.jobs.attack_paths.config import PROVIDER_ELEMENT_ID_PROPERTY

        # `id(n) = $value` is Neptune's parameterized fast path; both endpoint
        # MATCHes resolve in O(1) via the system `~id`, so per-row work stays
        # bounded regardless of batch size
        query = f"""
            UNWIND $rows AS row
            MATCH (s) WHERE id(s) = row.start_element_id
            MATCH (e) WHERE id(e) = row.end_element_id
            MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(e)
            SET r += row.props
        """
        with self.get_session() as session:
            session.run(query, {"rows": rows}).consume()

    # Test helpers

    def get_writer(self) -> neo4j.Driver:
        """Public alias for `_get_writer()`."""
        return self._get_writer()

    def get_reader(self) -> neo4j.Driver:
        """Public alias for `_get_reader()`."""
        return self._get_reader()
# SigV4 auth provider
class _NeptuneAuthToken(neo4j.Auth):
    """neo4j `Auth` whose credentials are a SigV4-signed GET to `/opencypher`.

    The signed request's headers, serialised as JSON, are handed to the
    driver as the credentials of a `basic` auth token.
    """

    def __init__(self, region: str, url: str) -> None:
        """Sign a GET for `url + "/opencypher"` and store the resulting headers.

        Args:
            region: AWS region used for the signature.
            url: `https://host:port` base URL of the Neptune endpoint.

        Raises:
            RuntimeError: If the botocore credential chain resolves nothing.
        """
        resolved = BotoSession().get_credentials()
        if resolved is None:
            raise RuntimeError(
                "No AWS credentials available for Neptune SigV4 signing. "
                "Ensure the boto3 credential chain can resolve."
            )
        frozen = resolved.get_frozen_credentials()

        signed_request = AWSRequest(method="GET", url=url + "/opencypher")
        # SigV4 canonical Host must carry the real `host:port`
        # Neptune runs on a non-default port (8182), so `.hostname` would drop it and break signing
        signed_request.headers.add_header("Host", urlsplit(url).netloc)
        SigV4Auth(frozen, "neptune-db", region).add_auth(signed_request)

        # Copy across only the headers that are actually present on the
        # signed request.
        payload = {}
        for header_name in (
            "Authorization",
            "X-Amz-Date",
            "X-Amz-Security-Token",
            "Host",
        ):
            if header_name in signed_request.headers:
                payload[header_name] = signed_request.headers[header_name]
        payload["HttpMethod"] = "GET"

        super().__init__("basic", "username", json.dumps(payload))
def neptune_auth_provider(region: str, https_url: str) -> Callable[[], ExpiringAuth]:
    """Build the zero-argument callable the driver calls to obtain fresh auth.

    Each call signs a new token and stamps it with an expiry
    `SIGV4_TOKEN_LIFETIME_MINUTES` minutes in the future.

    Args:
        region: AWS region used for signing.
        https_url: `https://host:port` base URL of the Neptune endpoint.
    """

    def _provider() -> ExpiringAuth:
        fresh_token = _NeptuneAuthToken(region, https_url)
        lifetime = datetime.timedelta(minutes=SIGV4_TOKEN_LIFETIME_MINUTES)
        deadline = datetime.datetime.now(datetime.UTC) + lifetime
        return ExpiringAuth(auth=fresh_token, expires_at=deadline.timestamp())

    return _provider
@@ -1,12 +1,11 @@
import logging
from typing import Any, Iterable
from collections.abc import Iterable
from typing import Any
import neo4j
from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
from api.attack_paths import database as graph_database, AttackPathsQueryDefinition
from api.attack_paths import AttackPathsQueryDefinition
from api.attack_paths import database as graph_database
from api.attack_paths import sink as sink_module
from api.attack_paths.cypher_sanitizer import (
inject_provider_label,
validate_custom_query,
@@ -16,7 +15,10 @@ from api.attack_paths.queries.schema import (
RAW_SCHEMA_URL,
get_cartography_schema_query,
)
from api.models import AttackPathsScan
from config.custom_logging import BackendLogger
from config.env import env
from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
from tasks.jobs.attack_paths.config import (
INTERNAL_LABELS,
INTERNAL_PROPERTIES,
@@ -27,6 +29,10 @@ from tasks.jobs.attack_paths.config import (
logger = logging.getLogger(BackendLogger.API)
def _custom_query_timeout_ms() -> int:
    """Return `ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS` (default 30) in milliseconds."""
    timeout_seconds = env.int("ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30)
    return timeout_seconds * 1000
# Predefined query helpers
@@ -103,13 +109,13 @@ def execute_query(
definition: AttackPathsQueryDefinition,
parameters: dict[str, Any],
provider_id: str,
scan: AttackPathsScan,
) -> dict[str, Any]:
try:
graph = graph_database.execute_read_query(
database=database_name,
cypher=definition.cypher,
parameters=parameters,
)
# TODO: drop after Neptune cutover
# Route reads by the scan row's recorded sink, not by current settings.
backend = sink_module.get_backend_for_scan(scan)
graph = backend.execute_read_query(database_name, definition.cypher, parameters)
return _serialize_graph(graph, provider_id)
except graph_database.WriteQueryNotAllowedException:
@@ -143,22 +149,31 @@ def execute_custom_query(
database_name: str,
cypher: str,
provider_id: str,
scan: AttackPathsScan,
) -> dict[str, Any]:
# Defense-in-depth for custom queries:
# 1. neo4j.READ_ACCESS — prevents mutations at the driver level
# 2. inject_provider_label() — regex-based label injection scopes node patterns
# 3. _serialize_graph() — post-query filter drops nodes without the provider label
# 1. `neo4j.READ_ACCESS` — prevents mutations at the driver level
# 2. `inject_provider_label()` — regex-based label injection scopes node patterns
# 3. `_serialize_graph()` — post-query filter drops nodes without the provider label
# 4. `USING QUERY:TIMEOUTMILLISECONDS` on Neptune — server-side runaway cutoff
#
# Layer 2 is best-effort (regex can't fully parse Cypher);
# layer 3 is the safety net that guarantees provider isolation.
validate_custom_query(cypher)
cypher = inject_provider_label(cypher, provider_id)
# TODO: drop after Neptune cutover
backend = sink_module.get_backend_for_scan(scan)
# Neptune enforces a cluster-level query timeout; prepending the hint
# makes the limit explicit and matches the client-side read timeout.
# Applies only when the scan's graph lives in Neptune.
if getattr(scan, "sink_backend", None) == "neptune":
timeout_ms = _custom_query_timeout_ms()
cypher = f"USING QUERY:TIMEOUTMILLISECONDS {timeout_ms}\n{cypher}"
try:
graph = graph_database.execute_read_query(
database=database_name,
cypher=cypher,
)
graph = backend.execute_read_query(database_name, cypher, None)
serialized = _serialize_graph(graph, provider_id)
return _truncate_graph(serialized)
@@ -181,10 +196,11 @@ def execute_custom_query(
def get_cartography_schema(
database_name: str, provider_id: str
database_name: str, provider_id: str, scan: AttackPathsScan
) -> dict[str, str] | None:
try:
with graph_database.get_session(
backend = sink_module.get_backend_for_scan(scan)
with backend.get_session(
database_name, default_access_mode=neo4j.READ_ACCESS
) as session:
result = session.run(get_cartography_schema_query(provider_id))
+73 -14
View File
@@ -1,18 +1,19 @@
from typing import Optional, Tuple
from math import isfinite
from uuid import UUID
from api.db_router import MainRouter
from api.models import TenantAPIKey, TenantAPIKeyManager
from cryptography.fernet import InvalidToken
from django.core.exceptions import ObjectDoesNotExist
from django.utils import timezone
from drf_simple_apikey.backends import APIKeyAuthentication as BaseAPIKeyAuth
from drf_simple_apikey.crypto import get_crypto
from drf_simple_apikey.settings import package_settings
from rest_framework.authentication import BaseAuthentication
from rest_framework.exceptions import AuthenticationFailed
from rest_framework.request import Request
from rest_framework_simplejwt.authentication import JWTAuthentication
from api.db_router import MainRouter
from api.models import TenantAPIKey, TenantAPIKeyManager
class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
model = TenantAPIKey
@@ -23,18 +24,49 @@ class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
def _authenticate_credentials(self, request, key):
"""
Override to use admin connection, bypassing RLS during authentication.
Delegates to parent after temporarily routing model queries to admin DB.
"""
# Temporarily point the model's manager to admin database
original_objects = self.model.objects
self.model.objects = self.model.objects.using(MainRouter.admin_db)
try:
payload = self.key_crypto.decrypt(key)
except ValueError:
raise AuthenticationFailed("Invalid API Key.")
if not isinstance(payload, dict):
raise AuthenticationFailed("Invalid API Key.")
payload_pk = payload.get("_pk")
payload_exp = payload.get("_exp")
if (
not isinstance(payload_pk, str)
or isinstance(payload_exp, bool)
or not isinstance(payload_exp, (int, float))
or not isfinite(payload_exp)
):
raise AuthenticationFailed("Invalid API Key.")
try:
# Call parent method which will now use admin database
return super()._authenticate_credentials(request, key)
finally:
# Restore original manager
self.model.objects = original_objects
api_key_pk = UUID(payload_pk)
except ValueError:
raise AuthenticationFailed("Invalid API Key.")
if payload_exp < timezone.now().timestamp():
raise AuthenticationFailed("API Key has already expired.")
try:
api_key = self.model.objects.using(MainRouter.admin_db).get(id=api_key_pk)
except ObjectDoesNotExist:
raise AuthenticationFailed("No entity matching this api key.")
if api_key.revoked:
raise AuthenticationFailed("This API Key has been revoked.")
client_ip = request.META.get(package_settings.IP_ADDRESS_HEADER)
if api_key.blacklisted_ips and client_ip in api_key.blacklisted_ips:
raise AuthenticationFailed("Access denied from blacklisted IP.")
if api_key.whitelisted_ips and client_ip not in api_key.whitelisted_ips:
raise AuthenticationFailed("Access restricted to specific IP addresses.")
return api_key.entity, key
def authenticate(self, request: Request):
prefixed_key = self.get_key(request)
@@ -81,7 +113,7 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):
jwt_auth = JWTAuthentication()
api_key_auth = TenantAPIKeyAuthentication()
def authenticate(self, request: Request) -> Optional[Tuple[object, dict]]:
def authenticate(self, request: Request) -> tuple[object, dict] | None:
auth_header = request.headers.get("Authorization", "")
# Prioritize JWT authentication if both are present
@@ -93,3 +125,30 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):
# Default fallback
return self.jwt_auth.authenticate(request)
class SSEAuthentication(CombinedJWTOrAPIKeyAuthentication):
    """JWT/API-Key auth that also accepts `?access_token=<jwt>`.

    Browser `EventSource` is the only widely available SSE client API, and
    it cannot set the `Authorization` header: its constructor takes only a
    URL and `withCredentials`. So that browser SSE clients stay on the same
    auth stack as the rest of the API, SSE endpoints additionally accept a
    JWT in the `access_token` query parameter, which is the parameter name
    RFC 6750 Section 2.3 defines for bearer tokens.
    """

    def authenticate(self, request: Request):
        """Authenticate from the header if one is sent, else from the query token.

        A non-empty `Authorization` header always takes precedence and is
        handled by the parent class. Without a header, a non-empty
        `access_token` query parameter is validated as a JWT. With neither,
        the parent class decides the outcome.
        """
        if request.headers.get("Authorization", ""):
            return super().authenticate(request)

        query_token = request.query_params.get("access_token")
        if query_token:
            token = self.jwt_auth.get_validated_token(query_token)
            return self.jwt_auth.get_user(token), token

        # No header and no query token: defer to the parent's default path.
        return super().authenticate(request)
+6 -7
View File
@@ -1,3 +1,9 @@
from api.authentication import CombinedJWTOrAPIKeyAuthentication
from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
from api.db_utils import POSTGRES_USER_VAR, rls_transaction
from api.filters import CustomDjangoFilterBackend
from api.models import Role, UserRoleRelationship
from api.rbac.permissions import HasPermissions
from django.conf import settings
from django.db import transaction
from rest_framework import permissions
@@ -8,13 +14,6 @@ from rest_framework.response import Response
from rest_framework_json_api import filters
from rest_framework_json_api.views import ModelViewSet
from api.authentication import CombinedJWTOrAPIKeyAuthentication
from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
from api.db_utils import POSTGRES_USER_VAR, rls_transaction
from api.filters import CustomDjangoFilterBackend
from api.models import Role, UserRoleRelationship
from api.rbac.permissions import HasPermissions
class BaseViewSet(ModelViewSet):
authentication_classes = [CombinedJWTOrAPIKeyAuthentication]
+3 -3
View File
@@ -112,14 +112,14 @@ def get_compliance_frameworks(provider_type: Provider.ProviderChoices) -> list[s
"""List compliance framework identifiers available for `provider_type`.
Includes both per-provider frameworks and universal top-level frameworks
(e.g. ``dora``, ``csa_ccm_4.0``).
(e.g. ``dora_2022_2554``, ``csa_ccm_4.0``).
Args:
provider_type (Provider.ProviderChoices): The cloud provider type
(e.g., "aws", "azure", "gcp", "m365").
Returns:
list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora").
list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora_2022_2554").
"""
global AVAILABLE_COMPLIANCE_FRAMEWORKS
if provider_type not in AVAILABLE_COMPLIANCE_FRAMEWORKS:
@@ -352,7 +352,7 @@ def generate_compliance_overview_template(
total_requirements += 1
provider_check_list = list(requirement.checks.get(provider_type, []))
total_checks = len(provider_check_list)
checks_dict = {check: None for check in provider_check_list}
checks_dict = dict.fromkeys(provider_check_list)
req_status_val = "MANUAL" if total_checks == 0 else "PASS"
+10 -11
View File
@@ -3,8 +3,14 @@ import secrets
import time
import uuid
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from datetime import UTC, datetime, timedelta
from api.db_router import (
READ_REPLICA_ALIAS,
get_read_db_alias,
reset_read_db_alias,
set_read_db_alias,
)
from celery.utils.log import get_task_logger
from config.env import env
from django.conf import settings
@@ -22,13 +28,6 @@ from psycopg2 import sql as psycopg2_sql
from psycopg2.extensions import AsIs, new_type, register_adapter, register_type
from rest_framework_json_api.serializers import ValidationError
from api.db_router import (
READ_REPLICA_ALIAS,
get_read_db_alias,
reset_read_db_alias,
set_read_db_alias,
)
logger = get_task_logger(__name__)
DB_USER = settings.DATABASES["default"]["USER"] if not settings.TESTING else "test"
@@ -170,7 +169,7 @@ def one_week_from_now():
"""
Return a datetime object with a date one week from now.
"""
return datetime.now(timezone.utc) + timedelta(days=7)
return datetime.now(UTC) + timedelta(days=7)
def generate_random_token(length: int = 14, symbols: str | None = None) -> str:
@@ -405,10 +404,10 @@ def _should_create_index_on_partition(
# Unknown month abbreviation, include it to be safe
return True
partition_date = datetime(year, month, 1, tzinfo=timezone.utc)
partition_date = datetime(year, month, 1, tzinfo=UTC)
# Get current month start
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
current_month_start = now.replace(
day=1, hour=0, minute=0, second=0, microsecond=0
)
+3 -4
View File
@@ -1,14 +1,13 @@
import uuid
from functools import wraps
from django.core.exceptions import ObjectDoesNotExist
from django.db import DatabaseError, connection, transaction
from rest_framework_json_api.serializers import ValidationError
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import POSTGRES_TENANT_VAR, SET_CONFIG_QUERY, rls_transaction
from api.exceptions import ProviderDeletedException
from api.models import Provider, Scan
from django.core.exceptions import ObjectDoesNotExist
from django.db import DatabaseError, connection, transaction
from rest_framework_json_api.serializers import ValidationError
def set_tenant(func=None, *, keep_tenant=False):
+144 -32
View File
@@ -1,19 +1,4 @@
from datetime import date, datetime, timedelta, timezone
from dateutil.parser import parse
from django.conf import settings
from django.db.models import F, Q
from django_filters.rest_framework import (
BaseInFilter,
BooleanFilter,
CharFilter,
ChoiceFilter,
DateFilter,
FilterSet,
UUIDFilter,
)
from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
from rest_framework_json_api.serializers import ValidationError
from datetime import UTC, date, datetime, timedelta
from api.constants import SEVERITY_ORDER
from api.db_utils import (
@@ -68,6 +53,20 @@ from api.uuid_utils import (
uuid7_start,
)
from api.v1.serializers import TaskBase
from dateutil.parser import parse
from django.conf import settings
from django.db.models import F, Q
from django_filters.rest_framework import (
BaseInFilter,
BooleanFilter,
CharFilter,
ChoiceFilter,
DateFilter,
FilterSet,
UUIDFilter,
)
from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
from rest_framework_json_api.serializers import ValidationError
class CustomDjangoFilterBackend(DjangoFilterBackend):
@@ -102,7 +101,7 @@ class BaseProviderFilter(FilterSet):
"""
Abstract base filter for models with direct FK to Provider.
Provides standard provider_id and provider_type filters.
Provides standard provider_id, provider_type, and provider_groups filters.
Subclasses must define Meta.model.
"""
@@ -116,6 +115,16 @@ class BaseProviderFilter(FilterSet):
choices=Provider.ProviderChoices.choices,
lookup_expr="in",
)
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
class Meta:
abstract = True
@@ -126,7 +135,7 @@ class BaseScanProviderFilter(FilterSet):
"""
Abstract base filter for models with FK to Scan (and Scan has FK to Provider).
Provides standard provider_id and provider_type filters via scan relationship.
Provides standard provider_id, provider_type, and provider_groups filters via scan relationship.
Subclasses must define Meta.model.
"""
@@ -140,6 +149,16 @@ class BaseScanProviderFilter(FilterSet):
choices=Provider.ProviderChoices.choices,
lookup_expr="in",
)
provider_groups = UUIDFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
class Meta:
abstract = True
@@ -160,6 +179,16 @@ class CommonFindingFilters(FilterSet):
provider_type__in = ChoiceInFilter(
choices=Provider.ProviderChoices.choices, field_name="scan__provider__provider"
)
provider_groups = UUIDFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
provider_uid = CharFilter(field_name="scan__provider__uid", lookup_expr="exact")
provider_uid__in = CharInFilter(field_name="scan__provider__uid", lookup_expr="in")
provider_uid__icontains = CharFilter(
@@ -370,6 +399,12 @@ class ProviderFilter(FilterSet):
choices=Provider.ProviderChoices.choices,
lookup_expr="in",
)
provider_groups = UUIDFilter(
field_name="provider_groups__id", lookup_expr="exact", distinct=True
)
provider_groups__in = UUIDInFilter(
field_name="provider_groups__id", lookup_expr="in", distinct=True
)
class Meta:
model = Provider
@@ -395,6 +430,16 @@ class ProviderRelationshipFilterSet(FilterSet):
provider_type__in = ChoiceInFilter(
choices=Provider.ProviderChoices.choices, field_name="provider__provider"
)
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
provider_uid = CharFilter(field_name="provider__uid", lookup_expr="exact")
provider_uid__in = CharInFilter(field_name="provider__uid", lookup_expr="in")
provider_uid__icontains = CharFilter(
@@ -552,12 +597,12 @@ class ResourceFilter(ProviderRelationshipFilterSet):
gte_date = (
parse(self.data.get("updated_at__gte")).date()
if self.data.get("updated_at__gte")
else datetime.now(timezone.utc).date()
else datetime.now(UTC).date()
)
lte_date = (
parse(self.data.get("updated_at__lte")).date()
if self.data.get("updated_at__lte")
else datetime.now(timezone.utc).date()
else datetime.now(UTC).date()
)
if abs(lte_date - gte_date) > timedelta(
@@ -702,9 +747,9 @@ class FindingFilter(CommonFindingFilters):
lte_date = cleaned.get("inserted_at__lte") or exact_date
if gte_date is None:
gte_date = datetime.now(timezone.utc).date()
gte_date = datetime.now(UTC).date()
if lte_date is None:
lte_date = datetime.now(timezone.utc).date()
lte_date = datetime.now(UTC).date()
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -798,7 +843,7 @@ class FindingFilter(CommonFindingFilters):
def maybe_date_to_datetime(value):
dt = value
if isinstance(value, date):
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
return dt
@@ -887,9 +932,9 @@ class FindingGroupFilter(CommonFindingFilters):
lte_date = cleaned.get("inserted_at__lte") or exact_date
if gte_date is None:
gte_date = datetime.now(timezone.utc).date()
gte_date = datetime.now(UTC).date()
if lte_date is None:
lte_date = datetime.now(timezone.utc).date()
lte_date = datetime.now(UTC).date()
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -931,7 +976,7 @@ class FindingGroupFilter(CommonFindingFilters):
"""Convert date to datetime if needed."""
dt = value
if isinstance(value, date):
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
return dt
@@ -1001,6 +1046,16 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
field_name="provider__provider", choices=Provider.ProviderChoices.choices
)
provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
class Meta:
model = FindingGroupDailySummary
@@ -1035,9 +1090,9 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
lte_date = cleaned.get("inserted_at__lte") or exact_date
if gte_date is None:
gte_date = datetime.now(timezone.utc).date()
gte_date = datetime.now(UTC).date()
if lte_date is None:
lte_date = datetime.now(timezone.utc).date()
lte_date = datetime.now(UTC).date()
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
@@ -1076,7 +1131,7 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
def _maybe_date_to_datetime(value):
dt = value
if isinstance(value, date):
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
return dt
@@ -1101,6 +1156,16 @@ class LatestFindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
field_name="provider__provider", choices=Provider.ProviderChoices.choices
)
provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
class Meta:
model = FindingGroupDailySummary
@@ -1280,12 +1345,19 @@ class RoleFilter(FilterSet):
}
class ComplianceOverviewFilter(FilterSet):
class ComplianceOverviewFilter(BaseScanProviderFilter):
"""
Keep provider filters in the schema while runtime filtering resolves scans first.
Compliance overview provider filters are applied to the latest completed scans
in the viewset, then this filterset handles the remaining compliance fields.
"""
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
scan_id = UUIDFilter(field_name="scan_id", required=True)
scan_id = UUIDFilter(field_name="scan_id")
region = CharFilter(field_name="region")
class Meta:
class Meta(BaseScanProviderFilter.Meta):
model = ComplianceRequirementOverview
fields = {
"inserted_at": ["date", "gte", "lte"],
@@ -1306,6 +1378,16 @@ class ScanSummaryFilter(FilterSet):
provider_type__in = ChoiceInFilter(
field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
)
provider_groups = UUIDFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
region = CharFilter(field_name="region")
class Meta:
@@ -1329,6 +1411,16 @@ class DailySeveritySummaryFilter(FilterSet):
provider_type__in = ChoiceInFilter(
field_name="provider__provider", choices=Provider.ProviderChoices.choices
)
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
date_from = DateFilter(method="filter_noop")
date_to = DateFilter(method="filter_noop")
@@ -1585,6 +1677,16 @@ class ThreatScoreSnapshotFilter(FilterSet):
choices=Provider.ProviderChoices.choices,
lookup_expr="in",
)
provider_groups = UUIDFilter(
field_name="provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
compliance_id = CharFilter(field_name="compliance_id", lookup_expr="exact")
compliance_id__in = CharInFilter(field_name="compliance_id", lookup_expr="in")
@@ -1628,6 +1730,16 @@ class ResourceGroupOverviewFilter(FilterSet):
choices=Provider.ProviderChoices.choices,
lookup_expr="in",
)
provider_groups = UUIDFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="exact",
distinct=True,
)
provider_groups__in = UUIDInFilter(
field_name="scan__provider__provider_groups__id",
lookup_expr="in",
distinct=True,
)
resource_group = CharFilter(field_name="resource_group", lookup_expr="exact")
resource_group__in = CharInFilter(field_name="resource_group", lookup_expr="in")
+55 -20
View File
@@ -2,8 +2,9 @@
Format (draft-inadarei-api-health-check-06).
Liveness reports only process status. Readiness verifies that PostgreSQL,
Valkey and Neo4j are reachable and returns per-dependency detail when any
of them is unreachable.
Valkey and the attack-paths graph store (Neo4j or Neptune, per
``ATTACK_PATHS_SINK_DATABASE``) are reachable and returns per-dependency
detail when any of them is unreachable.
"""
from __future__ import annotations
@@ -11,8 +12,10 @@ from __future__ import annotations
import logging
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import TimeoutError as FuturesTimeoutError
from contextlib import suppress
from datetime import datetime, timezone
from datetime import UTC, datetime
from typing import Any
import redis
@@ -37,9 +40,28 @@ STATUS_FAIL = "fail"
STATUS_WARN = "warn"
# Short socket timeout so a stuck Valkey cannot stall the probe.
# Neo4j inherits its driver-level ``connection_acquisition_timeout``.
VALKEY_PROBE_TIMEOUT_SECONDS = 2
# Probe-scoped budget for the graph database.
# ``Driver.verify_connectivity()`` takes no timeout; its only bound is the
# driver-level ``connection_acquisition_timeout`` (60s on Neptune). The
# probe needs its own budget, independent of the workload driver, so a
# graph-database outage cannot pin a worker thread (and the readiness lock)
# for a minute.
GRAPH_DB_PROBE_TIMEOUT_SECONDS = 5
# Bounded pool that enforces ``GRAPH_DB_PROBE_TIMEOUT_SECONDS``. If the
# graph database is unreachable the probe call blocks until the driver's
# own acquisition timeout fires; we abandon the future after the budget and
# report ``fail``. Orphaned tasks are capped by ``max_workers`` plus the 3s
# readiness cache plus the per-IP throttle, so they cannot pile up: worst
# case during a graph-database outage is every readiness call failing fast
# in ``GRAPH_DB_PROBE_TIMEOUT_SECONDS`` with at most 2 background threads
# stuck for <= the driver acquisition timeout.
_graph_db_probe_executor = ThreadPoolExecutor(
max_workers=2, thread_name_prefix="health-graph-db-probe"
)
# Brief cache window so high-frequency probes (ALB target groups, scrapers)
# do not stampede the actual dependency checks.
CACHE_CONTROL_HEADER = "max-age=3, must-revalidate"
@@ -62,11 +84,7 @@ class HealthJSONRenderer(JSONRenderer):
def _now_iso() -> str:
return (
datetime.now(timezone.utc)
.isoformat(timespec="milliseconds")
.replace("+00:00", "Z")
)
return datetime.now(UTC).isoformat(timespec="milliseconds").replace("+00:00", "Z")
def _measure(name: str, check_fn) -> tuple[dict[str, Any], float]:
@@ -113,11 +131,24 @@ def _probe_valkey() -> None:
client.close()
def _probe_neo4j() -> None:
# Lazy import: avoids pulling attack_paths into the boot import graph.
from api.attack_paths.database import get_driver
def _graph_db_component_id() -> str:
    """Name of the configured graph database, normalised for ``componentId``.

    Reads ``settings.ATTACK_PATHS_SINK_DATABASE`` and returns it with
    surrounding whitespace removed and all characters lower-cased.
    """
    configured_sink = settings.ATTACK_PATHS_SINK_DATABASE
    return configured_sink.strip().lower()
get_driver().verify_connectivity()
def _probe_graph_db() -> None:
    """Check graph-database connectivity within a probe-scoped time budget.

    The connectivity check is submitted to ``_graph_db_probe_executor`` and
    awaited for at most ``GRAPH_DB_PROBE_TIMEOUT_SECONDS``.

    Raises:
        TimeoutError: the check did not finish within the budget. The
            original futures timeout is chained as the cause.
    """
    # Imported lazily so attack_paths stays out of the boot import graph.
    from api.attack_paths.database import verify_connectivity

    pending = _graph_db_probe_executor.submit(verify_connectivity)
    try:
        pending.result(timeout=GRAPH_DB_PROBE_TIMEOUT_SECONDS)
    except FuturesTimeoutError as exc:
        # The abandoned task is not awaited; it finishes on its own once the
        # driver's acquisition timeout fires.
        pending.cancel()
        message = f"graph-db probe exceeded {GRAPH_DB_PROBE_TIMEOUT_SECONDS}s"
        raise TimeoutError(message) from exc
def _build_check_entry(
@@ -180,14 +211,18 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
):
return snapshot[1], snapshot[2]
graph_db_component_id = _graph_db_component_id()
postgres_result, postgres_ms = _measure("postgres", _probe_postgres)
valkey_result, valkey_ms = _measure("valkey", _probe_valkey)
neo4j_result, neo4j_ms = _measure("neo4j", _probe_neo4j)
graph_db_result, graph_db_ms = _measure(graph_db_component_id, _probe_graph_db)
entries = [
_build_check_entry("postgres", "datastore", postgres_result, postgres_ms),
_build_check_entry("valkey", "datastore", valkey_result, valkey_ms),
_build_check_entry("neo4j", "datastore", neo4j_result, neo4j_ms),
_build_check_entry(
graph_db_component_id, "datastore", graph_db_result, graph_db_ms
),
]
overall = _aggregate_status(entries)
@@ -195,7 +230,7 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
payload["checks"] = {
"postgres:responseTime": [entries[0]],
"valkey:responseTime": [entries[1]],
"neo4j:responseTime": [entries[2]],
"graphdb:responseTime": [entries[2]],
}
http_status = (
@@ -237,10 +272,10 @@ class LivenessView(APIView):
class ReadinessView(APIView):
"""Readiness probe.
Returns 200 when PostgreSQL, Valkey and Neo4j all respond, or 503 with
per-dependency detail when any of them is unreachable. Per-IP throttle
plus the short in-process result cache cap the real dependency hits
regardless of inbound traffic shape.
Returns 200 when PostgreSQL, Valkey and the attack-paths graph store
all respond, or 503 with per-dependency detail when any of them is
unreachable. Per-IP throttle plus the short in-process result cache cap
the real dependency hits regardless of inbound traffic shape.
"""
authentication_classes: list = []
@@ -1,11 +1,8 @@
import random
from datetime import datetime, timezone
from datetime import UTC, datetime
from math import ceil
from uuid import uuid4
from django.core.management.base import BaseCommand
from tqdm import tqdm
from api.db_utils import rls_transaction
from api.models import (
Finding,
@@ -16,7 +13,9 @@ from api.models import (
Scan,
StatusChoices,
)
from django.core.management.base import BaseCommand
from prowler.lib.check.models import CheckMetadata
from tqdm import tqdm
class Command(BaseCommand):
@@ -116,7 +115,7 @@ class Command(BaseCommand):
trigger="manual",
state="executing",
progress=0,
started_at=datetime.now(timezone.utc),
started_at=datetime.now(UTC),
)
scan_state = "completed"
@@ -272,10 +271,8 @@ class Command(BaseCommand):
self.stdout.write(self.style.ERROR(f"Failed to populate test data: {e}"))
scan_state = "failed"
finally:
scan.completed_at = datetime.now(timezone.utc)
scan.duration = int(
(datetime.now(timezone.utc) - scan.started_at).total_seconds()
)
scan.completed_at = datetime.now(UTC)
scan.duration = int((datetime.now(UTC) - scan.started_at).total_seconds())
scan.progress = 100
scan.state = scan_state
scan.unique_resource_count = num_resources
@@ -1,5 +1,4 @@
from django.core.management.base import BaseCommand
from tasks.jobs.orphan_recovery import reconcile_orphans
+25
View File
@@ -2,6 +2,31 @@ import logging
import time
from config.custom_logging import BackendLogger
from django.core.handlers.asgi import ASGIRequest
from django.db import connections
class CloseDBConnectionsMiddleware:
    """
    Release request-scoped DB connections once an ASGI request has finished.

    Under the ASGI worker, connections opened by sync views are not released
    by Django's normal request-boundary cleanup, so they pile up idle until
    Postgres has no free slots. Only ASGI requests are processed; the sync
    WSGI test client manages its own connections and is left untouched.
    """

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        try:
            response = self.get_response(request)
        finally:
            # Runs on both the success and the exception path.
            self._release_connections(request)
        return response

    @staticmethod
    def _release_connections(request):
        """Close unusable or obsolete connections for an ASGI request."""
        if not isinstance(request, ASGIRequest):
            return
        for db_conn in connections.all(initialized_only=True):
            # Connections inside an atomic block are skipped.
            if db_conn.in_atomic_block:
                continue
            db_conn.close_if_unusable_or_obsolete()
def extract_auth_info(request) -> dict:
+13 -14
View File
@@ -1,26 +1,13 @@
import uuid
from functools import partial
import api.rls
import django.contrib.auth.models
import django.contrib.postgres.indexes
import django.contrib.postgres.search
import django.core.validators
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations, models
from psqlextra.backend.migrations.operations.add_default_partition import (
PostgresAddDefaultPartition,
)
from psqlextra.backend.migrations.operations.create_partitioned_model import (
PostgresCreatePartitionedModel,
)
from psqlextra.manager.manager import PostgresManager
from psqlextra.models.partitioned import PostgresPartitionedModel
from psqlextra.types import PostgresPartitioningMethod
from uuid6 import uuid7
import api.rls
from api.db_utils import (
DB_PROWLER_PASSWORD,
DB_PROWLER_USER,
@@ -53,6 +40,18 @@ from api.models import (
StateChoices,
StatusChoices,
)
from django.conf import settings
from django.db import migrations, models
from psqlextra.backend.migrations.operations.add_default_partition import (
PostgresAddDefaultPartition,
)
from psqlextra.backend.migrations.operations.create_partitioned_model import (
PostgresCreatePartitionedModel,
)
from psqlextra.manager.manager import PostgresManager
from psqlextra.models.partitioned import PostgresPartitionedModel
from psqlextra.types import PostgresPartitioningMethod
from uuid6 import uuid7
DB_NAME = settings.DATABASES["default"]["NAME"]
@@ -1,8 +1,7 @@
from api.db_utils import DB_PROWLER_USER
from django.conf import settings
from django.db import migrations
from api.db_utils import DB_PROWLER_USER
DB_NAME = settings.DATABASES["default"]["NAME"]
+1 -2
View File
@@ -2,12 +2,11 @@
import uuid
import api.rls
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -1,6 +1,5 @@
from django.db import migrations
from api.db_router import MainRouter
from django.db import migrations
def create_admin_role(apps, schema_editor):
@@ -1,12 +1,11 @@
import json
from datetime import datetime, timedelta, timezone
from datetime import UTC, datetime, timedelta
import django.db.models.deletion
from django.db import migrations, models
from django_celery_beat.models import PeriodicTask
from api.db_utils import rls_transaction
from api.models import Scan, StateChoices
from django.db import migrations, models
from django_celery_beat.models import PeriodicTask
def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
@@ -17,11 +16,11 @@ def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
tenant_id = task_kwargs["tenant_id"]
provider_id = task_kwargs["provider_id"]
current_time = datetime.now(timezone.utc)
current_time = datetime.now(UTC)
scheduled_time_today = datetime.combine(
current_time.date(),
daily_scheduled_scan_task.start_time.time(),
tzinfo=timezone.utc,
tzinfo=UTC,
)
if current_time < scheduled_time_today:
@@ -2,10 +2,9 @@
from functools import partial
from django.db import migrations
from api.db_utils import IntegrationTypeEnum, PostgresEnumMigration, register_enum
from api.models import Integration
from django.db import migrations
IntegrationTypeEnumMigration = PostgresEnumMigration(
enum_name="integration_type",
@@ -2,12 +2,11 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
import django.db.models.deletion
from api.rls import RowLevelSecurityConstraint
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
# Generated by Django 5.1.5 on 2025-03-25 11:29
from django.db import migrations, models
import api.db_utils
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
# Generated by Django 5.1.7 on 2025-04-16 08:47
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,12 +2,11 @@
import uuid
import api.rls
import django.db.models.deletion
import uuid6
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -1,8 +1,7 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
from django.db import migrations
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,12 +2,11 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
import django.db.models.deletion
from api.rls import RowLevelSecurityConstraint
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,12 +2,11 @@
import uuid
import api.rls
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
+2 -3
View File
@@ -2,13 +2,12 @@
import uuid
import api.db_utils
import api.rls
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
import api.db_utils
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -2,10 +2,9 @@
from functools import partial
from django.db import migrations
from api.db_utils import PostgresEnumMigration, ProcessorTypeEnum, register_enum
from api.models import Processor
from django.db import migrations
ProcessorTypeEnumMigration = PostgresEnumMigration(
enum_name="processor_type",
@@ -2,12 +2,11 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
import django.db.models.deletion
from api.rls import RowLevelSecurityConstraint
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
from django.db import migrations
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
from django.db import migrations
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
# Generated by Django 5.1.7 on 2025-07-09 14:44
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,15 +2,14 @@
import uuid
import api.db_utils
import api.rls
import django.core.validators
import django.db.models.deletion
import drf_simple_apikey.models
from django.conf import settings
from django.db import migrations, models
import api.db_utils
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -4,15 +4,14 @@ import json
import logging
import uuid
import api.rls
import django.db.models.deletion
from api.db_router import MainRouter
from config.custom_logging import BackendLogger
from cryptography.fernet import Fernet
from django.conf import settings
from django.db import migrations, models
import api.rls
from api.db_router import MainRouter
logger = logging.getLogger(BackendLogger.API)
@@ -1,8 +1,7 @@
# Generated by Django 5.1.7 on 2025-10-14 00:00
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,14 +2,13 @@
import uuid
import api.rls
import django.contrib.postgres.fields
import django.core.validators
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -1,8 +1,7 @@
# Generated by Django 5.1.10 on 2025-09-09 09:25
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
# Generated by Django 5.1.13 on 2025-11-05 08:37
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -2,11 +2,10 @@
import uuid
import api.rls
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -2,11 +2,10 @@
import uuid
import api.rls
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -2,11 +2,10 @@
import uuid
import api.rls
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -2,11 +2,10 @@
import uuid
import api.rls
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
@@ -1,10 +1,9 @@
# Generated by Django 5.1.14 on 2025-12-10
from django.db import migrations
from tasks.tasks import backfill_daily_severity_summaries_task
from api.db_router import MainRouter
from api.rls import Tenant
from django.db import migrations
from tasks.tasks import backfill_daily_severity_summaries_task
def trigger_backfill_task(apps, schema_editor):
@@ -1,10 +1,9 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -1,8 +1,7 @@
# Generated by Django migration for Alibaba Cloud provider support
from django.db import migrations
import api.db_utils
from django.db import migrations
class Migration(migrations.Migration):
@@ -1,10 +1,9 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):

Some files were not shown because too many files have changed in this diff Show More