mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-07-04 19:21:51 +00:00
Merge branch 'master' of github.com:prowler-cloud/prowler into pr11569-coderabbit-fix
This commit is contained in:
@@ -6,14 +6,20 @@
|
||||
PROWLER_UI_VERSION="stable"
|
||||
AUTH_URL=http://localhost:3000
|
||||
API_BASE_URL=http://prowler-api:8080/api/v1
|
||||
# deprecated, use UI_API_BASE_URL
|
||||
NEXT_PUBLIC_API_BASE_URL=${API_BASE_URL}
|
||||
UI_API_BASE_URL=${API_BASE_URL}
|
||||
# deprecated, use UI_API_DOCS_URL
|
||||
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
|
||||
UI_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
|
||||
AUTH_TRUST_HOST=true
|
||||
UI_PORT=3000
|
||||
# openssl rand -base64 32
|
||||
AUTH_SECRET="N/c6mnaS5+SWq81+819OrzQZlmx1Vxtp/orjttJSmw8="
|
||||
# Google Tag Manager ID
|
||||
# Google Tag Manager ID (empty/unset ⇒ GTM not loaded, zero egress)
|
||||
# deprecated, use UI_GOOGLE_TAG_MANAGER_ID
|
||||
NEXT_PUBLIC_GOOGLE_TAG_MANAGER_ID=""
|
||||
UI_GOOGLE_TAG_MANAGER_ID=""
|
||||
|
||||
#### MCP Server ####
|
||||
PROWLER_MCP_VERSION=stable
|
||||
@@ -139,13 +145,19 @@ DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
|
||||
DJANGO_SENTRY_DSN=
|
||||
DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute
|
||||
|
||||
# Sentry settings
|
||||
SENTRY_ENVIRONMENT=local
|
||||
# Sentry for the web app (server + browser). Empty/unset UI_SENTRY_DSN ⇒
|
||||
# Sentry disabled, zero egress. SENTRY_RELEASE (unprefixed) feeds the web app's
|
||||
# server/edge SDKs.
|
||||
UI_SENTRY_DSN=
|
||||
UI_SENTRY_ENVIRONMENT=local
|
||||
SENTRY_RELEASE=local
|
||||
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
|
||||
# Reserved runtime public config (registered now; no UI consumer yet)
|
||||
# POSTHOG_KEY=
|
||||
# POSTHOG_HOST=
|
||||
# REO_DEV_CLIENT_ID=
|
||||
|
||||
#### Prowler release version ####
|
||||
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.31.0
|
||||
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.32.0
|
||||
|
||||
# Social login credentials
|
||||
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# SDK
|
||||
/* @prowler-cloud/detection-remediation
|
||||
/prowler/ @prowler-cloud/detection-remediation
|
||||
/prowler/compliance/ @prowler-cloud/compliance
|
||||
/tests/ @prowler-cloud/detection-remediation
|
||||
/dashboard/ @prowler-cloud/detection-remediation
|
||||
/docs/ @prowler-cloud/detection-remediation
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: 'OSV-Scanner'
|
||||
description: 'Install osv-scanner and scan a lockfile, failing on HIGH/CRITICAL/UNKNOWN severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
|
||||
description: 'Install osv-scanner and scan a lockfile, failing on CRITICAL severity findings. Posts/updates a PR comment with findings on pull_request events (requires pull-requests: write).'
|
||||
author: 'Prowler'
|
||||
|
||||
inputs:
|
||||
@@ -7,9 +7,9 @@ inputs:
|
||||
description: 'Path to the lockfile to scan, relative to the repository root (e.g. uv.lock, api/uv.lock, ui/pnpm-lock.yaml).'
|
||||
required: true
|
||||
severity-levels:
|
||||
description: 'Comma-separated severity levels that fail the scan. Default: HIGH,CRITICAL,UNKNOWN.'
|
||||
description: 'Comma-separated severity levels that fail the scan. Default: CRITICAL.'
|
||||
required: false
|
||||
default: 'HIGH,CRITICAL,UNKNOWN'
|
||||
default: 'CRITICAL'
|
||||
version:
|
||||
description: 'osv-scanner release tag to install. When overriding, you MUST also override binary-sha256.'
|
||||
required: false
|
||||
|
||||
@@ -43,8 +43,17 @@ runs:
|
||||
if: github.repository_owner == 'prowler-cloud' && github.repository != 'prowler-cloud/prowler'
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
|
||||
LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
|
||||
-H "Authorization: Bearer ${GITHUB_TOKEN}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
|
||||
| jq -er '.sha') || {
|
||||
echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
|
||||
exit 1
|
||||
}
|
||||
echo "Latest commit hash: $LATEST_COMMIT"
|
||||
sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
|
||||
echo "Updated uv.lock entry:"
|
||||
@@ -54,8 +63,17 @@ runs:
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master' && github.repository == 'prowler-cloud/prowler'
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
LATEST_COMMIT=$(curl -s "https://api.github.com/repos/prowler-cloud/prowler/commits/master" | jq -r '.sha')
|
||||
LATEST_COMMIT=$(curl -sf --retry 3 --retry-all-errors --retry-delay 2 --retry-max-time 60 \
|
||||
-H "Authorization: Bearer ${GITHUB_TOKEN}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/prowler-cloud/prowler/commits/master" \
|
||||
| jq -er '.sha') || {
|
||||
echo "::error::Failed to fetch latest prowler/master commit from the GitHub API (HTTP error or missing .sha). Check the GITHUB_TOKEN and API rate limits."
|
||||
exit 1
|
||||
}
|
||||
echo "Latest commit hash: $LATEST_COMMIT"
|
||||
sed -i "s|\(git = \"https://github\.com/prowler-cloud/prowler\.git?rev=master\)#[a-f0-9]\{40\}\"|\1#${LATEST_COMMIT}\"|g" uv.lock
|
||||
echo "Updated uv.lock entry:"
|
||||
|
||||
@@ -63,7 +63,7 @@ runs:
|
||||
exit-code: '0'
|
||||
scanners: 'vuln'
|
||||
timeout: '5m'
|
||||
version: 'v0.69.2'
|
||||
version: 'v0.71.2'
|
||||
|
||||
- name: Run Trivy vulnerability scan (SARIF)
|
||||
if: inputs.upload-sarif == 'true' && github.event_name == 'push'
|
||||
@@ -76,7 +76,7 @@ runs:
|
||||
exit-code: '0'
|
||||
scanners: 'vuln'
|
||||
timeout: '5m'
|
||||
version: 'v0.69.2'
|
||||
version: 'v0.71.2'
|
||||
|
||||
- name: Upload Trivy results to GitHub Security tab
|
||||
if: inputs.upload-sarif == 'true' && github.event_name == 'push'
|
||||
|
||||
@@ -77,6 +77,11 @@ provider/okta:
|
||||
- any-glob-to-any-file: "prowler/providers/okta/**"
|
||||
- any-glob-to-any-file: "tests/providers/okta/**"
|
||||
|
||||
provider/linode:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: "prowler/providers/linode/**"
|
||||
- any-glob-to-any-file: "tests/providers/linode/**"
|
||||
|
||||
github_actions:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: ".github/workflows/*"
|
||||
|
||||
@@ -6,8 +6,7 @@
|
||||
# - .github/workflows/api-security.yml, sdk-security.yml, ui-security.yml
|
||||
#
|
||||
# Severity levels (comma-separated) are read from OSV_SEVERITY_LEVELS.
|
||||
# Default: HIGH,CRITICAL,UNKNOWN — preserves prior .safety-policy.yml policy
|
||||
# (ignore-cvss-severity-below: 7 + ignore-cvss-unknown-severity: False).
|
||||
# Default: CRITICAL — only CVSS >= 9.0 findings fail the scan.
|
||||
# osv-scanner has no native CVSS threshold (google/osv-scanner#1400, closed
|
||||
# not-planned). Severity is derived from $group.max_severity (numeric CVSS
|
||||
# score string) which osv-scanner emits per group.
|
||||
@@ -33,7 +32,7 @@ set -euo pipefail
|
||||
|
||||
ROOT="$(git rev-parse --show-toplevel)"
|
||||
CONFIG="${ROOT}/osv-scanner.toml"
|
||||
SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-HIGH,CRITICAL,UNKNOWN}"
|
||||
SEVERITY_LEVELS="${OSV_SEVERITY_LEVELS:-CRITICAL}"
|
||||
|
||||
for bin in osv-scanner jq; do
|
||||
if ! command -v "${bin}" >/dev/null 2>&1; then
|
||||
|
||||
@@ -272,27 +272,3 @@ jobs:
|
||||
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
|
||||
step-outcome: ${{ steps.outcome.outputs.outcome }}
|
||||
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
|
||||
|
||||
trigger-deployment:
|
||||
needs: [setup, container-build-push]
|
||||
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Harden Runner
|
||||
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
|
||||
with:
|
||||
egress-policy: block
|
||||
allowed-endpoints: >
|
||||
api.github.com:443
|
||||
|
||||
- name: Trigger API deployment
|
||||
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
|
||||
with:
|
||||
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
|
||||
repository: ${{ secrets.CLOUD_DISPATCH }}
|
||||
event-type: api-prowler-deployment
|
||||
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
|
||||
|
||||
@@ -12,9 +12,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'api/**'
|
||||
- '.github/workflows/api-container-checks.yml'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -134,5 +131,5 @@ jobs:
|
||||
with:
|
||||
image-name: ${{ env.IMAGE_NAME }}
|
||||
image-tag: ${{ github.sha }}
|
||||
fail-on-critical: 'false'
|
||||
fail-on-critical: 'true'
|
||||
severity: 'CRITICAL'
|
||||
|
||||
@@ -16,13 +16,6 @@ on:
|
||||
branches:
|
||||
- "master"
|
||||
- "v5.*"
|
||||
paths:
|
||||
- 'api/**'
|
||||
- '.github/workflows/api-tests.yml'
|
||||
- '.github/workflows/api-security.yml'
|
||||
- '.github/actions/setup-python-uv/**'
|
||||
- '.github/actions/osv-scanner/**'
|
||||
- '.github/scripts/osv-scan.sh'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
@@ -29,10 +29,11 @@ jobs:
|
||||
with:
|
||||
# We can't block as Trufflehog needs to verify secrets against vendors
|
||||
egress-policy: audit
|
||||
# allowed-endpoints: >
|
||||
# github.com:443
|
||||
# ghcr.io:443
|
||||
# pkg-containers.githubusercontent.com:443
|
||||
allowed-endpoints: >
|
||||
github.com:443
|
||||
ghcr.io:443
|
||||
pkg-containers.githubusercontent.com:443
|
||||
www.formbucket.com:443
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -263,27 +263,3 @@ jobs:
|
||||
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
|
||||
step-outcome: ${{ steps.outcome.outputs.outcome }}
|
||||
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
|
||||
|
||||
trigger-deployment:
|
||||
needs: [setup, container-build-push]
|
||||
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Harden Runner
|
||||
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
|
||||
with:
|
||||
egress-policy: block
|
||||
allowed-endpoints: >
|
||||
api.github.com:443
|
||||
|
||||
- name: Trigger MCP deployment
|
||||
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
|
||||
with:
|
||||
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
|
||||
repository: ${{ secrets.CLOUD_DISPATCH }}
|
||||
event-type: mcp-prowler-deployment
|
||||
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
|
||||
|
||||
@@ -12,9 +12,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'mcp_server/**'
|
||||
- '.github/workflows/mcp-container-checks.yml'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -127,5 +124,5 @@ jobs:
|
||||
with:
|
||||
image-name: ${{ env.IMAGE_NAME }}
|
||||
image-tag: ${{ github.sha }}
|
||||
fail-on-critical: 'false'
|
||||
fail-on-critical: 'true'
|
||||
severity: 'CRITICAL'
|
||||
|
||||
@@ -15,12 +15,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'mcp_server/pyproject.toml'
|
||||
- 'mcp_server/uv.lock'
|
||||
- '.github/workflows/mcp-security.yml'
|
||||
- '.github/actions/osv-scanner/**'
|
||||
- '.github/scripts/osv-scan.sh'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -30,7 +24,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
mcp-security-scans:
|
||||
if: github.repository == 'prowler-cloud/prowler'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
permissions:
|
||||
|
||||
@@ -29,6 +29,7 @@ jobs:
|
||||
- '3.10'
|
||||
- '3.11'
|
||||
- '3.12'
|
||||
- '3.13'
|
||||
|
||||
steps:
|
||||
- name: Harden Runner
|
||||
|
||||
@@ -15,12 +15,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'prowler/**'
|
||||
- 'Dockerfile*'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/sdk-container-checks.yml'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -111,25 +105,14 @@ jobs:
|
||||
id: check-changes
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: ./**
|
||||
files: |
|
||||
prowler/**
|
||||
Dockerfile*
|
||||
pyproject.toml
|
||||
uv.lock
|
||||
.github/workflows/sdk-container-checks.yml
|
||||
files_ignore: |
|
||||
.github/**
|
||||
prowler/CHANGELOG.md
|
||||
docs/**
|
||||
permissions/**
|
||||
api/**
|
||||
ui/**
|
||||
dashboard/**
|
||||
mcp_server/**
|
||||
skills/**
|
||||
README.md
|
||||
mkdocs.yml
|
||||
.backportrc.json
|
||||
.env
|
||||
docker-compose*
|
||||
examples/**
|
||||
.gitignore
|
||||
contrib/**
|
||||
**/AGENTS.md
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
@@ -153,5 +136,5 @@ jobs:
|
||||
with:
|
||||
image-name: ${{ env.IMAGE_NAME }}
|
||||
image-tag: ${{ github.sha }}
|
||||
fail-on-critical: 'false'
|
||||
fail-on-critical: 'true'
|
||||
severity: 'CRITICAL'
|
||||
|
||||
@@ -19,16 +19,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'prowler/**'
|
||||
- 'tests/**'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/sdk-tests.yml'
|
||||
- '.github/workflows/sdk-security.yml'
|
||||
- '.github/actions/setup-python-uv/**'
|
||||
- '.github/actions/osv-scanner/**'
|
||||
- '.github/scripts/osv-scan.sh'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -71,27 +61,18 @@ jobs:
|
||||
id: check-changes
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files:
|
||||
./**
|
||||
files: |
|
||||
prowler/**
|
||||
tests/**
|
||||
pyproject.toml
|
||||
uv.lock
|
||||
.github/workflows/sdk-tests.yml
|
||||
.github/workflows/sdk-security.yml
|
||||
.github/actions/setup-python-uv/**
|
||||
.github/actions/osv-scanner/**
|
||||
.github/scripts/osv-scan.sh
|
||||
files_ignore: |
|
||||
.github/**
|
||||
prowler/CHANGELOG.md
|
||||
docs/**
|
||||
permissions/**
|
||||
api/**
|
||||
ui/**
|
||||
dashboard/**
|
||||
mcp_server/**
|
||||
skills/**
|
||||
README.md
|
||||
mkdocs.yml
|
||||
.backportrc.json
|
||||
.env
|
||||
docker-compose*
|
||||
examples/**
|
||||
.gitignore
|
||||
contrib/**
|
||||
**/AGENTS.md
|
||||
|
||||
- name: Setup Python with uv
|
||||
|
||||
@@ -29,6 +29,7 @@ jobs:
|
||||
- '3.10'
|
||||
- '3.11'
|
||||
- '3.12'
|
||||
- '3.13'
|
||||
|
||||
steps:
|
||||
- name: Harden Runner
|
||||
@@ -540,7 +541,7 @@ jobs:
|
||||
with:
|
||||
flags: prowler-py${{ matrix.python-version }}-vercel
|
||||
files: ./vercel_coverage.xml
|
||||
|
||||
|
||||
# Scaleway Provider
|
||||
- name: Check if Scaleway files changed
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
@@ -588,7 +589,31 @@ jobs:
|
||||
with:
|
||||
flags: prowler-py${{ matrix.python-version }}-stackit
|
||||
files: ./stackit_coverage.xml
|
||||
|
||||
|
||||
# Linode Provider
|
||||
- name: Check if Linode files changed
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
id: changed-linode
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
./prowler/**/linode/**
|
||||
./tests/**/linode/**
|
||||
./uv.lock
|
||||
|
||||
- name: Run Linode tests
|
||||
if: steps.changed-linode.outputs.any_changed == 'true'
|
||||
run: uv run pytest -n auto --cov=./prowler/providers/linode --cov-report=xml:linode_coverage.xml tests/providers/linode
|
||||
|
||||
- name: Upload Linode coverage to Codecov
|
||||
if: steps.changed-linode.outputs.any_changed == 'true'
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
with:
|
||||
flags: prowler-py${{ matrix.python-version }}-linode
|
||||
files: ./linode_coverage.xml
|
||||
|
||||
# External Provider (dynamic loading)
|
||||
- name: Check if External Provider files changed
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
@@ -608,14 +633,14 @@ jobs:
|
||||
|
||||
- name: Upload External Provider coverage to Codecov
|
||||
if: steps.changed-external.outputs.any_changed == 'true'
|
||||
|
||||
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
with:
|
||||
flags: prowler-py${{ matrix.python-version }}-external
|
||||
files: ./external_coverage.xml
|
||||
|
||||
|
||||
# Lib
|
||||
- name: Check if Lib files changed
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
|
||||
@@ -32,9 +32,6 @@ env:
|
||||
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
|
||||
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-ui
|
||||
|
||||
# Build args
|
||||
NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
@@ -146,7 +143,6 @@ jobs:
|
||||
context: ${{ env.WORKING_DIRECTORY }}
|
||||
build-args: |
|
||||
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ (github.event_name == 'release' || github.event_name == 'workflow_dispatch') && format('v{0}', env.RELEASE_TAG) || needs.setup.outputs.short-sha }}
|
||||
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
|
||||
push: true
|
||||
platforms: ${{ matrix.platform }}
|
||||
tags: |
|
||||
@@ -262,27 +258,3 @@ jobs:
|
||||
payload-file-path: "./.github/scripts/slack-messages/container-release-completed.json"
|
||||
step-outcome: ${{ steps.outcome.outputs.outcome }}
|
||||
update-ts: ${{ needs.notify-release-started.outputs.message-ts }}
|
||||
|
||||
trigger-deployment:
|
||||
needs: [setup, container-build-push]
|
||||
if: always() && github.event_name == 'push' && needs.setup.result == 'success' && needs.container-build-push.result == 'success'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Harden Runner
|
||||
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
|
||||
with:
|
||||
egress-policy: block
|
||||
allowed-endpoints: >
|
||||
api.github.com:443
|
||||
|
||||
- name: Trigger UI deployment
|
||||
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
|
||||
with:
|
||||
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
|
||||
repository: ${{ secrets.CLOUD_DISPATCH }}
|
||||
event-type: ui-prowler-deployment
|
||||
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ needs.setup.outputs.short-sha }}"}'
|
||||
|
||||
@@ -12,9 +12,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'ui/**'
|
||||
- '.github/workflows/ui-container-checks.yml'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -132,5 +129,5 @@ jobs:
|
||||
with:
|
||||
image-name: ${{ env.IMAGE_NAME }}
|
||||
image-tag: ${{ github.sha }}
|
||||
fail-on-critical: 'false'
|
||||
fail-on-critical: 'true'
|
||||
severity: 'CRITICAL'
|
||||
|
||||
@@ -81,7 +81,8 @@ jobs:
|
||||
AUTH_SECRET: 'fallback-ci-secret-for-testing'
|
||||
AUTH_TRUST_HOST: true
|
||||
NEXTAUTH_URL: 'http://localhost:3000'
|
||||
NEXT_PUBLIC_API_BASE_URL: 'http://localhost:8080/api/v1'
|
||||
AUTH_URL: 'http://localhost:3000'
|
||||
UI_API_BASE_URL: 'http://localhost:8080/api/v1'
|
||||
E2E_ADMIN_USER: ${{ secrets.E2E_ADMIN_USER }}
|
||||
E2E_ADMIN_PASSWORD: ${{ secrets.E2E_ADMIN_PASSWORD }}
|
||||
E2E_AWS_PROVIDER_ACCOUNT_ID: ${{ secrets.E2E_AWS_PROVIDER_ACCOUNT_ID }}
|
||||
@@ -118,6 +119,14 @@ jobs:
|
||||
E2E_ALIBABACLOUD_ACCESS_KEY_ID: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_ID }}
|
||||
E2E_ALIBABACLOUD_ACCESS_KEY_SECRET: ${{ secrets.E2E_ALIBABACLOUD_ACCESS_KEY_SECRET }}
|
||||
E2E_ALIBABACLOUD_ROLE_ARN: ${{ secrets.E2E_ALIBABACLOUD_ROLE_ARN }}
|
||||
E2E_OKTA_DOMAIN: ${{ secrets.E2E_OKTA_DOMAIN }}
|
||||
E2E_OKTA_CLIENT_ID: ${{ secrets.E2E_OKTA_CLIENT_ID }}
|
||||
E2E_OKTA_BASE64_PRIVATE_KEY: ${{ secrets.E2E_OKTA_BASE64_PRIVATE_KEY }}
|
||||
E2E_GOOGLEWORKSPACE_CUSTOMER_ID: ${{ secrets.E2E_GOOGLEWORKSPACE_CUSTOMER_ID }}
|
||||
E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON: ${{ secrets.E2E_GOOGLEWORKSPACE_SERVICE_ACCOUNT_JSON }}
|
||||
E2E_GOOGLEWORKSPACE_DELEGATED_USER: ${{ secrets.E2E_GOOGLEWORKSPACE_DELEGATED_USER }}
|
||||
E2E_VERCEL_TEAM_ID: ${{ secrets.E2E_VERCEL_TEAM_ID }}
|
||||
E2E_VERCEL_API_TOKEN: ${{ secrets.E2E_VERCEL_API_TOKEN }}
|
||||
# Pass E2E paths from impact analysis
|
||||
E2E_TEST_PATHS: ${{ needs.impact-analysis.outputs.ui-e2e }}
|
||||
RUN_ALL_TESTS: ${{ needs.impact-analysis.outputs.run-all }}
|
||||
@@ -198,7 +207,7 @@ jobs:
|
||||
timeout=150
|
||||
elapsed=0
|
||||
while [ $elapsed -lt $timeout ]; do
|
||||
if curl -s ${NEXT_PUBLIC_API_BASE_URL}/docs >/dev/null 2>&1; then
|
||||
if curl -s ${UI_API_BASE_URL}/docs >/dev/null 2>&1; then
|
||||
echo "Prowler API is ready!"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@@ -15,12 +15,6 @@ on:
|
||||
branches:
|
||||
- 'master'
|
||||
- 'v5.*'
|
||||
paths:
|
||||
- 'ui/package.json'
|
||||
- 'ui/pnpm-lock.yaml'
|
||||
- '.github/workflows/ui-security.yml'
|
||||
- '.github/actions/osv-scanner/**'
|
||||
- '.github/scripts/osv-scan.sh'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -30,7 +24,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
ui-security-scans:
|
||||
if: github.repository == 'prowler-cloud/prowler'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
permissions:
|
||||
|
||||
@@ -131,6 +131,10 @@ jobs:
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
run: pnpm run healthcheck
|
||||
|
||||
- name: Check product-tour alignment
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
run: pnpm run tour:check
|
||||
|
||||
- name: Run pnpm audit
|
||||
if: steps.check-changes.outputs.any_changed == 'true'
|
||||
run: pnpm run audit
|
||||
|
||||
@@ -169,3 +169,7 @@ GEMINI.md
|
||||
|
||||
# Claude Code
|
||||
.claude/*
|
||||
|
||||
# Docker
|
||||
docker-compose.override.yml
|
||||
docker-compose-dev.override.yml
|
||||
|
||||
+36
-9
@@ -7,6 +7,10 @@
|
||||
# P50 — dependency validation
|
||||
|
||||
default_install_hook_types: [pre-commit]
|
||||
# Hooks run on commit only by default.
|
||||
# NOTE: default_stages does NOT override a hook's manifest stages, so fixers shipping pre-push in their
|
||||
# manifest need an explicit stages: ["pre-commit"] below to stay off push.
|
||||
default_stages: [pre-commit]
|
||||
|
||||
repos:
|
||||
## GENERAL (prek built-in — no external repo needed)
|
||||
@@ -21,13 +25,16 @@ repos:
|
||||
- id: check-json
|
||||
priority: 10
|
||||
- id: end-of-file-fixer
|
||||
stages: ["pre-commit"]
|
||||
priority: 0
|
||||
- id: trailing-whitespace
|
||||
stages: ["pre-commit"]
|
||||
priority: 0
|
||||
- id: no-commit-to-branch
|
||||
priority: 10
|
||||
- id: pretty-format-json
|
||||
args: ["--autofix", --no-sort-keys, --no-ensure-ascii]
|
||||
stages: ["pre-commit"]
|
||||
priority: 10
|
||||
|
||||
## TOML
|
||||
@@ -82,6 +89,7 @@ repos:
|
||||
name: "SDK - isort"
|
||||
files: { glob: ["{prowler,tests,dashboard,util,scripts}/**/*.py"] }
|
||||
args: ["--profile", "black"]
|
||||
stages: ["pre-commit"]
|
||||
priority: 20
|
||||
|
||||
- repo: https://github.com/psf/black
|
||||
@@ -102,17 +110,36 @@ repos:
|
||||
priority: 30
|
||||
|
||||
## PYTHON — API + MCP Server (ruff)
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.15.11
|
||||
# Run ruff through `uv run` against each project so prek uses the exact ruff
|
||||
# version pinned in that project's uv.lock — the same version GitHub Actions
|
||||
# runs via `uv run ruff`. This removes the drift between the local hooks and
|
||||
# CI. api/ and mcp_server/ are separate uv projects, so they need separate
|
||||
# hooks (each `uv run --project` resolves its own pinned ruff + config).
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: "API + MCP - ruff check"
|
||||
files: { glob: ["{api,mcp_server}/**/*.py"] }
|
||||
args: ["--fix"]
|
||||
- id: ruff-check-api
|
||||
name: "API - ruff check"
|
||||
entry: uv run --project ./api ruff check --fix
|
||||
language: system
|
||||
files: { glob: ["api/**/*.py"] }
|
||||
priority: 30
|
||||
- id: ruff-format
|
||||
name: "API + MCP - ruff format"
|
||||
files: { glob: ["{api,mcp_server}/**/*.py"] }
|
||||
- id: ruff-format-api
|
||||
name: "API - ruff format"
|
||||
entry: uv run --project ./api ruff format
|
||||
language: system
|
||||
files: { glob: ["api/**/*.py"] }
|
||||
priority: 20
|
||||
- id: ruff-check-mcp
|
||||
name: "MCP - ruff check"
|
||||
entry: uv run --project ./mcp_server ruff check --fix
|
||||
language: system
|
||||
files: { glob: ["mcp_server/**/*.py"] }
|
||||
priority: 30
|
||||
- id: ruff-format-mcp
|
||||
name: "MCP - ruff format"
|
||||
entry: uv run --project ./mcp_server ruff format
|
||||
language: system
|
||||
files: { glob: ["mcp_server/**/*.py"] }
|
||||
priority: 20
|
||||
|
||||
## PYTHON — uv (API + SDK)
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
# Trivy ignore file for the Prowler container images (SDK image, plus API-image-specific entries in their own section).
|
||||
# Each entry below documents (a) the affected package and why it ships in the
|
||||
# image, (b) why the CVE is not exploitable in Prowler's runtime, and (c) the
|
||||
# upstream fix status. Each entry carries an expiry date so that it lapses and must be re-reviewed.
|
||||
# Entries are scoped per-package so suppressions cannot drift onto unrelated
|
||||
# packages that may be assigned the same CVE in the future.
|
||||
#
|
||||
# Scanned by: .github/actions/trivy-scan via .github/workflows/sdk-container-checks.yml
|
||||
|
||||
# CVE-2026-42496 — perl-archive-tar path traversal via crafted symlinks.
|
||||
# CVE-2026-8376 — perl heap buffer overflow when compiling regex.
|
||||
# Packages: perl, perl-base, perl-modules-5.36, libperl5.36.
|
||||
# Why ignored: perl-base is part of Debian's "Essential: yes" set; it cannot be
|
||||
# removed without breaking dpkg. The Prowler SDK does not invoke perl at runtime;
|
||||
# neither vulnerable code path (Archive::Tar parsing or regex compilation of
|
||||
# attacker-controlled input) is reachable from Prowler. No Debian bookworm fix
|
||||
# is available yet.
|
||||
CVE-2026-42496 pkg:perl exp:2026-07-15
|
||||
CVE-2026-42496 pkg:perl-base exp:2026-07-15
|
||||
CVE-2026-42496 pkg:perl-modules-5.36 exp:2026-07-15
|
||||
CVE-2026-42496 pkg:libperl5.36 exp:2026-07-15
|
||||
CVE-2026-8376 pkg:perl exp:2026-07-15
|
||||
CVE-2026-8376 pkg:perl-base exp:2026-07-15
|
||||
CVE-2026-8376 pkg:perl-modules-5.36 exp:2026-07-15
|
||||
CVE-2026-8376 pkg:libperl5.36 exp:2026-07-15
|
||||
|
||||
# CVE-2025-7458 — SQLite integer overflow.
|
||||
# Package: libsqlite3-0.
|
||||
# Why ignored: transitive dependency of CPython's stdlib sqlite3 module. The
|
||||
# Prowler SDK does not open user-supplied SQLite databases; SQLite usage is
|
||||
# internal and bounded. No Debian bookworm fix is available.
|
||||
CVE-2025-7458 pkg:libsqlite3-0 exp:2026-07-15
|
||||
|
||||
# CVE-2026-43185 — Linux kernel ksmbd signedness bug.
|
||||
# Package: linux-libc-dev.
|
||||
# Why ignored: linux-libc-dev ships kernel headers for build-time compilation,
|
||||
# not a running kernel. Containers execute against the host kernel, so these
|
||||
# headers are inert at runtime. The upstream fix landed in kernel 7.0-rc2 and
|
||||
# has not been backported to Debian's 6.1 LTS line.
|
||||
CVE-2026-43185 pkg:linux-libc-dev exp:2026-07-15
|
||||
|
||||
# CVE-2023-45853 — zlib MiniZip integer overflow / heap overflow in
|
||||
# zipOpenNewFileInZip4_64.
|
||||
# Packages: zlib1g, zlib1g-dev.
|
||||
# Why ignored: Debian Security Tracker status for bookworm is <ignored>, with
|
||||
# the published rationale "contrib/minizip not built and src:zlib not producing
|
||||
# binary packages" — i.e. the vulnerable symbol is not present in the libz.so
|
||||
# shipped by Debian. The package is genuinely not affected (not merely unpatched). Upstream fix is in
|
||||
# zlib 1.3.1, available in Debian trixie (13); migrating the base image would
|
||||
# clear it fully.
|
||||
# Ref: https://security-tracker.debian.org/tracker/CVE-2023-45853
|
||||
CVE-2023-45853 pkg:zlib1g exp:2026-07-15
|
||||
CVE-2023-45853 pkg:zlib1g-dev exp:2026-07-15
|
||||
|
||||
# CVE-2026-55200 — libssh2 out-of-bounds write in ssh2_transport_read() due to
|
||||
# an unchecked packet_length field in transport.c (heap corruption, possible RCE).
|
||||
# Package: libssh2-1.
|
||||
# Why ignored: libssh2-1 is pulled in only as a transitive dependency of libcurl4
|
||||
# (installed in the SDK Dockerfile for the networking/PowerShell stack). The
|
||||
# vulnerable path is reached exclusively when libssh2 acts as an SSH/SCP/SFTP
|
||||
# client parsing transport packets from a server. Prowler never uses libcurl's
|
||||
# SSH/SCP/SFTP transports; it talks to cloud provider HTTPS endpoints only, so the
|
||||
# affected code is unreachable at runtime. Fixed upstream in libssh2 commit
|
||||
# 97acf3df (PR #2052); no Debian bookworm fix is available yet.
|
||||
# Ref: https://security-tracker.debian.org/tracker/CVE-2026-55200
|
||||
CVE-2026-55200 pkg:libssh2-1 exp:2026-07-15
|
||||
|
||||
# --- API container image (api/Dockerfile) ---
|
||||
# The entries below are specific to the Prowler API image, which ships
|
||||
# PowerShell and additional build tooling on top of the same bookworm base.
|
||||
|
||||
# CVE-2026-7210 — CPython/Expat hash-flooding denial of service in
|
||||
# `xml.parsers.expat` and `xml.etree.ElementTree`.
|
||||
# Packages: the Debian system Python 3.11 (python3.11*, libpython3.11*).
|
||||
# Why ignored: the API runs under the Python 3.12 interpreter shipped in its
|
||||
# `.venv`; the system `python3.11` is only present because `python3-dev` is
|
||||
# pulled in to compile native extensions (xmlsec, lxml) and is never executed
|
||||
# at runtime. The vulnerable path requires parsing attacker-controlled XML with
|
||||
# the affected interpreter, which Prowler does not do with the system Python.
|
||||
# Full mitigation also needs libexpat >= 2.8.0; no Debian bookworm fix yet.
|
||||
CVE-2026-7210 pkg:python3.11 exp:2026-07-15
|
||||
CVE-2026-7210 pkg:python3.11-dev exp:2026-07-15
|
||||
CVE-2026-7210 pkg:python3.11-minimal exp:2026-07-15
|
||||
CVE-2026-7210 pkg:libpython3.11 exp:2026-07-15
|
||||
CVE-2026-7210 pkg:libpython3.11-dev exp:2026-07-15
|
||||
CVE-2026-7210 pkg:libpython3.11-minimal exp:2026-07-15
|
||||
CVE-2026-7210 pkg:libpython3.11-stdlib exp:2026-07-15
|
||||
|
||||
# CVE-2026-33278 — Unbound DNSSEC validator use-after-free (DoS, possible RCE).
|
||||
# CVE-2026-42960 — Unbound DNS cache poisoning via promiscuous additional records.
|
||||
# Package: libunbound8.
|
||||
# Why ignored: libunbound8 is a transitive apt dependency of the TLS/networking
|
||||
# stack (GnuTLS DANE support); only the shared library ships in the image. Both
|
||||
# vulnerabilities require operating a live Unbound recursive DNSSEC validator
|
||||
# that processes attacker-influenced DNS responses. Prowler never starts an
|
||||
# Unbound resolver, so neither code path is reachable. No Debian bookworm fix yet.
|
||||
CVE-2026-33278 pkg:libunbound8 exp:2026-07-15
|
||||
CVE-2026-42960 pkg:libunbound8 exp:2026-07-15
|
||||
@@ -51,6 +51,7 @@ Use these skills for detailed patterns on-demand:
|
||||
| `django-migration-psql` | Django migration best practices for PostgreSQL | [SKILL.md](skills/django-migration-psql/SKILL.md) |
|
||||
| `postgresql-indexing` | PostgreSQL indexing, EXPLAIN, monitoring, maintenance | [SKILL.md](skills/postgresql-indexing/SKILL.md) |
|
||||
| `prowler-attack-paths-query` | Create Attack Paths openCypher queries | [SKILL.md](skills/prowler-attack-paths-query/SKILL.md) |
|
||||
| `prowler-tour` | Keep product-tour definitions aligned with the UI | [SKILL.md](skills/prowler-tour/SKILL.md) |
|
||||
| `gh-aw` | GitHub Agentic Workflows (gh-aw) | [SKILL.md](skills/gh-aw/SKILL.md) |
|
||||
| `skill-creator` | Create new AI agent skills | [SKILL.md](skills/skill-creator/SKILL.md) |
|
||||
|
||||
@@ -67,10 +68,12 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
|
||||
| Adding new providers | `prowler-provider` |
|
||||
| Adding privilege escalation detection queries | `prowler-attack-paths-query` |
|
||||
| Adding services to existing providers | `prowler-provider` |
|
||||
| Adding, updating, or removing a tour definition (*.tour.ts) | `prowler-tour` |
|
||||
| After creating/modifying a skill | `skill-sync` |
|
||||
| App Router / Server Actions | `nextjs-16` |
|
||||
| Auditing check-to-requirement mappings as a cloud auditor | `prowler-compliance` |
|
||||
| Building AI chat features | `ai-sdk-5` |
|
||||
| Changing button labels or section headings on a tour-covered page | `prowler-tour` |
|
||||
| Committing changes | `prowler-commit` |
|
||||
| Configuring MCP servers in agentic workflows | `gh-aw` |
|
||||
| Create PR that requires changelog entry | `prowler-changelog` |
|
||||
@@ -89,6 +92,7 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
|
||||
| Creating/updating compliance frameworks | `prowler-compliance` |
|
||||
| Debug why a GitHub Actions job is failing | `prowler-ci` |
|
||||
| Debugging gh-aw compilation errors | `gh-aw` |
|
||||
| Editing a UI file containing data-tour-id attributes | `prowler-tour` |
|
||||
| Fill .github/pull_request_template.md (Context/Description/Steps to review/Checklist) | `prowler-pr` |
|
||||
| Fixing bug | `tdd` |
|
||||
| Fixing compliance JSON bugs (duplicate IDs, empty Section, stale refs) | `prowler-compliance` |
|
||||
@@ -105,6 +109,8 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
|
||||
| Modifying gh-aw workflow frontmatter or safe-outputs | `gh-aw` |
|
||||
| Refactoring code | `tdd` |
|
||||
| Regenerate AGENTS.md Auto-invoke tables (sync.sh) | `skill-sync` |
|
||||
| Renaming or removing a data-tour-id attribute value | `prowler-tour` |
|
||||
| Restructuring routes or layouts covered by a tour | `prowler-tour` |
|
||||
| Review PR requirements: template, title conventions, changelog gate | `prowler-pr` |
|
||||
| Review changelog format and conventions | `prowler-changelog` |
|
||||
| Reviewing JSON:API compliance | `jsonapi` |
|
||||
|
||||
+14
-2
@@ -1,4 +1,4 @@
|
||||
FROM python:3.12.11-slim-bookworm@sha256:519591d6871b7bc437060736b9f7456b8731f1499a57e22e6c285135ae657bf7 AS build
|
||||
FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build
|
||||
|
||||
LABEL maintainer="https://github.com/prowler-cloud/prowler"
|
||||
LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
|
||||
@@ -6,7 +6,7 @@ LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
|
||||
ARG POWERSHELL_VERSION=7.5.0
|
||||
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
|
||||
|
||||
ARG TRIVY_VERSION=0.70.0
|
||||
ARG TRIVY_VERSION=0.71.2
|
||||
ENV TRIVY_VERSION=${TRIVY_VERSION}
|
||||
|
||||
ARG ZIZMOR_VERSION=1.24.1
|
||||
@@ -95,6 +95,18 @@ RUN uv sync --locked --compile-bytecode && \
|
||||
# Install PowerShell modules
|
||||
RUN .venv/bin/python prowler/providers/m365/lib/powershell/m365_powershell.py
|
||||
|
||||
USER root
|
||||
|
||||
# Remove build-only packages from the final image after Python dependencies are installed.
|
||||
RUN apt-get purge -y --auto-remove \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
libzstd-dev \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
USER prowler
|
||||
|
||||
# Remove deprecated dash dependencies
|
||||
RUN pip uninstall dash-html-components -y && \
|
||||
pip uninstall dash-core-components -y
|
||||
|
||||
@@ -1,5 +1,34 @@
|
||||
.DEFAULT_GOAL:=help
|
||||
|
||||
DEV_LOCAL := ./scripts/development/dev-local.sh
|
||||
|
||||
.PHONY: dev dev-setup dev-attach dev-launch dev-stop dev-clean dev-wipe dev-status
|
||||
|
||||
##@ Local Development
|
||||
dev: ## Start local API, worker, and database logs
|
||||
$(DEV_LOCAL) all
|
||||
|
||||
dev-setup: ## Bootstrap local dependencies, migrations, and fixtures
|
||||
$(DEV_LOCAL) setup
|
||||
|
||||
dev-attach: ## Attach to the local tmux development session
|
||||
$(DEV_LOCAL) attach
|
||||
|
||||
dev-launch: ## Start the local stack on fixed ports and attach
|
||||
$(DEV_LOCAL) launch
|
||||
|
||||
dev-stop: ## Stop the local tmux session and containers
|
||||
$(DEV_LOCAL) kill
|
||||
|
||||
dev-clean: ## Remove stopped local development containers
|
||||
$(DEV_LOCAL) clean
|
||||
|
||||
dev-wipe: ## Stop everything and delete local development data
|
||||
$(DEV_LOCAL) wipe
|
||||
|
||||
dev-status: ## Show local development container status
|
||||
$(DEV_LOCAL) status
|
||||
|
||||
##@ Testing
|
||||
test: ## Test with pytest
|
||||
rm -rf .coverage && \
|
||||
@@ -16,18 +45,41 @@ coverage-html: ## Show Test Coverage
|
||||
coverage html && \
|
||||
open htmlcov/index.html
|
||||
|
||||
##@ Linting
|
||||
format: ## Format Code
|
||||
@echo "Running black..."
|
||||
black .
|
||||
##@ Code Quality
|
||||
# `make` is the single entrypoint and mirrors CI exactly (uv run + same flags):
|
||||
# SDK (prowler/, util/) -> flake8 + black; pylint runs on prowler/ only
|
||||
# API & MCP server -> ruff (rules live in each project's pyproject.toml)
|
||||
# `format` applies fixes (incl. ruff's import/upgrade autofixes); `lint` only
|
||||
# verifies and is what CI gates on.
|
||||
.PHONY: format format-sdk format-api format-mcp lint lint-sdk lint-api lint-mcp
|
||||
|
||||
lint: ## Lint Code
|
||||
@echo "Running flake8..."
|
||||
flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib
|
||||
@echo "Running black... "
|
||||
black --check .
|
||||
@echo "Running pylint..."
|
||||
pylint --disable=W,C,R,E -j 0 prowler util
|
||||
format: format-sdk format-api format-mcp ## Format & autofix all components (SDK, API, MCP)
|
||||
|
||||
lint: lint-sdk lint-api lint-mcp ## Lint all components (SDK, API, MCP) — mirrors CI
|
||||
|
||||
format-sdk: ## Format SDK code (black)
|
||||
uv run black --exclude "\.venv|api|ui|skills|mcp_server" .
|
||||
|
||||
lint-sdk: ## Lint SDK code (flake8, black --check, pylint)
|
||||
uv run flake8 . --ignore=E266,W503,E203,E501,W605,E128 --exclude .venv,contrib,ui,api,skills,mcp_server
|
||||
uv run black --exclude "\.venv|api|ui|skills|mcp_server" --check .
|
||||
uv run pylint --disable=W,C,R,E -j 0 -rn -sn prowler/
|
||||
|
||||
format-api: ## Format & autofix API code (ruff)
|
||||
cd api && uv run ruff check . --exclude contrib --fix
|
||||
cd api && uv run ruff format . --exclude contrib
|
||||
|
||||
lint-api: ## Lint API code (ruff check + format --check)
|
||||
cd api && uv run ruff check . --exclude contrib
|
||||
cd api && uv run ruff format --check . --exclude contrib
|
||||
|
||||
format-mcp: ## Format & autofix MCP server code (ruff)
|
||||
cd mcp_server && uv run ruff check . --fix
|
||||
cd mcp_server && uv run ruff format .
|
||||
|
||||
lint-mcp: ## Lint MCP server code (ruff check + format --check)
|
||||
cd mcp_server && uv run ruff check .
|
||||
cd mcp_server && uv run ruff format --check .
|
||||
|
||||
##@ PyPI
|
||||
pypi-clean: ## Delete the distribution files
|
||||
|
||||
@@ -83,16 +83,35 @@ prowler dashboard
|
||||
|
||||
## Attack Paths
|
||||
|
||||
Attack Paths automatically extends every completed AWS scan with a Neo4j graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan and therefore requires:
|
||||
Attack Paths automatically extends every completed AWS scan with a graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan.
|
||||
|
||||
- An accessible Neo4j instance (the Docker Compose files already ships a `neo4j` service).
|
||||
- The following environment variables so Django and Celery can connect:
|
||||
Two graph backends are supported as the long-lived sink:
|
||||
|
||||
| Variable | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
|
||||
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
|
||||
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
|
||||
- **Neo4j** (default; the Docker Compose files already ship a `neo4j` service).
|
||||
- **Amazon Neptune** (cloud-managed; opt-in).
|
||||
|
||||
Select the sink with `ATTACK_PATHS_SINK_DATABASE` (`neo4j` or `neptune`; default `neo4j`).
|
||||
|
||||
> Note: Cartography ingestion always uses a temporary Neo4j database, regardless of the configured sink. The `NEO4J_*` variables below must remain set even when `ATTACK_PATHS_SINK_DATABASE=neptune`.
|
||||
|
||||
### Neo4j sink
|
||||
|
||||
| Variable | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
|
||||
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
|
||||
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
|
||||
|
||||
### Neptune sink
|
||||
|
||||
| Variable | Description | Default |
|
||||
| --- | --- | --- |
|
||||
| `NEPTUNE_WRITER_ENDPOINT` | Bolt host for the Neptune writer instance. Required when sink is `neptune`. | _empty_ |
|
||||
| `NEPTUNE_READER_ENDPOINT` | Optional reader endpoint for read-only queries. Falls back to the writer when unset. | _empty_ |
|
||||
| `NEPTUNE_PORT` | Bolt port exposed by Neptune. | `8182` |
|
||||
| `AWS_REGION` | Region the Neptune cluster lives in. Required when sink is `neptune`. | _empty_ |
|
||||
|
||||
Neptune authenticates with SigV4 using the standard boto3 credential chain. The worker's IAM role (or `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) supplies the credentials. There is no Neptune password variable.
|
||||
|
||||
Every AWS provider scan will enqueue an Attack Paths ingestion job automatically. Other cloud providers will be added in future iterations.
|
||||
|
||||
@@ -104,26 +123,27 @@ Every AWS provider scan will enqueue an Attack Paths ingestion job automatically
|
||||
|
||||
| Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) | Support | Interface |
|
||||
|---|---|---|---|---|---|---|
|
||||
| AWS | 600 | 84 | 44 | 18 | Official | UI, API, CLI |
|
||||
| Azure | 167 | 22 | 19 | 16 | Official | UI, API, CLI |
|
||||
| GCP | 102 | 18 | 17 | 12 | Official | UI, API, CLI |
|
||||
| Kubernetes | 83 | 7 | 7 | 11 | Official | UI, API, CLI |
|
||||
| GitHub | 24 | 3 | 1 | 5 | Official | UI, API, CLI |
|
||||
| M365 | 102 | 10 | 4 | 10 | Official | UI, API, CLI |
|
||||
| OCI | 51 | 14 | 4 | 10 | Official | UI, API, CLI |
|
||||
| Alibaba Cloud | 63 | 9 | 4 | 9 | Official | UI, API, CLI |
|
||||
| Cloudflare | 29 | 3 | 0 | 5 | Official | UI, API, CLI |
|
||||
| AWS | 615 | 86 | 47 | 19 | Official | UI, API, CLI |
|
||||
| Azure | 190 | 22 | 21 | 16 | Official | UI, API, CLI |
|
||||
| GCP | 109 | 20 | 19 | 12 | Official | UI, API, CLI |
|
||||
| Kubernetes | 90 | 7 | 8 | 11 | Official | UI, API, CLI |
|
||||
| GitHub | 24 | 3 | 2 | 5 | Official | UI, API, CLI |
|
||||
| M365 | 109 | 10 | 6 | 10 | Official | UI, API, CLI |
|
||||
| OCI | 52 | 14 | 5 | 10 | Official | UI, API, CLI |
|
||||
| Alibaba Cloud | 63 | 9 | 6 | 9 | Official | UI, API, CLI |
|
||||
| Cloudflare | 29 | 3 | 2 | 5 | Official | UI, API, CLI |
|
||||
| IaC | [See `trivy` docs.](https://trivy.dev/latest/docs/coverage/iac/) | N/A | N/A | N/A | Official | UI, API, CLI |
|
||||
| MongoDB Atlas | 10 | 3 | 0 | 8 | Official | UI, API, CLI |
|
||||
| MongoDB Atlas | 10 | 3 | 1 | 8 | Official | UI, API, CLI |
|
||||
| LLM | [See `promptfoo` docs.](https://www.promptfoo.dev/docs/red-team/plugins/) | N/A | N/A | N/A | Official | CLI |
|
||||
| Image | N/A | N/A | N/A | N/A | Official | CLI, API |
|
||||
| Google Workspace | 39 | 5 | 2 | 5 | Official | UI, API, CLI |
|
||||
| OpenStack | 34 | 5 | 0 | 9 | Official | UI, API, CLI |
|
||||
| Vercel | 26 | 6 | 0 | 8 | Official | UI, API, CLI |
|
||||
| Okta | 1 | 1 | 0 | 1 | Official | CLI |
|
||||
| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 0 | 1 | Unofficial | CLI |
|
||||
| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 0 | 3 | Unofficial | CLI |
|
||||
| NHN | 6 | 2 | 1 | 0 | Unofficial | CLI |
|
||||
| Google Workspace | 65 | 11 | 3 | 6 | Official | UI, API, CLI |
|
||||
| OpenStack | 34 | 5 | 1 | 9 | Official | UI, API, CLI |
|
||||
| Vercel | 26 | 6 | 1 | 8 | Official | UI, API, CLI |
|
||||
| Okta | 29 | 8 | 2 | 2 | Official | UI, API, CLI |
|
||||
| Linode [Contact us](https://prowler.com/contact) | 10 | 3 | 1 | 4 | Unofficial | CLI |
|
||||
| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 1 | 1 | Unofficial | CLI |
|
||||
| StackIT [Contact us](https://prowler.com/contact) | 7 | 2 | 1 | 3 | Unofficial | CLI |
|
||||
| NHN | 6 | 2 | 2 | 0 | Unofficial | CLI |
|
||||
|
||||
> [!Note]
|
||||
> The numbers in the table are updated periodically.
|
||||
|
||||
@@ -24,6 +24,9 @@ DJANGO_THROTTLE_TOKEN_OBTAIN=50/minute
|
||||
# Decide whether to allow Django to manage database table partitions
|
||||
DJANGO_MANAGE_DB_PARTITIONS=[True|False]
|
||||
DJANGO_CELERY_DEADLOCK_ATTEMPTS=5
|
||||
# Optional: bound Celery's prefork pool size. Unset → Celery uses os.cpu_count().
|
||||
# Useful on Kubernetes nodes with many CPUs where unbounded prefork balloons memory.
|
||||
# DJANGO_CELERY_WORKER_CONCURRENCY=4
|
||||
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
|
||||
DJANGO_SENTRY_DSN=
|
||||
|
||||
|
||||
@@ -2,6 +2,76 @@
|
||||
|
||||
All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
## [1.33.0] (Prowler UNRELEASED)
|
||||
|
||||
### 🔄 Changed
|
||||
|
||||
- Attack Paths: AWS Neptune is now supported as a persistent sink database, selectable via `ATTACK_PATHS_SINK_DATABASE=neptune` (default `neo4j`); Cartography (bumped to 0.138.1) keeps its per-scan ingest database on Neo4j [(#11524)](https://github.com/prowler-cloud/prowler/pull/11524)
|
||||
- Attack Paths: Scan task now checks the ingest Neo4j database and configured graph sink before starting graph ingestion [(#11743)](https://github.com/prowler-cloud/prowler/pull/11743)
|
||||
|
||||
---
|
||||
|
||||
## [1.32.2] (Prowler UNRELEASED)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- `scan-perform` no longer reports an error when a provider is deleted during a running scan [(#11696)](https://github.com/prowler-cloud/prowler/pull/11696)
|
||||
|
||||
---
|
||||
|
||||
## [1.32.1] (Prowler v5.31.1)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- API key auth no longer mutates `TenantAPIKey.objects` during admin DB lookups [(#11686)](https://github.com/prowler-cloud/prowler/pull/11686)
|
||||
|
||||
---
|
||||
|
||||
## [1.32.0] (Prowler v5.31.0)
|
||||
|
||||
### 🚀 Added
|
||||
|
||||
- Provider group filters for API endpoints that support cloud provider filtering, including exact and `__in` variants [(#11573)](https://github.com/prowler-cloud/prowler/pull/11573)
|
||||
- Provider filters for `GET /api/v1/compliance-overviews`, `/metadata`, and `/requirements`, using latest completed scans per matching provider [(#11587)](https://github.com/prowler-cloud/prowler/pull/11587)
|
||||
- Server-Sent Events (SSE) infrastructure for the API: a base viewset, a tenant-aware channel manager, and channel-name helpers backed by `django-eventstream` over Valkey Pub/Sub and served through the Gunicorn ASGI worker, so feature endpoints can stream events to clients over a single long-lived connection [(#11556)](https://github.com/prowler-cloud/prowler/pull/11556)
|
||||
- `DJANGO_CELERY_WORKER_CONCURRENCY` to configure Celery worker concurrency. Leave it unset to keep the default behaviour [(#11075)](https://github.com/prowler-cloud/prowler/pull/11075)
|
||||
|
||||
### 🔄 Changed
|
||||
|
||||
- Gunicorn worker timeout raised from the 30s default to 120s, so long-running requests are no longer killed prematurely [(#11631)](https://github.com/prowler-cloud/prowler/pull/11631)
|
||||
- Sentry now drops ASGI's `RequestAborted` errors from health-check probe disconnects on `/health/live` [(#11632)](https://github.com/prowler-cloud/prowler/pull/11632)
|
||||
- Gunicorn keep-alive timeout now exceeds the load balancer idle timeout, stopping 502s from reused connections [(#11647)](https://github.com/prowler-cloud/prowler/pull/11647)
|
||||
- API runs under the Uvicorn worker so keep-alive outlives the load balancer idle timeout, fixing Gunicorn's intermittent 502s [(#11663)](https://github.com/prowler-cloud/prowler/pull/11663)
|
||||
- SAML logins no longer wipe a user's roles when the IdP does not send the `userType` attribute; existing roles are kept, and when `userType` names a role that does not exist it is now created with read-only access (visibility over all providers, no management permissions) instead of no permissions at all [(#11520)](https://github.com/prowler-cloud/prowler/pull/11520)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- Database connections no longer leak under the ASGI worker, which previously exhausted the read replica's connection slots and caused 500s on read endpoints [(#11640)](https://github.com/prowler-cloud/prowler/pull/11640)
|
||||
|
||||
### 🔐 Security
|
||||
|
||||
- `aiohttp` to 3.14.0 and `idna` to 3.15, patching known CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
|
||||
- Container base image to `python:3.12.13-slim-bookworm` and `trivy` to 0.71.0, patching OS and Go module CVEs [(#11596)](https://github.com/prowler-cloud/prowler/pull/11596)
|
||||
- `trivy` binary bumped to 0.71.0 patching embedded `golang.org/x/crypto`, `golang.org/x/net`, and Go `stdlib` CVEs [(#11592)](https://github.com/prowler-cloud/prowler/pull/11592)
|
||||
|
||||
---
|
||||
|
||||
## [1.31.3] (Prowler v5.30.3)
|
||||
|
||||
### 🔐 Security
|
||||
|
||||
- SAML logins now link to an existing account only when the asserted email domain matches the ACS endpoint and the user is already a member of that domain's tenant, fixing a cross-tenant account takeover [(GHSA-h8m9-jgf8-vwvp)](https://github.com/prowler-cloud/prowler/security/advisories/GHSA-h8m9-jgf8-vwvp)
|
||||
|
||||
---
|
||||
|
||||
## [1.31.2] (Prowler v5.30.2)
|
||||
|
||||
### 🔄 Changed
|
||||
|
||||
- `scan-compliance-overviews` task now streams the findings aggregation and the requirement-row writes so it runs faster and its peak memory no longer grows with the number of regions and frameworks [(#11591)](https://github.com/prowler-cloud/prowler/pull/11591)
|
||||
|
||||
---
|
||||
|
||||
## [1.31.1] (Prowler v5.30.1)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
+19
-2
@@ -1,11 +1,11 @@
|
||||
FROM python:3.12.10-slim-bookworm@sha256:fd95fa221297a88e1cf49c55ec1828edd7c5a428187e67b5d1805692d11588db AS build
|
||||
FROM python:3.12.13-slim-bookworm@sha256:76d4b7b6305788c6b4c6a19d6a22a3921bf802e9af4d5e1e5bd771208dba74bf AS build
|
||||
|
||||
LABEL maintainer="https://github.com/prowler-cloud/api"
|
||||
|
||||
ARG POWERSHELL_VERSION=7.5.0
|
||||
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
|
||||
|
||||
ARG TRIVY_VERSION=0.70.0
|
||||
ARG TRIVY_VERSION=0.71.2
|
||||
ENV TRIVY_VERSION=${TRIVY_VERSION}
|
||||
|
||||
ARG ZIZMOR_VERSION=1.24.1
|
||||
@@ -102,6 +102,23 @@ RUN uv sync --locked --no-install-project && \
|
||||
|
||||
RUN .venv/bin/python .venv/lib/python3.12/site-packages/prowler/providers/m365/lib/powershell/m365_powershell.py
|
||||
|
||||
USER root
|
||||
|
||||
# Remove build-only packages from the final image after Python dependencies are installed.
|
||||
RUN apt-get purge -y --auto-remove \
|
||||
gcc \
|
||||
g++ \
|
||||
make \
|
||||
libxml2-dev \
|
||||
libxmlsec1-dev \
|
||||
pkg-config \
|
||||
libtool \
|
||||
libxslt1-dev \
|
||||
python3-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
USER prowler
|
||||
|
||||
COPY --chown=prowler:prowler src/backend/ ./backend/
|
||||
COPY --chown=prowler:prowler docker-entrypoint.sh ./docker-entrypoint.sh
|
||||
|
||||
|
||||
@@ -196,6 +196,42 @@ python -m celery -A config.celery worker -l info -E
|
||||
|
||||
The Celery worker does not detect and reload changes in the code, so you need to restart it manually when you make changes.
|
||||
|
||||
### Makefile-Assisted Local Deployment
|
||||
|
||||
This method is an additional local development workflow. It does not replace the manual local deployment or the Docker deployment described in this guide.
|
||||
|
||||
PostgreSQL, Valkey, and Neo4j run with Docker Compose, while Django and the Celery worker run natively through `uv`. Additionally, this workflow creates a `tmux` session with panes for the API, worker, and PostgreSQL logs.
|
||||
|
||||
Before using this method, ensure `docker compose`, `tmux`, and `uv` are installed.
|
||||
|
||||
This workflow is designed for macOS and should also work on Linux when Docker, `tmux`, and `uv` are available. Windows requires script changes before it can be supported.
|
||||
|
||||
From the repository root, run:
|
||||
|
||||
```console
|
||||
make dev
|
||||
```
|
||||
|
||||
The API will be available at:
|
||||
|
||||
```console
|
||||
http://localhost:8080/api/v1
|
||||
```
|
||||
|
||||
Use these commands to manage the local stack:
|
||||
|
||||
```console
|
||||
make dev-setup # Bootstrap dependencies, migrations, and fixtures
|
||||
make dev-attach # Attach to the tmux session
|
||||
make dev-launch # Start the stack on fixed ports and attach
|
||||
make dev-stop # Stop the tmux session and containers
|
||||
make dev-clean # Remove stopped development containers
|
||||
make dev-wipe # Stop everything and delete local development data
|
||||
make dev-status # Show development container status
|
||||
```
|
||||
|
||||
This workflow does not start the UI. Start it separately from the `ui/` directory when needed.
|
||||
|
||||
### Docker deployment
|
||||
|
||||
This method requires `docker` and `docker compose`.
|
||||
|
||||
@@ -21,13 +21,19 @@ apply_fixtures() {
|
||||
}
|
||||
|
||||
start_dev_server() {
|
||||
echo "Starting the development server..."
|
||||
exec uv run python manage.py runserver 0.0.0.0:"${DJANGO_PORT:-8080}"
|
||||
echo "Starting the development server (Gunicorn ASGI, debug + reload)..."
|
||||
# Same server/worker as prod (config.asgi via the native `asgi` worker), so
|
||||
# SSE streams run on the event loop exactly as they do in production. DEBUG is
|
||||
# on so guniconf's `reload = DEBUG` hot-reloads edited code (and flips
|
||||
# `preload_app` off so reload actually takes effect).
|
||||
export DJANGO_DEBUG="${DJANGO_DEBUG:-True}"
|
||||
export DJANGO_BIND_ADDRESS="${DJANGO_BIND_ADDRESS:-0.0.0.0}"
|
||||
exec uv run gunicorn -c config/guniconf.py config.asgi:application
|
||||
}
|
||||
|
||||
start_prod_server() {
|
||||
echo "Starting the Gunicorn server..."
|
||||
exec uv run gunicorn -c config/guniconf.py config.wsgi:application
|
||||
exec uv run gunicorn -c config/guniconf.py config.asgi:application
|
||||
}
|
||||
|
||||
resolve_worker_hostname() {
|
||||
|
||||
@@ -65,6 +65,7 @@ All settings have safe defaults; override via environment variables.
|
||||
| Env var | Default | Purpose |
|
||||
| --- | --- | --- |
|
||||
| `DJANGO_CELERY_WORKER_PREFETCH_MULTIPLIER` | `1` | Tasks reserved per worker process. |
|
||||
| `DJANGO_CELERY_WORKER_CONCURRENCY` | unset | Optional Celery prefork pool size. When unset, Celery uses its CPU-based default. Set this on worker containers to bound idle memory on hosts with many CPUs. |
|
||||
| `DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT` | `60` | Seconds the worker drains/re-queues on `SIGTERM` before force-kill. |
|
||||
| `DJANGO_CELERY_TASK_TIME_LIMIT` | `21600` (6h) | Hard limit for most tasks; connection checks are capped at 120s. |
|
||||
| `DJANGO_CELERY_TASK_SOFT_TIME_LIMIT` | hard - 600 | Soft limit; raises `SoftTimeLimitExceeded` for cleanup. |
|
||||
|
||||
+47
-19
@@ -14,7 +14,7 @@ dev = [
|
||||
"pytest-env==1.1.3",
|
||||
"pytest-randomly==3.15.0",
|
||||
"pytest-xdist==3.6.1",
|
||||
"ruff==0.5.0",
|
||||
"ruff==0.15.11",
|
||||
"tqdm==4.67.1",
|
||||
"vulture==2.14",
|
||||
"prek==0.3.9"
|
||||
@@ -41,7 +41,9 @@ dependencies = [
|
||||
"drf-spectacular==0.27.2",
|
||||
"drf-spectacular-jsonapi==0.5.1",
|
||||
"defusedxml==0.7.1",
|
||||
"gunicorn==23.0.0",
|
||||
"django-eventstream==5.3.3",
|
||||
"gunicorn==26.0.0",
|
||||
"uvloop==0.22.1",
|
||||
"lxml==6.1.0",
|
||||
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
|
||||
"psycopg2-binary==2.9.9",
|
||||
@@ -56,11 +58,12 @@ dependencies = [
|
||||
"matplotlib (==3.10.8)",
|
||||
"reportlab (==4.4.10)",
|
||||
"neo4j (==6.1.0)",
|
||||
"cartography (==0.135.0)",
|
||||
"cartography (==0.138.1)",
|
||||
"gevent (==25.9.1)",
|
||||
"werkzeug (==3.1.7)",
|
||||
"sqlparse (==0.5.5)",
|
||||
"fonttools (==4.62.1)"
|
||||
"fonttools (==4.62.1)",
|
||||
"uvicorn-worker (==0.4.0)",
|
||||
]
|
||||
description = "Prowler's API (Django/DRF)"
|
||||
license = "Apache-2.0"
|
||||
@@ -68,7 +71,24 @@ name = "prowler-api"
|
||||
package-mode = false
|
||||
# Needed for the SDK compatibility
|
||||
requires-python = ">=3.11,<3.13"
|
||||
version = "1.32.0"
|
||||
version = "1.33.0"
|
||||
|
||||
# Shared ruff baseline (kept in sync with mcp_server/pyproject.toml).
|
||||
# target-version tracks this project's lowest supported Python.
|
||||
[tool.ruff]
|
||||
src = ["src"]
|
||||
target-version = "py311"
|
||||
|
||||
[tool.ruff.lint]
|
||||
# Defaults (E4/E7/E9, F) plus import sorting, modern-syntax upgrades, and
|
||||
# comprehension lints — all mechanically auto-fixable. flake8-bugbear (B) is a
|
||||
# good next step but needs manual cleanup (e.g. B904 raise-from), so it is left
|
||||
# out of the shared baseline for now.
|
||||
extend-select = [
|
||||
"I", # isort — import ordering (prek's isort hook covers only the SDK)
|
||||
"UP", # pyupgrade — modern syntax for the min supported Python
|
||||
"C4" # flake8-comprehensions
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
# Transitive pins matching master to avoid silent drift; bump deliberately.
|
||||
@@ -79,7 +99,7 @@ constraint-dependencies = [
|
||||
"aiobotocore==2.25.1",
|
||||
"aiofiles==24.1.0",
|
||||
"aiohappyeyeballs==2.6.1",
|
||||
"aiohttp==3.13.5",
|
||||
"aiohttp==3.14.0",
|
||||
"aioitertools==0.13.0",
|
||||
"aiosignal==1.4.0",
|
||||
"alibabacloud-actiontrail20200706==2.4.1",
|
||||
@@ -124,9 +144,8 @@ constraint-dependencies = [
|
||||
"astroid==3.2.4",
|
||||
"async-timeout==5.0.1",
|
||||
"attrs==25.4.0",
|
||||
"authlib==1.6.9",
|
||||
"authlib==1.6.12",
|
||||
"autopep8==2.3.2",
|
||||
"awsipranges==0.3.3",
|
||||
"azure-cli-core==2.83.0",
|
||||
"azure-cli-telemetry==1.1.0",
|
||||
"azure-common==1.1.28",
|
||||
@@ -174,7 +193,7 @@ constraint-dependencies = [
|
||||
"blinker==1.9.0",
|
||||
"boto3==1.40.61",
|
||||
"botocore==1.40.61",
|
||||
"cartography==0.135.0",
|
||||
"cartography==0.138.1",
|
||||
"celery==5.6.2",
|
||||
"certifi==2026.1.4",
|
||||
"cffi==2.0.0",
|
||||
@@ -199,7 +218,6 @@ constraint-dependencies = [
|
||||
"debugpy==1.8.20",
|
||||
"decorator==5.2.1",
|
||||
"defusedxml==0.7.1",
|
||||
"detect-secrets==1.5.0",
|
||||
"dill==0.4.1",
|
||||
"distro==1.9.0",
|
||||
"dj-rest-auth==7.0.1",
|
||||
@@ -209,6 +227,7 @@ constraint-dependencies = [
|
||||
"django-celery-results==2.6.0",
|
||||
"django-cors-headers==4.4.0",
|
||||
"django-environ==0.11.2",
|
||||
"django-eventstream==5.3.3",
|
||||
"django-filter==24.3",
|
||||
"django-guid==3.5.0",
|
||||
"django-postgres-extra==2.0.9",
|
||||
@@ -253,7 +272,7 @@ constraint-dependencies = [
|
||||
"grpc-google-iam-v1==0.14.3",
|
||||
"grpcio==1.76.0",
|
||||
"grpcio-status==1.76.0",
|
||||
"gunicorn==23.0.0",
|
||||
"gunicorn==26.0.0",
|
||||
"h11==0.16.0",
|
||||
"h2==4.3.0",
|
||||
"hpack==4.1.0",
|
||||
@@ -262,8 +281,8 @@ constraint-dependencies = [
|
||||
"httpx==0.28.1",
|
||||
"humanfriendly==10.0",
|
||||
"hyperframe==6.1.0",
|
||||
"iamdata==0.1.202602021",
|
||||
"idna==3.11",
|
||||
"iamdata==0.1.202605131",
|
||||
"idna==3.15",
|
||||
"importlib-metadata==8.7.1",
|
||||
"inflection==0.5.1",
|
||||
"iniconfig==2.3.0",
|
||||
@@ -281,6 +300,7 @@ constraint-dependencies = [
|
||||
"jsonschema==4.23.0",
|
||||
"jsonschema-specifications==2025.9.1",
|
||||
"keystoneauth1==5.13.0",
|
||||
"kingfisher-bin==1.104.0",
|
||||
"kiwisolver==1.4.9",
|
||||
"knack==0.11.0",
|
||||
"kombu==5.6.2",
|
||||
@@ -315,7 +335,7 @@ constraint-dependencies = [
|
||||
"neo4j==6.1.0",
|
||||
"nest-asyncio==1.6.0",
|
||||
"nltk==3.9.4",
|
||||
"numpy==2.0.2",
|
||||
"numpy==2.2.6",
|
||||
"oauthlib==3.3.1",
|
||||
"oci==2.169.0",
|
||||
"openai==1.109.1",
|
||||
@@ -344,7 +364,7 @@ constraint-dependencies = [
|
||||
"psutil==7.2.2",
|
||||
"psycopg2-binary==2.9.9",
|
||||
"py-deviceid==0.1.1",
|
||||
"py-iam-expand==0.1.0",
|
||||
"py-iam-expand==0.3.0",
|
||||
"py-ocsf-models==0.8.1",
|
||||
"pyasn1==0.6.3",
|
||||
"pyasn1-modules==0.4.2",
|
||||
@@ -390,7 +410,7 @@ constraint-dependencies = [
|
||||
"rpds-py==0.30.0",
|
||||
"rsa==4.9.1",
|
||||
"ruamel-yaml==0.19.1",
|
||||
"ruff==0.5.0",
|
||||
"ruff==0.15.11",
|
||||
"s3transfer==0.14.0",
|
||||
"scaleway==2.10.3",
|
||||
"scaleway-core==2.10.3",
|
||||
@@ -420,12 +440,14 @@ constraint-dependencies = [
|
||||
"uritemplate==4.2.0",
|
||||
"urllib3==2.7.0",
|
||||
"uuid6==2024.7.10",
|
||||
"uvicorn==0.49.0",
|
||||
"uvloop==0.22.1",
|
||||
"vine==5.1.0",
|
||||
"vulture==2.14",
|
||||
"wcwidth==0.5.3",
|
||||
"websocket-client==1.9.0",
|
||||
"werkzeug==3.1.7",
|
||||
"workos==6.0.4",
|
||||
"workos==6.0.8",
|
||||
"wrapt==1.17.3",
|
||||
"xlsxwriter==3.2.9",
|
||||
"xmlsec==1.3.17",
|
||||
@@ -436,8 +458,13 @@ constraint-dependencies = [
|
||||
"zope-interface==8.2",
|
||||
"zstd==1.5.7.3"
|
||||
]
|
||||
# prowler@master needs okta==3.4.2; cartography 0.135.0 declares okta<1.0.0 for an
|
||||
# integration prowler does not import.
|
||||
# prowler@master needs okta==3.4.2, but cartography 0.138.1 requires okta<1.0.0.
|
||||
# Attack Paths does not ingest Okta today, so override the Cartography
|
||||
# dependency to the Prowler pin.
|
||||
#
|
||||
# prowler@master needs azure-mgmt-containerservice==34.1.0, but cartography
|
||||
# 0.138.1 requires azure-mgmt-containerservice>=41.0.0. Attack Paths does not
|
||||
# ingest Azure today, so override the Cartography dependency to the Prowler pin.
|
||||
#
|
||||
# prowler@master hard-pins microsoft-kiota-abstractions==1.9.2 in [project.dependencies].
|
||||
# The microsoft-kiota-http security bump to 1.9.9 (GHSA-7j59-v9qr-6fq9) requires
|
||||
@@ -453,6 +480,7 @@ constraint-dependencies = [
|
||||
# that request pyjwt[crypto] and leave cryptography (needed for RS256) only transitive.
|
||||
override-dependencies = [
|
||||
"okta==3.4.2",
|
||||
"azure-mgmt-containerservice==34.1.0",
|
||||
"microsoft-kiota-abstractions==1.9.9",
|
||||
"dulwich==1.2.5",
|
||||
"pyjwt[crypto]==2.13.0"
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
|
||||
from django.db import transaction
|
||||
|
||||
from api.db_router import MainRouter
|
||||
from api.db_utils import rls_transaction
|
||||
from api.models import Membership, Role, Tenant, User, UserRoleRelationship
|
||||
from api.models import (
|
||||
Membership,
|
||||
Role,
|
||||
SAMLConfiguration,
|
||||
Tenant,
|
||||
User,
|
||||
UserRoleRelationship,
|
||||
)
|
||||
from django.db import transaction
|
||||
|
||||
|
||||
class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
|
||||
@@ -18,7 +24,42 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
|
||||
# Link existing accounts with the same email address
|
||||
email = sociallogin.account.extra_data.get("email")
|
||||
if sociallogin.provider.id == "saml":
|
||||
# For SAML, the asserted NameID email cannot be trusted on its own:
|
||||
# any tenant can claim any email domain in its SAML configuration. To
|
||||
# prevent cross-tenant account takeover (GHSA-h8m9-jgf8-vwvp), only link
|
||||
# the incoming SAML session to an existing account when (1) the email
|
||||
# domain matches the tenant whose ACS endpoint is being used and (2) the
|
||||
# existing user is already a member of that tenant.
|
||||
email = sociallogin.user.email
|
||||
if not email:
|
||||
return
|
||||
|
||||
domain = email.rsplit("@", 1)[-1].lower()
|
||||
resolver_match = getattr(request, "resolver_match", None)
|
||||
organization_slug = (
|
||||
(resolver_match.kwargs or {}).get("organization_slug", "")
|
||||
if resolver_match
|
||||
else ""
|
||||
).lower()
|
||||
# The ACS endpoint is scoped per email domain; reject mismatches so an
|
||||
# attacker cannot replay an assertion through another tenant's endpoint.
|
||||
if organization_slug != domain:
|
||||
return
|
||||
|
||||
try:
|
||||
saml_config = SAMLConfiguration.objects.using(MainRouter.admin_db).get(
|
||||
email_domain=domain
|
||||
)
|
||||
except SAMLConfiguration.DoesNotExist:
|
||||
return
|
||||
|
||||
existing_user = self.get_user_by_email(email)
|
||||
if existing_user and existing_user.is_member_of_tenant(
|
||||
str(saml_config.tenant_id)
|
||||
):
|
||||
sociallogin.connect(request, existing_user)
|
||||
return
|
||||
|
||||
if email:
|
||||
existing_user = self.get_user_by_email(email)
|
||||
if existing_user:
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from django.apps import AppConfig
|
||||
from django.conf import settings
|
||||
|
||||
from config.custom_logging import BackendLogger
|
||||
from config.env import env
|
||||
from django.apps import AppConfig
|
||||
from django.conf import settings
|
||||
|
||||
logger = logging.getLogger(BackendLogger.API)
|
||||
|
||||
@@ -30,8 +28,10 @@ class ApiConfig(AppConfig):
|
||||
name = "api"
|
||||
|
||||
def ready(self):
|
||||
from api import schema_extensions # noqa: F401
|
||||
from api import signals # noqa: F401
|
||||
from api import (
|
||||
schema_extensions, # noqa: F401
|
||||
signals, # noqa: F401
|
||||
)
|
||||
|
||||
# Generate required cryptographic keys if not present, but only if:
|
||||
# `"manage.py" not in sys.argv[0]`: If an external server (e.g., Gunicorn) is running the app
|
||||
@@ -42,9 +42,6 @@ class ApiConfig(AppConfig):
|
||||
):
|
||||
self._ensure_crypto_keys()
|
||||
|
||||
# Neo4j driver is created lazily on first use (see api.attack_paths.database).
|
||||
# App init never contacts Neo4j, so a Neo4j outage cannot block API startup.
|
||||
|
||||
def _ensure_crypto_keys(self):
|
||||
"""
|
||||
Orchestrator method that ensures all required cryptographic keys are present.
|
||||
|
||||
@@ -5,7 +5,6 @@ from api.attack_paths.queries import (
|
||||
get_query_by_id,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AttackPathsQueryDefinition",
|
||||
"AttackPathsQueryParameterDefinition",
|
||||
|
||||
@@ -4,10 +4,10 @@ Cypher sanitizer for custom (user-supplied) Attack Paths queries.
|
||||
Two responsibilities:
|
||||
|
||||
1. **Validation** - reject queries containing SSRF or dangerous procedure
|
||||
patterns (defense-in-depth; the primary control is ``neo4j.READ_ACCESS``).
|
||||
patterns (defense-in-depth; the primary control is `neo4j.READ_ACCESS`).
|
||||
|
||||
2. **Provider-scoped label injection** - inject a dynamic
|
||||
``_Provider_{uuid}`` label into every node pattern so the database can
|
||||
`_Provider_{uuid}` label into every node pattern so the database can
|
||||
use its native label index for provider isolation.
|
||||
|
||||
Label-injection pipeline:
|
||||
@@ -22,18 +22,16 @@ Label-injection pipeline:
|
||||
import re
|
||||
|
||||
from rest_framework.exceptions import ValidationError
|
||||
|
||||
from tasks.jobs.attack_paths.config import get_provider_label
|
||||
|
||||
|
||||
# Step 1 - String / comment protection
|
||||
# Single combined regex: strings first, then line comments.
|
||||
# Single combined regex: strings first, then line comments
|
||||
# The regex engine finds the leftmost match, so a string like 'https://prowler.com'
|
||||
# is consumed as a string before the // inside it can match as a comment.
|
||||
# is consumed as a string before the // inside it can match as a comment
|
||||
_PROTECTED_RE = re.compile(r"'(?:[^'\\]|\\.)*'|\"(?:[^\"\\]|\\.)*\"|//[^\n]*")
|
||||
|
||||
# Step 2 - Clause splitting
|
||||
# OPTIONAL MATCH must come before MATCH to avoid partial matching.
|
||||
# `OPTIONAL MATCH` must come before `MATCH` to avoid partial matching
|
||||
_CLAUSE_RE = re.compile(
|
||||
r"\b(OPTIONAL\s+MATCH|MATCH|WHERE|RETURN|WITH|ORDER\s+BY"
|
||||
r"|SKIP|LIMIT|UNION|UNWIND|CALL)\b",
|
||||
@@ -41,10 +39,10 @@ _CLAUSE_RE = re.compile(
|
||||
)
|
||||
|
||||
# Pass A - Labeled node patterns (all segments)
|
||||
# Matches node patterns that have at least one :Label.
|
||||
# (?<!\w)\( - open paren NOT preceded by a word char (excludes function calls).
|
||||
# Group 1: optional variable + one or more :Label
|
||||
# Group 2: optional {properties} + closing paren
|
||||
# Matches node patterns that have at least one `:Label`
|
||||
# `(?<!\w)\(` - open paren NOT preceded by a word char, excludes function calls
|
||||
# Group 1: optional variable + one or more `:Label`
|
||||
# Group 2: optional `{`properties`}` + closing paren
|
||||
_LABELED_NODE_RE = re.compile(
|
||||
r"(?<!\w)\("
|
||||
r"("
|
||||
@@ -57,9 +55,9 @@ _LABELED_NODE_RE = re.compile(
|
||||
r")"
|
||||
)
|
||||
|
||||
# Pass B - Bare node patterns (MATCH segments only)
|
||||
# Matches (identifier) or (identifier {properties}) without any :Label.
|
||||
# Only applied in MATCH/OPTIONAL MATCH segments.
|
||||
# Pass B - Bare node patterns (`MATCH` segments only)
|
||||
# Matches (identifier) or (identifier {properties}) without any `:Label`
|
||||
# Only applied in `MATCH` / `OPTIONAL MATCH` segments
|
||||
_BARE_NODE_RE = re.compile(
|
||||
r"(?<!\w)\(" r"(\s*[a-zA-Z_]\w*)" r"(\s*(?:\{[^}]*\})?)" r"\s*\)"
|
||||
)
|
||||
@@ -98,6 +96,11 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
|
||||
node pattern.
|
||||
"""
|
||||
label = get_provider_label(provider_id)
|
||||
return inject_label(cypher, label)
|
||||
|
||||
|
||||
def inject_label(cypher: str, label: str) -> str:
|
||||
"""Rewrite a Cypher query to append a label to every node pattern."""
|
||||
|
||||
# Step 1: Protect strings and comments (single pass, leftmost-first)
|
||||
protected: list[str] = []
|
||||
@@ -136,9 +139,7 @@ def inject_provider_label(cypher: str, provider_id: str) -> str:
|
||||
return work
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Patterns that indicate SSRF or dangerous procedure calls
|
||||
# Defense-in-depth layer - the primary control is `neo4j.READ_ACCESS`
|
||||
|
||||
@@ -1,263 +1,32 @@
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
"""Backwards-compatible facade over the ingest and sink modules.
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Iterator
|
||||
Historically this module owned a single Neo4j driver used for both the
|
||||
cartography temp database and the per-tenant sink database. The port to AWS
|
||||
Neptune split those roles: the cartography ingest (temp) database is always
|
||||
Neo4j and lives in `api.attack_paths.ingest`; the sink is configurable
|
||||
(Neo4j or Neptune) and lives in `api.attack_paths.sink`. This shim preserves
|
||||
the public API that `tasks/` and `api/v1/views.py` already depend on, and
|
||||
dispatches to the right module by database-name prefix.
|
||||
|
||||
A database name starting with `db-tmp-scan-` is a cartography temp DB and
|
||||
routes to ingest. Everything else routes to the configured sink.
|
||||
"""
|
||||
|
||||
from contextlib import AbstractContextManager
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import neo4j
|
||||
import neo4j.exceptions
|
||||
|
||||
import neo4j # noqa: F401 - kept for tests that patch api.attack_paths.database.neo4j
|
||||
from api.attack_paths import ingest
|
||||
from api.attack_paths import sink as sink_module
|
||||
from config.env import env
|
||||
from django.conf import settings
|
||||
|
||||
from api.attack_paths.retryable_session import RetryableSession
|
||||
from tasks.jobs.attack_paths.config import (
|
||||
BATCH_SIZE,
|
||||
PROVIDER_RESOURCE_LABEL,
|
||||
get_provider_label,
|
||||
from django.conf import (
|
||||
settings, # noqa: F401 - kept for tests that patch ...database.settings
|
||||
)
|
||||
|
||||
# Without this Celery goes crazy with Neo4j logging
|
||||
logging.getLogger("neo4j").setLevel(logging.ERROR)
|
||||
logging.getLogger("neo4j").propagate = False
|
||||
|
||||
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
|
||||
"ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
|
||||
)
|
||||
READ_QUERY_TIMEOUT_SECONDS = env.int(
|
||||
"ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
|
||||
)
|
||||
MAX_CUSTOM_QUERY_NODES = env.int("ATTACK_PATHS_MAX_CUSTOM_QUERY_NODES", default=250)
|
||||
# Shorter than CONN_ACQUISITION_TIMEOUT — the driver requires acquisition to be
|
||||
# the longer of the two (it may include opening a new connection).
|
||||
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
|
||||
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
|
||||
READ_EXCEPTION_CODES = [
|
||||
"Neo.ClientError.Statement.AccessMode",
|
||||
"Neo.ClientError.Procedure.ProcedureNotFound",
|
||||
]
|
||||
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
|
||||
|
||||
# Module-level process-wide driver singleton
|
||||
_driver: neo4j.Driver | None = None
|
||||
_lock = threading.Lock()
|
||||
|
||||
# Base Neo4j functions
|
||||
|
||||
|
||||
def get_uri() -> str:
|
||||
host = settings.DATABASES["neo4j"]["HOST"]
|
||||
port = settings.DATABASES["neo4j"]["PORT"]
|
||||
return f"bolt://{host}:{port}"
|
||||
|
||||
|
||||
def init_driver() -> neo4j.Driver:
|
||||
global _driver
|
||||
if _driver is not None:
|
||||
return _driver
|
||||
|
||||
with _lock:
|
||||
if _driver is None:
|
||||
uri = get_uri()
|
||||
config = settings.DATABASES["neo4j"]
|
||||
|
||||
driver = neo4j.GraphDatabase.driver(
|
||||
uri,
|
||||
auth=(config["USER"], config["PASSWORD"]),
|
||||
keep_alive=True,
|
||||
max_connection_lifetime=7200,
|
||||
connection_timeout=CONNECTION_TIMEOUT,
|
||||
connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
|
||||
max_connection_pool_size=50,
|
||||
)
|
||||
# Publish the singleton only after connectivity is verified so a
|
||||
# failed probe does not leave an unverified driver behind. Close the
|
||||
# driver on failure so a repeatedly-probed outage cannot leak pools.
|
||||
try:
|
||||
driver.verify_connectivity()
|
||||
except Exception:
|
||||
driver.close()
|
||||
raise
|
||||
_driver = driver
|
||||
|
||||
# Register cleanup handler (only runs once since we're inside the _driver is None block)
|
||||
atexit.register(close_driver)
|
||||
|
||||
return _driver
|
||||
|
||||
|
||||
def get_driver() -> neo4j.Driver:
|
||||
return init_driver()
|
||||
|
||||
|
||||
def close_driver() -> None: # TODO: Use it
|
||||
global _driver
|
||||
with _lock:
|
||||
if _driver is not None:
|
||||
try:
|
||||
_driver.close()
|
||||
|
||||
finally:
|
||||
_driver = None
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_session(
|
||||
database: str | None = None, default_access_mode: str | None = None
|
||||
) -> Iterator[RetryableSession]:
|
||||
session_wrapper: RetryableSession | None = None
|
||||
|
||||
try:
|
||||
session_wrapper = RetryableSession(
|
||||
session_factory=lambda: get_driver().session(
|
||||
database=database, default_access_mode=default_access_mode
|
||||
),
|
||||
max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
|
||||
)
|
||||
yield session_wrapper
|
||||
|
||||
except neo4j.exceptions.Neo4jError as exc:
|
||||
if (
|
||||
default_access_mode == neo4j.READ_ACCESS
|
||||
and exc.code
|
||||
and exc.code in READ_EXCEPTION_CODES
|
||||
):
|
||||
message = "Read query not allowed"
|
||||
code = READ_EXCEPTION_CODES[0]
|
||||
raise WriteQueryNotAllowedException(message=message, code=code)
|
||||
|
||||
message = exc.message if exc.message is not None else str(exc)
|
||||
|
||||
if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
|
||||
raise ClientStatementException(message=message, code=exc.code)
|
||||
|
||||
raise GraphDatabaseQueryException(message=message, code=exc.code)
|
||||
|
||||
finally:
|
||||
if session_wrapper is not None:
|
||||
session_wrapper.close()
|
||||
|
||||
|
||||
def execute_read_query(
|
||||
database: str,
|
||||
cypher: str,
|
||||
parameters: dict[str, Any] | None = None,
|
||||
) -> neo4j.graph.Graph:
|
||||
with get_session(database, default_access_mode=neo4j.READ_ACCESS) as session:
|
||||
|
||||
def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
|
||||
result = tx.run(
|
||||
cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
|
||||
)
|
||||
return result.graph()
|
||||
|
||||
return session.execute_read(_run)
|
||||
|
||||
|
||||
def create_database(database: str) -> None:
|
||||
query = "CREATE DATABASE $database IF NOT EXISTS"
|
||||
parameters = {"database": database}
|
||||
|
||||
with get_session() as session:
|
||||
session.run(query, parameters)
|
||||
|
||||
|
||||
def drop_database(database: str) -> None:
|
||||
query = f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA"
|
||||
|
||||
with get_session() as session:
|
||||
session.run(query)
|
||||
|
||||
|
||||
def drop_subgraph(database: str, provider_id: str) -> int:
|
||||
"""
|
||||
Delete all nodes for a provider from the tenant database.
|
||||
|
||||
Deletes relationships then nodes in batches (not `DETACH DELETE`) so a dense
|
||||
provider's graph cannot exceed Neo4j's transaction memory limit.
|
||||
Silently returns 0 if the database doesn't exist.
|
||||
"""
|
||||
provider_label = get_provider_label(provider_id)
|
||||
deleted_nodes = 0
|
||||
|
||||
try:
|
||||
with get_session(database) as session:
|
||||
# Phase 1: delete relationships incident to provider nodes in batches.
|
||||
deleted_count = 1
|
||||
while deleted_count > 0:
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (:`{provider_label}`)-[r]-()
|
||||
WITH DISTINCT r LIMIT $batch_size
|
||||
DELETE r
|
||||
RETURN COUNT(r) AS deleted_rels_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
)
|
||||
deleted_count = result.single().get("deleted_rels_count", 0)
|
||||
|
||||
# Phase 2: delete the now relationship-free nodes in batches.
|
||||
deleted_count = 1
|
||||
while deleted_count > 0:
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
|
||||
WITH n LIMIT $batch_size
|
||||
DELETE n
|
||||
RETURN COUNT(n) AS deleted_nodes_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
)
|
||||
deleted_count = result.single().get("deleted_nodes_count", 0)
|
||||
deleted_nodes += deleted_count
|
||||
|
||||
except GraphDatabaseQueryException as exc:
|
||||
if exc.code == "Neo.ClientError.Database.DatabaseNotFound":
|
||||
return 0
|
||||
raise
|
||||
|
||||
return deleted_nodes
|
||||
|
||||
|
||||
def has_provider_data(database: str, provider_id: str) -> bool:
|
||||
"""
|
||||
Check if any ProviderResource node exists for this provider.
|
||||
|
||||
Returns `False` if the database doesn't exist.
|
||||
"""
|
||||
provider_label = get_provider_label(provider_id)
|
||||
query = f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
|
||||
|
||||
try:
|
||||
with get_session(database, default_access_mode=neo4j.READ_ACCESS) as session:
|
||||
result = session.run(query)
|
||||
return result.single() is not None
|
||||
|
||||
except GraphDatabaseQueryException as exc:
|
||||
if exc.code == "Neo.ClientError.Database.DatabaseNotFound":
|
||||
return False
|
||||
raise
|
||||
|
||||
|
||||
def clear_cache(database: str) -> None:
|
||||
query = "CALL db.clearQueryCaches()"
|
||||
|
||||
try:
|
||||
with get_session(database) as session:
|
||||
session.run(query)
|
||||
|
||||
except GraphDatabaseQueryException as exc:
|
||||
logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
|
||||
|
||||
|
||||
# Neo4j functions related to Prowler + Cartography
|
||||
|
||||
|
||||
def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
|
||||
prefix = "tmp-scan" if temporary else "tenant"
|
||||
return f"db-{prefix}-{str(entity_id).lower()}"
|
||||
TEMP_DB_PREFIX = "db-tmp-scan-"
|
||||
|
||||
|
||||
# Exceptions
|
||||
@@ -272,7 +41,6 @@ class GraphDatabaseQueryException(Exception):
|
||||
def __str__(self) -> str:
|
||||
if self.code:
|
||||
return f"{self.code}: {self.message}"
|
||||
|
||||
return self.message
|
||||
|
||||
|
||||
@@ -282,3 +50,177 @@ class WriteQueryNotAllowedException(GraphDatabaseQueryException):
|
||||
|
||||
class ClientStatementException(GraphDatabaseQueryException):
|
||||
pass
|
||||
|
||||
|
||||
# Routing
|
||||
|
||||
|
||||
def _is_ingest_database(database: str | None) -> bool:
    """Tell whether `database` names a cartography temp (ingest) database.

    A name qualifies when it is non-empty and begins with `TEMP_DB_PREFIX`;
    `None` and the empty string never qualify.
    """
    if not database:
        return False

    return database.startswith(TEMP_DB_PREFIX)
|
||||
|
||||
|
||||
# Driver lifecycle
|
||||
|
||||
|
||||
def init_driver() -> Any:
    """Bring up the configured sink backend and return whatever `sink_module.init()` yields.

    Only the sink is initialized here. The ingest driver (Neo4j for
    cartography temp DBs) is left lazy and is created when a temp-DB
    operation first runs.
    """
    sink_handle = sink_module.init()
    return sink_handle
|
||||
|
||||
|
||||
def close_driver() -> None:
    """Shut down the sink backend first, then the ingest driver."""
    # Order matters only in that a failure closing the sink propagates before
    # the ingest driver is touched.
    sink_module.close()
    ingest.close_driver()
|
||||
|
||||
|
||||
def get_driver() -> neo4j.Driver:
    """Hand back the driver object owned by the active sink backend.

    The backend is probed for a `get_driver()` accessor first (the Neo4j
    sink) and for `get_writer()` second (the Neptune sink, which returns its
    writer driver). Intended for tests and legacy call-sites; new code should
    go through `get_session`.

    Raises:
        RuntimeError: if the backend offers neither accessor.
    """
    backend = sink_module.get_backend()

    # Accessors are tried in priority order; the first one present wins.
    for accessor_name in ("get_driver", "get_writer"):
        if hasattr(backend, accessor_name):
            return getattr(backend, accessor_name)()

    raise RuntimeError("Active sink backend does not expose a driver handle")
|
||||
|
||||
|
||||
def verify_connectivity() -> None:
    """Probe the configured sink and let any connectivity error propagate.

    Backend-agnostic entry point for the readiness probe: the check itself is
    delegated to the active backend's own `verify_connectivity()`.
    """
    active_backend = sink_module.get_backend()
    active_backend.verify_connectivity()
|
||||
|
||||
|
||||
def verify_scan_databases_available() -> None:
    """Check both graph databases an Attack Paths scan needs; raise if any is down.

    Both probes always run, so the error message lists every unavailable
    database rather than only the first one found.

    Raises:
        RuntimeError: when the ingest Neo4j and/or the sink is unreachable.
            The first failure encountered is attached as the cause.
    """
    # Each entry pairs the human-readable detail with the exception behind it.
    failures: list[tuple[str, Exception]] = []

    try:
        ingest.get_driver().verify_connectivity()
    except Exception as exc:
        failures.append((f"ingest Neo4j: {exc}", exc))

    try:
        get_driver().verify_connectivity()
    except Exception as exc:
        failures.append((f"sink {settings.ATTACK_PATHS_SINK_DATABASE}: {exc}", exc))

    if not failures:
        return

    details = "; ".join(detail for detail, _ in failures)
    root_cause = failures[0][1]
    raise RuntimeError(
        "Attack Paths graph database unavailable before scan start: " + details
    ) from root_cause
|
||||
|
||||
|
||||
def get_uri() -> str:
    """Build the Bolt URI of the configured sink (kept for backwards compatibility).

    Neptune uses `bolt+s://` against the writer endpoint; any other sink
    setting falls back to the plain `bolt://` Neo4j host and port.
    """
    if settings.ATTACK_PATHS_SINK_DATABASE != "neptune":
        neo4j_cfg = settings.DATABASES["neo4j"]
        return f"bolt://{neo4j_cfg['HOST']}:{neo4j_cfg['PORT']}"

    neptune_cfg = settings.DATABASES["neptune"]
    return f"bolt+s://{neptune_cfg['WRITER_ENDPOINT']}:{neptune_cfg['PORT']}"
|
||||
|
||||
|
||||
def get_ingest_uri() -> str:
    """Return the Neo4j URI of the cartography temp (ingest) database.

    The ingest side is always Neo4j, whatever sink is configured, so this
    simply forwards to `ingest.get_uri()`.
    """
    ingest_uri = ingest.get_uri()
    return ingest_uri
|
||||
|
||||
|
||||
# Session API
|
||||
|
||||
|
||||
def get_session(
    database: str | None = None,
    default_access_mode: str | None = None,
) -> AbstractContextManager:
    """Open a session on whichever backend owns `database`.

    Routing rules:
    - a name starting with `db-tmp-scan-` goes to ingest;
    - no name at all also goes to ingest (CREATE / DROP DATABASE admin ops);
    - every other name goes to the configured sink.
    """
    session_kwargs = {
        "database": database,
        "default_access_mode": default_access_mode,
    }

    if _is_ingest_database(database) or database is None:
        return ingest.get_session(**session_kwargs)

    sink_backend = sink_module.get_backend()
    return sink_backend.get_session(**session_kwargs)
|
||||
|
||||
|
||||
def execute_read_query(
    database: str,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> neo4j.graph.Graph:
    """Run a read-only Cypher query on the sink and return the backend's result.

    All three arguments are forwarded unchanged to the active sink backend.
    """
    sink_backend = sink_module.get_backend()
    return sink_backend.execute_read_query(database, cypher, parameters)
|
||||
|
||||
|
||||
def create_database(database: str) -> None:
    """Create `database` on the backend that owns it.

    Temp scan databases are always created through ingest (Neo4j). Any other
    name is delegated to the sink backend, which decides what creation means
    for it (per the original notes: routed to the shared Neo4j cluster on the
    Neo4j sink, a no-op on the Neptune sink).
    """
    if not _is_ingest_database(database):
        sink_module.get_backend().create_database(database)
        return

    ingest.create_database(database)
|
||||
|
||||
|
||||
def drop_database(database: str) -> None:
    """Drop `database`, using the same routing as `create_database`.

    Temp scan databases are dropped through ingest; everything else is
    delegated to the sink backend.
    """
    if not _is_ingest_database(database):
        sink_module.get_backend().drop_database(database)
        return

    ingest.drop_database(database)
|
||||
|
||||
|
||||
def drop_subgraph(database: str, provider_id: str) -> int:
    """Delegate removal of a provider's subgraph to the sink backend.

    Returns whatever integer the backend's `drop_subgraph` reports.
    """
    sink_backend = sink_module.get_backend()
    return sink_backend.drop_subgraph(database, provider_id)
|
||||
|
||||
|
||||
def has_provider_data(database: str, provider_id: str) -> bool:
    """Ask the sink backend whether `database` holds data for `provider_id`."""
    sink_backend = sink_module.get_backend()
    return sink_backend.has_provider_data(database, provider_id)
|
||||
|
||||
|
||||
def clear_cache(database: str) -> None:
    """Clear the query cache for `database` on the backend that owns it.

    Temp scan databases are handled by ingest; all other names are handed to
    the sink backend.
    """
    if not _is_ingest_database(database):
        sink_module.get_backend().clear_cache(database)
        return

    ingest.clear_cache(database)
|
||||
|
||||
|
||||
# Name helper
|
||||
|
||||
|
||||
def get_database_name(entity_id: str | UUID, temporary: bool = False) -> str:
|
||||
prefix = "tmp-scan" if temporary else "tenant"
|
||||
return f"db-{prefix}-{str(entity_id).lower()}"
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
"""Cartography ingest layer.
|
||||
|
||||
Public surface for the per-scan Neo4j temp database driver. Implementation
|
||||
lives in `api.attack_paths.ingest.driver`.
|
||||
"""
|
||||
|
||||
from api.attack_paths.ingest.driver import (
|
||||
clear_cache,
|
||||
close_driver,
|
||||
create_database,
|
||||
drop_database,
|
||||
get_driver,
|
||||
get_session,
|
||||
get_uri,
|
||||
init_driver,
|
||||
run_cypher,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"clear_cache",
|
||||
"close_driver",
|
||||
"create_database",
|
||||
"drop_database",
|
||||
"get_driver",
|
||||
"get_session",
|
||||
"get_uri",
|
||||
"init_driver",
|
||||
"run_cypher",
|
||||
]
|
||||
@@ -0,0 +1,187 @@
|
||||
"""Cartography ingest driver: per-scan throw-away Neo4j database.
|
||||
|
||||
Cartography writes each scan's graph into a throw-away Neo4j database named
|
||||
`db-tmp-scan-{scan_uuid}`. This is always Neo4j, regardless of the configured
|
||||
sink: Neptune is single-database and cannot host per-scan throw-away
|
||||
databases. This module owns the Neo4j driver used for those temp DBs and the
|
||||
admin ops they need (CREATE / DROP DATABASE).
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
from collections.abc import Iterator
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
|
||||
import neo4j
|
||||
import neo4j.exceptions
|
||||
from api.attack_paths.retryable_session import RetryableSession
|
||||
from config.env import env
|
||||
from django.conf import settings
|
||||
|
||||
logging.getLogger("neo4j").setLevel(logging.ERROR)
|
||||
logging.getLogger("neo4j").propagate = False
|
||||
|
||||
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
|
||||
"ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
|
||||
)
|
||||
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
|
||||
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
|
||||
# host can't pin a worker on a temp-DB op longer than this.
|
||||
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
|
||||
MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
|
||||
MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)
|
||||
|
||||
_driver: neo4j.Driver | None = None
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def _neo4j_config() -> dict:
    """Fetch the `neo4j` entry of Django's `DATABASES` setting."""
    databases = settings.DATABASES
    return databases["neo4j"]
|
||||
|
||||
|
||||
def get_uri() -> str:
    """Compose the Bolt URI of the Neo4j temp (ingest) database.

    Raises:
        RuntimeError: if either the configured host or port is empty/unset.
    """
    neo4j_settings = _neo4j_config()
    host = neo4j_settings["HOST"]
    port = neo4j_settings["PORT"]

    if host and port:
        return f"bolt://{host}:{port}"

    raise RuntimeError(
        "NEO4J_HOST / NEO4J_PORT must be set to use the attack-paths "
        "temp database. Workers require Neo4j env even when the sink is Neptune."
    )
|
||||
|
||||
|
||||
def init_driver() -> neo4j.Driver:
    """Create the process-wide temp-database Neo4j driver on first call.

    Later calls return the already-created driver. Creation is guarded by
    `_lock`, with the "already created" check repeated once the lock is held.
    """
    global _driver

    # Fast path: skip the lock once the driver exists.
    if _driver is not None:
        return _driver

    with _lock:
        # Another thread may have created the driver while we waited.
        if _driver is not None:
            return _driver

        neo4j_settings = _neo4j_config()
        _driver = neo4j.GraphDatabase.driver(
            get_uri(),
            auth=(neo4j_settings["USER"], neo4j_settings["PASSWORD"]),
            keep_alive=True,
            max_connection_lifetime=MAX_CONNECTION_LIFETIME,
            connection_timeout=CONNECTION_TIMEOUT,
            connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
            max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
        )

        # The connectivity probe is advisory only: an unreachable Neo4j at
        # boot is logged and the driver is kept, so the worker still starts.
        try:
            _driver.verify_connectivity()
        except Exception:
            logging.warning(
                "Neo4j temp-database unreachable at init; continuing with a "
                "lazily-reconnecting driver",
                exc_info=True,
            )

        # Registered on the creation path only, so the hook is added once per
        # driver creation.
        atexit.register(close_driver)

    return _driver
|
||||
|
||||
|
||||
def get_driver() -> neo4j.Driver:
    """Return the temp-database driver, creating it on first use via `init_driver()`."""
    driver = init_driver()
    return driver
|
||||
|
||||
|
||||
def close_driver() -> None:
    """Close the temp-database driver, if one exists, and forget it.

    The module-level reference is reset even when `close()` raises, so a later
    `init_driver()` builds a fresh driver.
    """
    global _driver

    with _lock:
        if _driver is None:
            return

        try:
            _driver.close()
        finally:
            _driver = None
|
||||
|
||||
|
||||
@contextmanager
def get_session(
    database: str | None = None,
    default_access_mode: str | None = None,
) -> Iterator[RetryableSession]:
    """Yield a retryable session against the Neo4j temp-database cluster.

    Used for temp DB sessions and, when `database` is None, for admin
    operations (CREATE / DROP DATABASE).

    Args:
        database: Database to open the session on, or None.
        default_access_mode: Access mode forwarded to the driver session.

    Raises:
        WriteQueryNotAllowedException: a read-access session hit one of the
            codes in `READ_EXCEPTION_CODES`.
        ClientStatementException: any other `Neo.ClientError.Statement.*` code.
        GraphDatabaseQueryException: every remaining `Neo4jError`.

    Each translated exception is chained to the driver error with `from exc`,
    so tracebacks show the Neo4j error as the direct cause.
    """
    # Imported here rather than at module top: `api.attack_paths.database`
    # itself imports this package, so a top-level import would be circular.
    from api.attack_paths.database import (
        ClientStatementException,
        GraphDatabaseQueryException,
        WriteQueryNotAllowedException,
    )

    READ_EXCEPTION_CODES = [
        "Neo.ClientError.Statement.AccessMode",
        "Neo.ClientError.Procedure.ProcedureNotFound",
    ]
    CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."

    session_wrapper: RetryableSession | None = None
    try:
        session_wrapper = RetryableSession(
            session_factory=lambda: get_driver().session(
                database=database, default_access_mode=default_access_mode
            ),
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield session_wrapper

    except neo4j.exceptions.Neo4jError as exc:
        if (
            default_access_mode == neo4j.READ_ACCESS
            and exc.code
            and exc.code in READ_EXCEPTION_CODES
        ):
            # Always reported under the AccessMode code, whichever of the
            # listed codes actually fired.
            raise WriteQueryNotAllowedException(
                message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
            ) from exc

        message = exc.message if exc.message is not None else str(exc)
        if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
            raise ClientStatementException(message=message, code=exc.code) from exc
        raise GraphDatabaseQueryException(message=message, code=exc.code) from exc

    finally:
        # Runs on success and on failure; the wrapper is None only if its
        # construction itself raised.
        if session_wrapper is not None:
            session_wrapper.close()
|
||||
|
||||
|
||||
def create_database(database: str) -> None:
    """Create `database` on the Neo4j cluster if it does not already exist.

    Used for temp scan DBs. The name is passed as a query parameter.
    """
    statement = "CREATE DATABASE $database IF NOT EXISTS"
    params = {"database": database}

    with get_session() as session:
        session.run(statement, params)
|
||||
|
||||
|
||||
def drop_database(database: str) -> None:
    """Drop `database` and its data from the Neo4j cluster if it exists.

    Used for temp scan DBs. The name is interpolated into the statement as a
    backtick-quoted identifier.
    """
    statement = f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA"

    with get_session() as session:
        session.run(statement)
|
||||
|
||||
|
||||
def clear_cache(database: str) -> None:
    """Try to clear the query caches of a Neo4j database.

    A `GraphDatabaseQueryException` is logged as a warning and swallowed, so
    a failed cache clear never fails the caller.
    """
    # Local import: `api.attack_paths.database` imports this package.
    from api.attack_paths.database import GraphDatabaseQueryException

    statement = "CALL db.clearQueryCaches()"

    try:
        with get_session(database) as session:
            session.run(statement)
    except GraphDatabaseQueryException as exc:
        logging.warning(f"Failed to clear query cache for database `{database}`: {exc}")
|
||||
|
||||
|
||||
def run_cypher(
    database: str | None,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> Any:
    """Run one Cypher statement so callers need not manage the session themselves.

    Args:
        database: Target database, or None.
        cypher: Statement text.
        parameters: Query parameters; None is treated as an empty mapping.

    Returns:
        Whatever the session's `run()` returns.
    """
    bound_parameters = parameters or {}

    # NOTE(review): the value is returned after the session context has
    # exited (and the session wrapper closed) - confirm callers can still
    # consume it at that point.
    with get_session(database) as session:
        outcome = session.run(cypher, bound_parameters)
        return outcome
|
||||
@@ -1,12 +1,11 @@
|
||||
from api.attack_paths.queries.types import (
|
||||
AttackPathsQueryDefinition,
|
||||
AttackPathsQueryParameterDefinition,
|
||||
)
|
||||
from api.attack_paths.queries.registry import (
|
||||
get_queries_for_provider,
|
||||
get_query_by_id,
|
||||
)
|
||||
|
||||
from api.attack_paths.queries.types import (
|
||||
AttackPathsQueryDefinition,
|
||||
AttackPathsQueryParameterDefinition,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AttackPathsQueryDefinition",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,13 +1,14 @@
|
||||
from api.attack_paths.queries.types import AttackPathsQueryDefinition
|
||||
from api.attack_paths.queries.aws import AWS_QUERIES
|
||||
|
||||
# TODO: drop after Neptune cutover
|
||||
from api.attack_paths.queries.aws_deprecated import AWS_DEPRECATED_QUERIES
|
||||
from api.attack_paths.queries.types import AttackPathsQueryDefinition
|
||||
|
||||
# Query definitions organized by provider
|
||||
# Query definitions for scans synced with the current schema.
|
||||
_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
|
||||
"aws": AWS_QUERIES,
|
||||
}
|
||||
|
||||
# Flat lookup by query ID for O(1) access
|
||||
_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
|
||||
definition.id: definition
|
||||
for definitions in _QUERY_DEFINITIONS.values()
|
||||
@@ -15,11 +16,45 @@ _QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
|
||||
}
|
||||
|
||||
|
||||
def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
    """Return every attack-path query registered for `provider`.

    Unknown providers yield an empty list.
    """
    no_queries: list[AttackPathsQueryDefinition] = []
    return _QUERY_DEFINITIONS.get(provider, no_queries)
|
||||
# TODO: drop after Neptune cutover
|
||||
#
|
||||
# Query definitions for pre-cutover scans (`AttackPathsScan.is_migrated=False`)
|
||||
# whose graph data was written under the previous schema. Both maps expose the
|
||||
# same query IDs so the API contract is identical regardless of which set is
|
||||
# routed to.
|
||||
_DEPRECATED_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
|
||||
"aws": AWS_DEPRECATED_QUERIES,
|
||||
}
|
||||
|
||||
_DEPRECATED_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
|
||||
definition.id: definition
|
||||
for definitions in _DEPRECATED_QUERY_DEFINITIONS.values()
|
||||
for definition in definitions
|
||||
}
|
||||
|
||||
|
||||
def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
    """Look up one attack-path query by ID; `None` when the ID is unknown."""
    definition = _QUERIES_BY_ID.get(query_id)
    return definition
|
||||
def get_queries_for_provider(
    provider: str,
    is_migrated: bool = True,
) -> list[AttackPathsQueryDefinition]:
    """Return the attack-path queries registered for `provider`.

    `is_migrated` picks the catalog to read from: True for scans synced with
    the current schema, False for pre-cutover scans that still use the legacy
    graph shape. Providers missing from the chosen catalog yield an empty
    list.

    TODO: drop the `is_migrated` parameter after Neptune cutover.
    """
    if is_migrated:
        return _QUERY_DEFINITIONS.get(provider, [])
    return _DEPRECATED_QUERY_DEFINITIONS.get(provider, [])
|
||||
|
||||
|
||||
def get_query_by_id(
    query_id: str,
    is_migrated: bool = True,
) -> AttackPathsQueryDefinition | None:
    """Return the attack-path query registered under `query_id`, if any.

    `is_migrated` picks the lookup table: True reads the current-schema
    catalog, False the deprecated one kept for pre-cutover scans. Returns
    `None` when the ID is not in the chosen table.

    TODO: drop the `is_migrated` parameter after Neptune cutover.
    """
    if is_migrated:
        return _QUERIES_BY_ID.get(query_id)
    return _DEPRECATED_QUERIES_BY_ID.get(query_id)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""Attack-paths sink database layer.
|
||||
|
||||
The sink is the persistent store where attack-paths graphs live after a scan
|
||||
finishes. Currently selectable between Neo4j (OSS / local dev default) and
|
||||
AWS Neptune (hosted dev/staging/prod). Backend is picked by the
|
||||
`ATTACK_PATHS_SINK_DATABASE` setting at process init.
|
||||
|
||||
This package exposes the public factory API; the implementation lives in
|
||||
`api.attack_paths.sink.factory`.
|
||||
"""
|
||||
|
||||
from api.attack_paths.sink.factory import (
|
||||
SinkBackend,
|
||||
close,
|
||||
get_backend,
|
||||
get_backend_for_name,
|
||||
get_backend_for_scan,
|
||||
init,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"SinkBackend",
|
||||
"close",
|
||||
"get_backend",
|
||||
"get_backend_for_name",
|
||||
"get_backend_for_scan",
|
||||
"init",
|
||||
]
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Protocol every sink backend must implement."""
|
||||
|
||||
from contextlib import AbstractContextManager
|
||||
from typing import Any, Protocol
|
||||
|
||||
import neo4j
|
||||
|
||||
|
||||
class SinkDatabase(Protocol):
    """Interface every persistent attack-paths graph store implements.

    Most methods take a `database` argument inherited from the legacy
    `database.py` API surface; treat it as an opaque identifier. On Neo4j it
    is the per-tenant database name (e.g. `db-tenant-{uuid}`). On Neptune it
    is ignored: the cluster has a single graph and isolation is label-based.
    """

    def init(self) -> None:
        """Set up the backend's driver(s) so the sink is ready for use."""
        ...

    def close(self) -> None:
        """Release the backend's driver(s)."""
        ...

    def verify_connectivity(self) -> None:
        """Raise when the backend serving the API read path is unreachable.

        Neo4j checks its single driver. Neptune checks the reader driver,
        i.e. the endpoint the API reads from; on single-endpoint clusters the
        reader aliases the writer, so the writer is covered as well.

        The readiness probe calls this, so it must not block for longer than
        the caller's probe budget.
        """
        ...

    def get_session(
        self,
        database: str | None = None,
        default_access_mode: str | None = None,
    ) -> AbstractContextManager:
        """Return a context manager yielding a session on `database`."""
        ...

    def execute_read_query(
        self,
        database: str,
        cypher: str,
        parameters: dict[str, Any] | None = None,
    ) -> neo4j.graph.Graph:
        """Run `cypher` as a read query and return the resulting graph."""
        ...

    def create_database(self, database: str) -> None:
        """Create `database` in the sink."""
        ...

    def drop_database(self, database: str) -> None:
        """Remove `database` from the sink."""
        ...

    def drop_subgraph(self, database: str, provider_id: str) -> int:
        """Delete the graph data of `provider_id` and return a count."""
        ...

    def has_provider_data(self, database: str, provider_id: str) -> bool:
        """Tell whether the sink holds any data for `provider_id`."""
        ...

    def clear_cache(self, database: str) -> None:
        """Clear the backend's query cache for `database`."""
        ...

    def ensure_sync_indexes(self, database: str) -> None:
        """Create whatever index the sync write path relies on.

        Invoked once when a provider sync starts, so implementations must be
        idempotent. Neo4j creates a `_provider_element_id` index on
        `_ProviderResource`; Neptune does nothing because its `~id` lookup
        needs no index.
        """
        ...

    def write_nodes(
        self,
        database: str,
        labels: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert one batch of nodes.

        `labels` arrives as a pre-rendered Cypher label string that can be
        placed right after the node variable
        (e.g. `` `AWSUser`:`_ProviderResource`:`_Tenant_x` ``). Every row
        carries `provider_element_id` and `props`.
        """
        ...

    def write_relationships(
        self,
        database: str,
        rel_type: str,
        provider_id: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Upsert one batch of relationships.

        Every row carries `start_element_id`, `end_element_id`,
        `provider_element_id` and `props`. `rel_type` is the relationship
        type and is already a valid Cypher identifier.
        """
        ...
|
||||
@@ -0,0 +1,134 @@
|
||||
"""Sink backend factory and process-wide handle cache.
|
||||
|
||||
Picks the active backend from `settings.ATTACK_PATHS_SINK_DATABASE` at first
|
||||
use, holds the active backend plus any secondary backends needed to serve
|
||||
scans written under the previous configuration, and tears them all down on
|
||||
process shutdown. Imported via `from api.attack_paths import sink as
|
||||
sink_module`.
|
||||
"""
|
||||
|
||||
import threading
|
||||
from enum import StrEnum, auto
|
||||
|
||||
from api.attack_paths.sink.base import SinkDatabase
|
||||
from api.models import AttackPathsScan
|
||||
from django.conf import settings
|
||||
|
||||
# Backend names
|
||||
|
||||
|
||||
class SinkBackend(StrEnum):
|
||||
NEO4J = auto()
|
||||
NEPTUNE = auto()
|
||||
|
||||
|
||||
# Backend cache
|
||||
|
||||
_backend: SinkDatabase | None = None
|
||||
_secondary_backends: dict[SinkBackend, SinkDatabase] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def _resolve_setting() -> SinkBackend:
    """Translate `settings.ATTACK_PATHS_SINK_DATABASE` into a `SinkBackend`.

    The setting is lower-cased before the lookup.

    Raises:
        RuntimeError: the setting does not name a known backend.
    """
    configured = settings.ATTACK_PATHS_SINK_DATABASE.lower()
    try:
        backend = SinkBackend(configured)
    except ValueError:
        allowed = sorted(member.value for member in SinkBackend)
        raise RuntimeError(
            f"ATTACK_PATHS_SINK_DATABASE must be one of {allowed}; got {configured!r}"
        )
    return backend
|
||||
|
||||
|
||||
def _build_backend(name: SinkBackend) -> SinkDatabase:
    """Instantiate (without initialising) the sink class for `name`.

    The implementation modules are imported inside the branches, so only
    the selected backend's module is loaded.

    Raises:
        RuntimeError: `name` is not one of the `SinkBackend` members.
    """
    if name is SinkBackend.NEPTUNE:
        from api.attack_paths.sink.neptune import NeptuneSink as sink_class
    elif name is SinkBackend.NEO4J:
        from api.attack_paths.sink.neo4j import Neo4jSink as sink_class
    else:
        raise RuntimeError(f"Unknown sink backend {name!r}")

    return sink_class()
|
||||
|
||||
|
||||
# Lifecycle
|
||||
|
||||
|
||||
def init(name: SinkBackend | str | None = None) -> SinkDatabase:
    """Initialise the active sink backend once and return it.

    `name` selects the backend explicitly; when falsy the backend comes from
    settings. Once a backend is active, later calls return it unchanged and
    ignore `name`.
    """
    global _backend

    # Lock-free fast path for the common already-initialised case.
    if _backend is not None:
        return _backend

    with _lock:
        # Re-check under the lock: another thread may have won the race.
        if _backend is None:
            selected = _resolve_setting() if not name else SinkBackend(name)
            candidate = _build_backend(selected)
            candidate.init()
            # Publish only after `init()` succeeded.
            _backend = candidate

    return _backend
|
||||
|
||||
|
||||
def close() -> None:
    """Close the active backend and every cached secondary backend.

    The module-level handles are detached under the lock, so a later `init()`
    builds fresh backends. The backends are then closed one by one. Closing
    stays best-effort: a failing backend never prevents the others from being
    closed and never raises to the caller. The failure is logged instead of
    being silently discarded.
    """
    global _backend
    import logging

    with _lock:
        detached = [
            backend
            for backend in (_backend, *_secondary_backends.values())
            if backend is not None
        ]
        _backend = None
        _secondary_backends.clear()

    for backend in detached:
        try:
            backend.close()
        except Exception:  # pragma: no cover - best-effort
            logging.getLogger(__name__).warning(
                "Failed to close attack-paths sink backend %s",
                type(backend).__name__,
                exc_info=True,
            )
|
||||
|
||||
|
||||
def get_backend() -> SinkDatabase:
    """Return the active sink backend, initialising it on first use."""
    active = init()
    return active
|
||||
|
||||
|
||||
# Per-scan routing
|
||||
|
||||
|
||||
def get_backend_for_scan(scan: AttackPathsScan) -> SinkDatabase:
    """Return the sink that stores the graph of `scan`.

    The backend name is read from `scan.sink_backend`. A missing attribute
    or a non-string value falls back to Neo4j.
    """
    fallback = SinkBackend.NEO4J.value
    stored = getattr(scan, "sink_backend", fallback)
    backend_name = stored if isinstance(stored, str) else fallback
    return get_backend_for_name(backend_name)
|
||||
|
||||
|
||||
def get_backend_for_name(name: SinkBackend | str) -> SinkDatabase:
    """Return the backend identified by `name` (persisted scan metadata).

    When `name` matches the configured backend the active backend is
    returned; otherwise a cached secondary backend is used.

    Raises:
        ValueError: `name` is not a valid `SinkBackend` value.
    """
    requested = SinkBackend(name)
    if requested is not _resolve_setting():
        return _build_backend_cached(requested)
    return get_backend()
|
||||
|
||||
|
||||
def _build_backend_cached(name: SinkBackend) -> SinkDatabase:
    """Return the secondary backend for `name`, building it on first use."""
    # TODO: drop after Neptune cutover
    # Only required while the cutover is in progress, so that a pod
    # configured for Neptune can still serve scans written to Neo4j (and the
    # other way round). Once every scan lives on Neptune,
    # `get_backend_for_scan` becomes a one-liner returning `get_backend()`.
    cached = _secondary_backends.get(name)
    if cached is not None:
        return cached

    with _lock:
        # Re-check under the lock before building.
        if name not in _secondary_backends:
            created = _build_backend(name)
            created.init()
            _secondary_backends[name] = created

    return _secondary_backends[name]
|
||||
@@ -0,0 +1,454 @@
|
||||
"""Neo4j sink implementation.
|
||||
|
||||
Owns a Neo4j driver independent from the staging driver. On OSS and local dev
|
||||
this is the only sink; on hosted deployments it runs only as a legacy read
|
||||
path while phase-1 drains tenant DBs.
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
from contextlib import AbstractContextManager, contextmanager
|
||||
from typing import Any
|
||||
|
||||
import neo4j
|
||||
import neo4j.exceptions
|
||||
from api.attack_paths.retryable_session import RetryableSession
|
||||
from api.attack_paths.sink.base import SinkDatabase
|
||||
from config.env import env
|
||||
from django.conf import settings
|
||||
|
||||
logging.getLogger("neo4j").setLevel(logging.ERROR)
|
||||
logging.getLogger("neo4j").propagate = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
|
||||
"ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
|
||||
)
|
||||
READ_QUERY_TIMEOUT_SECONDS = env.int(
|
||||
"ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
|
||||
)
|
||||
CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15)
|
||||
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
|
||||
# host can't pin a request or the readiness probe longer than this.
|
||||
CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5)
|
||||
MAX_CONNECTION_LIFETIME = env.int("NEO4J_MAX_CONNECTION_LIFETIME", default=7200)
|
||||
MAX_CONNECTION_POOL_SIZE = env.int("NEO4J_MAX_CONNECTION_POOL_SIZE", default=50)
|
||||
|
||||
READ_EXCEPTION_CODES = [
|
||||
"Neo.ClientError.Statement.AccessMode",
|
||||
"Neo.ClientError.Procedure.ProcedureNotFound",
|
||||
]
|
||||
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
|
||||
DATABASE_NOT_FOUND_CODE = "Neo.ClientError.Database.DatabaseNotFound"
|
||||
|
||||
|
||||
class Neo4jSink(SinkDatabase):
|
||||
"""Neo4j-backed sink. Multi-database cluster; tenant isolation is physical."""
|
||||
|
||||
def __init__(self) -> None:
    """Create an unconnected sink; the driver is built later by `init()`."""
    # Guards creation and teardown of the driver.
    self._lock = threading.Lock()
    # Set by `init()`, reset to None by `close()`.
    self._driver: neo4j.Driver | None = None
    # Ensures `close` is registered with `atexit` at most once.
    self._atexit_registered = False
|
||||
|
||||
# Driver
|
||||
|
||||
def _config(self) -> dict:
    """Return the `neo4j` entry of Django's `DATABASES` setting."""
    databases = settings.DATABASES
    return databases["neo4j"]
|
||||
|
||||
def _uri(self) -> str:
    """Build the `bolt://host:port` URI from the Neo4j configuration.

    Raises:
        RuntimeError: host or port is missing/empty in the configuration.
    """
    neo4j_settings = self._config()
    host = neo4j_settings["HOST"]
    port = neo4j_settings["PORT"]
    if host and port:
        return f"bolt://{host}:{port}"
    raise RuntimeError(
        "NEO4J_HOST / NEO4J_PORT must be set when ATTACK_PATHS_SINK_DATABASE=neo4j"
    )
|
||||
|
||||
def init(self) -> neo4j.Driver:
    """Create the Neo4j driver on first call and return it.

    Later calls return the existing driver. The connectivity check run right
    after creation is best-effort: a failure is logged and the driver is
    kept. `close` is registered with `atexit` once.

    Raises:
        RuntimeError: host/port are not configured (raised by `_uri`).
    """
    # Lock-free fast path once the driver exists.
    if self._driver is not None:
        return self._driver

    with self._lock:
        # Re-check under the lock: another thread may have built it.
        if self._driver is None:
            credentials = self._config()
            uri = self._uri()
            self._driver = neo4j.GraphDatabase.driver(
                uri,
                auth=(credentials["USER"], credentials["PASSWORD"]),
                keep_alive=True,
                max_connection_lifetime=MAX_CONNECTION_LIFETIME,
                connection_timeout=CONNECTION_TIMEOUT,
                connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
                max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
            )
            # The eager connectivity check is best-effort:
            # a Neo4j that is down at boot must not crash the process (same
            # degradation model as Postgres). The driver reconnects lazily
            # on first use and /health/ready surfaces the outage until it
            # recovers.
            try:
                self._driver.verify_connectivity()
            except Exception:
                logger.warning(
                    "Neo4j sink unreachable at init; continuing with a lazily-reconnecting driver",
                    exc_info=True,
                )

            if not self._atexit_registered:
                atexit.register(self.close)
                self._atexit_registered = True

    return self._driver
|
||||
|
||||
def _get_driver(self) -> neo4j.Driver:
    """Return the driver, creating it through `init()` when needed."""
    driver = self.init()
    return driver
|
||||
|
||||
def verify_connectivity(self) -> None:
    """Run the driver's connectivity check; its exceptions propagate."""
    driver = self._get_driver()
    driver.verify_connectivity()
|
||||
|
||||
def close(self) -> None:
    """Close the driver if one exists and forget it.

    The reference is cleared even when `Driver.close()` raises; the
    exception still propagates to the caller.
    """
    with self._lock:
        driver = self._driver
        if driver is None:
            return
        try:
            driver.close()
        finally:
            self._driver = None
|
||||
|
||||
# Sessions
|
||||
|
||||
@contextmanager
def get_session(
    self,
    database: str | None = None,
    default_access_mode: str | None = None,
) -> Iterator[RetryableSession]:
    """Yield a `RetryableSession` on `database`, translating driver errors.

    A `Neo4jError` raised while the session is in use is re-raised as one of
    the project's exceptions:

    * `WriteQueryNotAllowedException` - the session is read-only and the
      error code is one of `READ_EXCEPTION_CODES`;
    * `ClientStatementException` - the code starts with
      `CLIENT_STATEMENT_EXCEPTION_PREFIX`;
    * `GraphDatabaseQueryException` - everything else.

    The session wrapper is always closed on exit.
    """
    from api.attack_paths.database import (
        ClientStatementException,
        GraphDatabaseQueryException,
        WriteQueryNotAllowedException,
    )

    def _open_session():
        # Resolved on each call, so the wrapper always asks for the driver anew.
        return self._get_driver().session(
            database=database, default_access_mode=default_access_mode
        )

    retrying_session: RetryableSession | None = None
    try:
        retrying_session = RetryableSession(
            session_factory=_open_session,
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield retrying_session

    except neo4j.exceptions.Neo4jError as error:
        error_code = error.code
        read_only = default_access_mode == neo4j.READ_ACCESS
        if read_only and error_code and error_code in READ_EXCEPTION_CODES:
            raise WriteQueryNotAllowedException(
                message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
            )

        detail = str(error) if error.message is None else error.message
        if error_code and error_code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
            raise ClientStatementException(message=detail, code=error_code)
        raise GraphDatabaseQueryException(message=detail, code=error_code)

    finally:
        if retrying_session is not None:
            retrying_session.close()
|
||||
|
||||
# Operations
|
||||
|
||||
def execute_read_query(
    self,
    database: str,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> neo4j.graph.Graph:
    """Run `cypher` in a read transaction and return its result graph.

    The session is opened with `READ_ACCESS` and the query is given a
    timeout of `READ_QUERY_TIMEOUT_SECONDS`. A missing `parameters` mapping
    is replaced by an empty dict.
    """

    def _collect_graph(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
        # The graph is materialised inside the transaction function.
        outcome = tx.run(
            cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
        )
        return outcome.graph()

    with self.get_session(
        database, default_access_mode=neo4j.READ_ACCESS
    ) as read_session:
        return read_session.execute_read(_collect_graph)
|
||||
|
||||
def create_database(self, database: str) -> None:
    """Create `database` on the cluster unless it already exists.

    The name is sent as the `$database` query parameter.
    """
    statement = "CREATE DATABASE $database IF NOT EXISTS"
    with self.get_session() as admin_session:
        admin_session.run(statement, {"database": database})
|
||||
|
||||
def drop_database(self, database: str) -> None:
    """Drop `database` together with its data; no-op when it is absent.

    The name is interpolated between backticks, so callers must pass a
    trusted identifier.
    """
    statement = f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA"
    with self.get_session() as admin_session:
        admin_session.run(statement)
|
||||
|
||||
def drop_subgraph(self, database: str, provider_id: str) -> int:
    """Remove every node of a provider from a tenant database, in batches.

    Relationships are deleted first and nodes afterwards, `BATCH_SIZE` at a
    time, instead of using `DETACH DELETE`. This keeps a dense provider
    graph from exceeding Neo4j's transaction memory limit.

    Returns:
        The number of deleted nodes, or 0 (without raising) when the
        database does not exist.

    Raises:
        GraphDatabaseQueryException: any query failure other than
            "database not found".
    """
    from api.attack_paths.database import GraphDatabaseQueryException
    from tasks.jobs.attack_paths.config import (
        BATCH_SIZE,
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    provider_label = get_provider_label(provider_id)
    started_at = time.perf_counter()
    rel_total = 0
    rel_batches = 0
    node_total = 0
    node_batches = 0

    # The undirected pattern matches an edge between two provider nodes from
    # both ends; `DISTINCT r` dedupes it so each round deletes a full batch
    # of unique relationships.
    delete_rels_query = f"""
        MATCH (:`{provider_label}`)-[r]-()
        WITH DISTINCT r LIMIT $batch_size
        DELETE r
        RETURN COUNT(r) AS deleted_rels_count
    """
    delete_nodes_query = f"""
        MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`)
        WITH n LIMIT $batch_size
        DELETE n
        RETURN COUNT(n) AS deleted_nodes_count
    """

    logger.info(
        "Dropping provider graph from Neo4j sink database %s "
        "(provider=%s, provider_label=%s)",
        database,
        provider_id,
        provider_label,
    )

    try:
        logger.info(
            "Opening Neo4j sink session for provider graph drop "
            "(database=%s, provider=%s)",
            database,
            provider_id,
        )
        with self.get_session(database) as session:
            logger.info(
                "Opened Neo4j sink session for provider graph drop "
                "(database=%s, provider=%s)",
                database,
                provider_id,
            )

            # Phase 1: relationships incident to provider nodes.
            while True:
                logger.info(
                    "Deleting relationship batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    rel_batches + 1,
                    rel_total,
                    time.perf_counter() - started_at,
                )
                record = session.run(
                    delete_rels_query, {"batch_size": BATCH_SIZE}
                ).single()
                removed = record.get("deleted_rels_count", 0)
                if not removed > 0:
                    break
                rel_batches += 1
                rel_total += removed
                logger.info(
                    "Deleted relationship batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, deleted_rels=%s, "
                    "total_rels=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    rel_batches,
                    removed,
                    rel_total,
                    time.perf_counter() - started_at,
                )

            # Phase 2: the nodes, which no longer have relationships.
            while True:
                logger.info(
                    "Deleting node batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    node_batches + 1,
                    node_total,
                    time.perf_counter() - started_at,
                )
                record = session.run(
                    delete_nodes_query, {"batch_size": BATCH_SIZE}
                ).single()
                removed = record.get("deleted_nodes_count", 0)
                if not removed > 0:
                    break
                node_batches += 1
                node_total += removed
                logger.info(
                    "Deleted node batch from Neo4j sink database %s "
                    "(provider=%s, batch=%s, deleted_nodes=%s, "
                    "total_nodes=%s, elapsed=%.3fs)",
                    database,
                    provider_id,
                    node_batches,
                    removed,
                    node_total,
                    time.perf_counter() - started_at,
                )

    except GraphDatabaseQueryException as exc:
        if exc.code != DATABASE_NOT_FOUND_CODE:
            raise
        logger.info(
            "Skipped provider graph drop from Neo4j sink database %s "
            "(provider=%s, reason=database_not_found, elapsed=%.3fs)",
            database,
            provider_id,
            time.perf_counter() - started_at,
        )
        return 0

    logger.info(
        "Finished dropping provider graph from Neo4j sink database %s "
        "(provider=%s, relationship_batches=%s, deleted_rels=%s, "
        "node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
        database,
        provider_id,
        rel_batches,
        rel_total,
        node_batches,
        node_total,
        time.perf_counter() - started_at,
    )
    return node_total
|
||||
|
||||
def has_provider_data(self, database: str, provider_id: str) -> bool:
    """Tell whether `database` holds at least one node of `provider_id`.

    A missing database counts as "no data" and returns False; any other
    `GraphDatabaseQueryException` propagates.
    """
    from api.attack_paths.database import GraphDatabaseQueryException
    from tasks.jobs.attack_paths.config import (
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    provider_label = get_provider_label(provider_id)
    # `LIMIT 1`: existence is all that matters.
    probe = (
        f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
    )
    try:
        with self.get_session(
            database, default_access_mode=neo4j.READ_ACCESS
        ) as read_session:
            first_row = read_session.run(probe).single()
            return first_row is not None

    except GraphDatabaseQueryException as exc:
        if exc.code != DATABASE_NOT_FOUND_CODE:
            raise
        return False
|
||||
|
||||
def clear_cache(self, database: str) -> None:
    """Clear the query caches of `database`, best-effort.

    A `GraphDatabaseQueryException` is logged as a warning and swallowed;
    any other exception propagates.
    """
    from api.attack_paths.database import GraphDatabaseQueryException

    procedure_call = "CALL db.clearQueryCaches()"
    try:
        with self.get_session(database) as session:
            session.run(procedure_call)
    except GraphDatabaseQueryException as error:
        logger.warning(
            f"Failed to clear query cache for database `{database}`: {error}"
        )
|
||||
|
||||
# Sync write path
|
||||
|
||||
def ensure_sync_indexes(self, database: str) -> None:
    """Ensure the `_provider_element_id` index on `_ProviderResource` exists.

    All synced nodes carry the `_ProviderResource` label, so this single
    index serves node upserts as well as the endpoint MATCHes of the
    relationship sync. Without it the relationship sync falls back to a
    label scan per row, which makes large provider syncs unworkable.
    `IF NOT EXISTS` keeps the call idempotent.
    """
    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
    )

    statement = "".join(
        (
            f"CREATE INDEX provider_element_id_idx IF NOT EXISTS ",
            f"FOR (n:`{PROVIDER_RESOURCE_LABEL}`) ",
            f"ON (n.`{PROVIDER_ELEMENT_ID_PROPERTY}`)",
        )
    )
    with self.get_session(database) as session:
        outcome = session.run(statement)
        outcome.consume()
|
||||
|
||||
def write_nodes(
    self,
    database: str,
    labels: str,
    rows: list[dict[str, Any]],
) -> None:
    """Upsert a batch of nodes into `database`.

    Each row is merged on its `provider_element_id`; the pre-rendered
    `labels` are then added and the row's `props` merged into the node.
    An empty batch returns immediately without opening a session.
    """
    if not rows:
        return

    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
    )

    upsert_query = f"""
        UNWIND $rows AS row
        MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}})
        SET n:{labels}
        SET n += row.props
    """
    with self.get_session(database) as session:
        outcome = session.run(upsert_query, {"rows": rows})
        outcome.consume()
|
||||
|
||||
def write_relationships(
    self,
    database: str,
    rel_type: str,
    provider_id: str,
    rows: list[dict[str, Any]],
) -> None:
    """Upsert a batch of `rel_type` relationships into `database`.

    Both endpoints are matched by their element ID among the nodes of
    `provider_id`. The relationship is merged on its own
    `provider_element_id` and the row's `props` are merged into it.
    An empty batch returns immediately without opening a session.
    """
    if not rows:
        return

    from tasks.jobs.attack_paths.config import (
        PROVIDER_ELEMENT_ID_PROPERTY,
        PROVIDER_RESOURCE_LABEL,
        get_provider_label,
    )

    provider_label = get_provider_label(provider_id)
    upsert_query = f"""
        UNWIND $rows AS row
        MATCH (s:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.start_element_id}})
        MATCH (t:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.end_element_id}})
        MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(t)
        SET r += row.props
    """
    with self.get_session(database) as session:
        outcome = session.run(upsert_query, {"rows": rows})
        outcome.consume()
|
||||
|
||||
# For compatibility with test harnesses that patch the concrete driver
|
||||
def get_driver(self) -> neo4j.Driver:
    """Public alias of `_get_driver()`: return the (lazily created) driver."""
    driver = self._get_driver()
    return driver
|
||||
|
||||
|
||||
# Helper for tests / external callers that want a read-only session specifically
|
||||
def get_read_session(
    sink: Neo4jSink, database: str
) -> AbstractContextManager[RetryableSession]:
    """Return a context manager for a `READ_ACCESS` session on `database`."""
    read_mode = neo4j.READ_ACCESS
    return sink.get_session(database, default_access_mode=read_mode)
|
||||
@@ -0,0 +1,524 @@
|
||||
"""AWS Neptune sink implementation.
|
||||
|
||||
Dual Bolt drivers: one against the writer endpoint for workers, one against
|
||||
the reader endpoint for the API read path. If `NEPTUNE_READER_ENDPOINT` is
|
||||
unset the reader falls back to the writer driver so single-node clusters work.
|
||||
|
||||
Neptune is single-database. The `database` argument on the SinkDatabase
|
||||
protocol is ignored; tenant / provider isolation is enforced by labels that
|
||||
the sync step already writes on every node (see tasks/jobs/attack_paths/sync.py).
|
||||
|
||||
SigV4 auth lives at the bottom of this file as `neptune_auth_provider`. The
|
||||
neo4j driver invokes the returned callable on each token refresh.
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Callable, Iterator
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
import neo4j
|
||||
import neo4j.exceptions
|
||||
from api.attack_paths.retryable_session import RetryableSession
|
||||
from api.attack_paths.sink.base import SinkDatabase
|
||||
from botocore.auth import SigV4Auth
|
||||
from botocore.awsrequest import AWSRequest
|
||||
from botocore.session import Session as BotoSession
|
||||
from config.env import env
|
||||
from django.conf import settings
|
||||
from neo4j.auth_management import AuthManagers, ExpiringAuth
|
||||
|
||||
logging.getLogger("neo4j").setLevel(logging.ERROR)
|
||||
logging.getLogger("neo4j").propagate = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SERVICE_UNAVAILABLE_MAX_RETRIES = env.int(
|
||||
"ATTACK_PATHS_SERVICE_UNAVAILABLE_MAX_RETRIES", default=3
|
||||
)
|
||||
READ_QUERY_TIMEOUT_SECONDS = env.int(
|
||||
"ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30
|
||||
)
|
||||
# Neptune serverless cold-start can be >30s; give the driver room
|
||||
CONN_ACQUISITION_TIMEOUT = env.int("NEPTUNE_CONN_ACQUISITION_TIMEOUT", default=60)
|
||||
# TCP connect timeout, ordered below the acquisition timeout so an unreachable
|
||||
# endpoint can't pin a request or the readiness probe longer than this. Kept
|
||||
# generous: cold-start delays query execution, not the socket connect.
|
||||
CONNECTION_TIMEOUT = env.int("NEPTUNE_CONNECTION_TIMEOUT", default=10)
|
||||
# Roll connections hourly so SigV4 rotations and cert refreshes don't strand long-lived pool entries
|
||||
MAX_CONNECTION_LIFETIME = env.int("NEPTUNE_MAX_CONNECTION_LIFETIME", default=3600)
|
||||
MAX_CONNECTION_POOL_SIZE = env.int("NEPTUNE_MAX_CONNECTION_POOL_SIZE", default=50)
|
||||
|
||||
READ_EXCEPTION_CODES = [
|
||||
"Neo.ClientError.Statement.AccessMode",
|
||||
"Neo.ClientError.Procedure.ProcedureNotFound",
|
||||
]
|
||||
CLIENT_STATEMENT_EXCEPTION_PREFIX = "Neo.ClientError.Statement."
|
||||
|
||||
# Refresh 60s before the 5-minute SigV4 window closes
|
||||
SIGV4_TOKEN_LIFETIME_MINUTES = 4
|
||||
|
||||
|
||||
class NeptuneSink(SinkDatabase):
|
||||
"""Neptune-backed sink. Single database; isolation is label-based."""
|
||||
|
||||
def __init__(self) -> None:
    """Create an unconnected sink; both drivers are built later by `init()`."""
    # Guards creation and teardown of the drivers.
    self._lock = threading.Lock()
    # Driver for the writer endpoint.
    self._writer: neo4j.Driver | None = None
    # Driver for the reader endpoint; may alias the writer driver.
    self._reader: neo4j.Driver | None = None
    # Ensures `close` is registered with `atexit` at most once.
    self._atexit_registered = False
|
||||
|
||||
# Config
|
||||
|
||||
def _config(self) -> dict:
    """Return the `neptune` entry of Django's `DATABASES` setting."""
    databases = settings.DATABASES
    return databases["neptune"]
|
||||
|
||||
def _bolt_uri(self, endpoint: str, port: str) -> str:
    """Return the TLS Bolt URI (`bolt+s://endpoint:port`) for `endpoint`."""
    return "bolt+s://{}:{}".format(endpoint, port)
|
||||
|
||||
def _https_url(self, endpoint: str, port: str) -> str:
    """Return the `https://endpoint:port` URL for `endpoint`."""
    return "https://{}:{}".format(endpoint, port)
|
||||
|
||||
def _build_driver(self, endpoint: str) -> neo4j.Driver:
    """Create a Bolt driver for `endpoint` using bearer-token auth.

    The token callable comes from `neptune_auth_provider`, built from the
    configured region and the endpoint's HTTPS URL. Pool and timeout limits
    come from the module-level constants.

    NOTE(review): `max_transaction_retry_time=0` presumably turns off the
    driver's own managed-transaction retries - confirm against the driver
    documentation.

    Raises:
        RuntimeError: `endpoint` or the configured region is empty.
    """
    neptune_settings = self._config()
    port = neptune_settings["PORT"]
    region = neptune_settings["REGION"]
    if not (endpoint and region):
        raise RuntimeError(
            "NEPTUNE_WRITER_ENDPOINT and AWS_REGION must be set when "
            "ATTACK_PATHS_SINK_DATABASE=neptune"
        )

    bolt_uri = self._bolt_uri(endpoint, port)
    token_provider = neptune_auth_provider(region, self._https_url(endpoint, port))
    return neo4j.GraphDatabase.driver(
        bolt_uri,
        auth=AuthManagers.bearer(token_provider),
        keep_alive=True,
        max_connection_lifetime=MAX_CONNECTION_LIFETIME,
        connection_timeout=CONNECTION_TIMEOUT,
        connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT,
        max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
        max_transaction_retry_time=0,
    )
|
||||
|
||||
# Lifecycle
|
||||
|
||||
def init(self) -> None:
|
||||
if self._writer is not None:
|
||||
return
|
||||
with self._lock:
|
||||
if self._writer is None:
|
||||
cfg = self._config()
|
||||
writer_endpoint = cfg["WRITER_ENDPOINT"]
|
||||
reader_endpoint = cfg["READER_ENDPOINT"] or writer_endpoint
|
||||
|
||||
# Eager connectivity checks are best-effort
|
||||
# A Neptune that is down at boot must not crash the process, same degradation model as Postgres
|
||||
# Drivers reconnect lazily on first use
|
||||
# /health/ready surfaces the outage until it recovers
|
||||
self._writer = self._build_driver(writer_endpoint)
|
||||
self._verify_best_effort(self._writer, "writer")
|
||||
|
||||
if reader_endpoint == writer_endpoint:
|
||||
self._reader = self._writer
|
||||
|
||||
else:
|
||||
self._reader = self._build_driver(reader_endpoint)
|
||||
self._verify_best_effort(self._reader, "reader")
|
||||
|
||||
if not self._atexit_registered:
|
||||
atexit.register(self.close)
|
||||
self._atexit_registered = True
|
||||
|
||||
def close(self) -> None:
|
||||
with self._lock:
|
||||
# `Driver.close()` is idempotent, so closing the same driver twice
|
||||
# (when reader aliases writer on single-endpoint configs) is safe
|
||||
for driver in (self._reader, self._writer):
|
||||
if driver is None:
|
||||
continue
|
||||
try:
|
||||
driver.close()
|
||||
except Exception: # pragma: no cover - best-effort
|
||||
pass
|
||||
self._writer = None
|
||||
self._reader = None
|
||||
|
||||
# Sessions
|
||||
|
||||
def _get_writer(self) -> neo4j.Driver:
|
||||
self.init()
|
||||
assert self._writer is not None
|
||||
return self._writer
|
||||
|
||||
def _get_reader(self) -> neo4j.Driver:
|
||||
self.init()
|
||||
assert self._reader is not None
|
||||
return self._reader
|
||||
|
||||
@staticmethod
|
||||
def _verify_best_effort(driver: neo4j.Driver, role: str) -> None:
|
||||
try:
|
||||
driver.verify_connectivity()
|
||||
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Neptune %s endpoint unreachable at init; continuing with a lazily-reconnecting driver",
|
||||
role,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def verify_connectivity(self) -> None:
|
||||
# The API read path uses the reader driver
|
||||
# On single-endpoint clusters it aliases the writer, so this also covers the writer
|
||||
# A writer-only outage is a workers' concern (no HTTP probe there) and deliberately does not fail API readiness
|
||||
self._get_reader().verify_connectivity()
|
||||
|
||||
@contextmanager
|
||||
def get_session(
|
||||
self,
|
||||
database: str | None = None, # noqa: ARG002 - ignored on Neptune
|
||||
default_access_mode: str | None = None,
|
||||
) -> Iterator[RetryableSession]:
|
||||
from api.attack_paths.database import (
|
||||
ClientStatementException,
|
||||
GraphDatabaseQueryException,
|
||||
WriteQueryNotAllowedException,
|
||||
)
|
||||
|
||||
driver = (
|
||||
self._get_reader()
|
||||
if default_access_mode == neo4j.READ_ACCESS
|
||||
else self._get_writer()
|
||||
)
|
||||
|
||||
session_wrapper: RetryableSession | None = None
|
||||
try:
|
||||
session_wrapper = RetryableSession(
|
||||
session_factory=lambda: driver.session(
|
||||
default_access_mode=default_access_mode
|
||||
),
|
||||
max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
|
||||
)
|
||||
yield session_wrapper
|
||||
|
||||
except neo4j.exceptions.Neo4jError as exc:
|
||||
if (
|
||||
default_access_mode == neo4j.READ_ACCESS
|
||||
and exc.code
|
||||
and exc.code in READ_EXCEPTION_CODES
|
||||
):
|
||||
raise WriteQueryNotAllowedException(
|
||||
message="Read query not allowed", code=READ_EXCEPTION_CODES[0]
|
||||
)
|
||||
|
||||
message = exc.message if exc.message is not None else str(exc)
|
||||
if exc.code and exc.code.startswith(CLIENT_STATEMENT_EXCEPTION_PREFIX):
|
||||
raise ClientStatementException(message=message, code=exc.code)
|
||||
raise GraphDatabaseQueryException(message=message, code=exc.code)
|
||||
|
||||
finally:
|
||||
if session_wrapper is not None:
|
||||
session_wrapper.close()
|
||||
|
||||
# Operations
|
||||
|
||||
def execute_read_query(
|
||||
self,
|
||||
database: str, # noqa: ARG002 - ignored on Neptune
|
||||
cypher: str,
|
||||
parameters: dict[str, Any] | None = None,
|
||||
) -> neo4j.graph.Graph:
|
||||
with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:
|
||||
|
||||
def _run(tx: neo4j.ManagedTransaction) -> neo4j.graph.Graph:
|
||||
result = tx.run(
|
||||
cypher, parameters or {}, timeout=READ_QUERY_TIMEOUT_SECONDS
|
||||
)
|
||||
return result.graph()
|
||||
|
||||
return session.execute_read(_run)
|
||||
|
||||
def create_database(self, database: str) -> None: # noqa: ARG002
|
||||
# Neptune clusters are single-database; there is nothing to create.
|
||||
return None
|
||||
|
||||
def drop_database(self, database: str) -> None: # noqa: ARG002
|
||||
# Neptune clusters are single-database; there is nothing to drop.
|
||||
return None
|
||||
|
||||
def drop_subgraph(self, database: str, provider_id: str) -> int: # noqa: ARG002
|
||||
"""Delete a provider's subgraph in two bounded phases.
|
||||
|
||||
Neptune write transactions are capped at ~2 minutes. A naive
|
||||
`DETACH DELETE` on a label-scanned batch grows unbounded with graph
|
||||
density (one node can drag thousands of relationships into the same
|
||||
transaction). Instead:
|
||||
|
||||
1. Delete relationships incident to provider nodes, one fixed-size
|
||||
batch per transaction.
|
||||
2. Delete the now-orphaned nodes, one fixed-size batch per transaction.
|
||||
|
||||
Each transaction does work proportional to `batch_size`, never to the
|
||||
graph's branching factor.
|
||||
"""
|
||||
from tasks.jobs.attack_paths.config import (
|
||||
BATCH_SIZE,
|
||||
PROVIDER_RESOURCE_LABEL,
|
||||
get_provider_label,
|
||||
)
|
||||
|
||||
provider_label = get_provider_label(provider_id)
|
||||
deleted_relationships = 0
|
||||
relationship_batches = 0
|
||||
node_batches = 0
|
||||
drop_t0 = time.perf_counter()
|
||||
|
||||
logger.info(
|
||||
"Dropping provider graph from Neptune sink "
|
||||
"(provider=%s, provider_label=%s)",
|
||||
provider_id,
|
||||
provider_label,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Opening Neptune writer session for provider graph drop (provider=%s)",
|
||||
provider_id,
|
||||
)
|
||||
with self.get_session() as session:
|
||||
logger.info(
|
||||
"Opened Neptune writer session for provider graph drop (provider=%s)",
|
||||
provider_id,
|
||||
)
|
||||
while True:
|
||||
next_batch = relationship_batches + 1
|
||||
logger.info(
|
||||
"Deleting relationship batch from Neptune sink "
|
||||
"(provider=%s, batch=%s, total_rels=%s, elapsed=%.3fs)",
|
||||
provider_id,
|
||||
next_batch,
|
||||
deleted_relationships,
|
||||
time.perf_counter() - drop_t0,
|
||||
)
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (:`{provider_label}`)-[r]-()
|
||||
WITH DISTINCT r LIMIT $batch_size
|
||||
DELETE r
|
||||
RETURN COUNT(r) AS deleted_rels_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
)
|
||||
record = result.single()
|
||||
deleted_rels = (record["deleted_rels_count"] if record else 0) or 0
|
||||
if deleted_rels == 0:
|
||||
break
|
||||
relationship_batches += 1
|
||||
deleted_relationships += deleted_rels
|
||||
logger.info(
|
||||
"Deleted relationship batch from Neptune sink "
|
||||
"(provider=%s, batch=%s, deleted_rels=%s, total_rels=%s, "
|
||||
"elapsed=%.3fs)",
|
||||
provider_id,
|
||||
relationship_batches,
|
||||
deleted_rels,
|
||||
deleted_relationships,
|
||||
time.perf_counter() - drop_t0,
|
||||
)
|
||||
|
||||
deleted_nodes = 0
|
||||
while True:
|
||||
next_batch = node_batches + 1
|
||||
logger.info(
|
||||
"Deleting node batch from Neptune sink "
|
||||
"(provider=%s, batch=%s, total_nodes=%s, elapsed=%.3fs)",
|
||||
provider_id,
|
||||
next_batch,
|
||||
deleted_nodes,
|
||||
time.perf_counter() - drop_t0,
|
||||
)
|
||||
result = session.run(
|
||||
f"""
|
||||
MATCH (n:`{PROVIDER_RESOURCE_LABEL}`:`{provider_label}`)
|
||||
WITH n LIMIT $batch_size
|
||||
DELETE n
|
||||
RETURN COUNT(n) AS deleted_nodes_count
|
||||
""",
|
||||
{"batch_size": BATCH_SIZE},
|
||||
)
|
||||
record = result.single()
|
||||
deleted = (record["deleted_nodes_count"] if record else 0) or 0
|
||||
if deleted == 0:
|
||||
break
|
||||
node_batches += 1
|
||||
deleted_nodes += deleted
|
||||
logger.info(
|
||||
"Deleted node batch from Neptune sink "
|
||||
"(provider=%s, batch=%s, deleted_nodes=%s, total_nodes=%s, "
|
||||
"elapsed=%.3fs)",
|
||||
provider_id,
|
||||
node_batches,
|
||||
deleted,
|
||||
deleted_nodes,
|
||||
time.perf_counter() - drop_t0,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Finished dropping provider graph from Neptune sink "
|
||||
"(provider=%s, relationship_batches=%s, deleted_rels=%s, "
|
||||
"node_batches=%s, deleted_nodes=%s, elapsed=%.3fs)",
|
||||
provider_id,
|
||||
relationship_batches,
|
||||
deleted_relationships,
|
||||
node_batches,
|
||||
deleted_nodes,
|
||||
time.perf_counter() - drop_t0,
|
||||
)
|
||||
return deleted_nodes
|
||||
|
||||
def has_provider_data(self, database: str, provider_id: str) -> bool: # noqa: ARG002
|
||||
from tasks.jobs.attack_paths.config import (
|
||||
PROVIDER_RESOURCE_LABEL,
|
||||
get_provider_label,
|
||||
)
|
||||
|
||||
provider_label = get_provider_label(provider_id)
|
||||
query = (
|
||||
f"MATCH (n:{PROVIDER_RESOURCE_LABEL}:`{provider_label}`) RETURN 1 LIMIT 1"
|
||||
)
|
||||
with self.get_session(default_access_mode=neo4j.READ_ACCESS) as session:
|
||||
result = session.run(query)
|
||||
return result.single() is not None
|
||||
|
||||
def clear_cache(self, database: str) -> None: # noqa: ARG002
|
||||
# Neptune has no user-facing cache-clear procedure; no-op.
|
||||
return None
|
||||
|
||||
# Sync write path
|
||||
|
||||
def ensure_sync_indexes(self, database: str) -> None: # noqa: ARG002
|
||||
# Neptune routes node and relationship lookups through `~id`, which is the cluster's primary key
|
||||
# No additional index is needed or supported
|
||||
return None
|
||||
|
||||
def write_nodes(
|
||||
self,
|
||||
database: str, # noqa: ARG002
|
||||
labels: str,
|
||||
rows: list[dict[str, Any]],
|
||||
) -> None:
|
||||
if not rows:
|
||||
return
|
||||
from tasks.jobs.attack_paths.config import (
|
||||
PROVIDER_ELEMENT_ID_PROPERTY,
|
||||
PROVIDER_RESOURCE_LABEL,
|
||||
)
|
||||
|
||||
# MERGE on `~id` is the documented and engine-optimized idempotent
|
||||
# upsert pattern for Neptune openCypher. The label inside the MERGE
|
||||
# matters: Neptune assigns a default `vertex` label to any node
|
||||
# created without an explicit one, so we pin `_ProviderResource`
|
||||
# (which every synced node carries anyway) at MERGE-time. Additional
|
||||
# labels are added after
|
||||
#
|
||||
# We also write `_provider_element_id` as a regular property so
|
||||
# non-sync code (drop_subgraph, query helpers) keeps a stable contract
|
||||
# that doesn't know about `~id`
|
||||
query = f"""
|
||||
UNWIND $rows AS row
|
||||
MERGE (n:`{PROVIDER_RESOURCE_LABEL}` {{`~id`: row.provider_element_id}})
|
||||
SET n:{labels}
|
||||
SET n += row.props
|
||||
SET n.`{PROVIDER_ELEMENT_ID_PROPERTY}` = row.provider_element_id
|
||||
"""
|
||||
with self.get_session() as session:
|
||||
session.run(query, {"rows": rows}).consume()
|
||||
|
||||
def write_relationships(
|
||||
self,
|
||||
database: str, # noqa: ARG002
|
||||
rel_type: str,
|
||||
provider_id: str, # noqa: ARG002 - encoded in start/end `~id` already
|
||||
rows: list[dict[str, Any]],
|
||||
) -> None:
|
||||
if not rows:
|
||||
return
|
||||
from tasks.jobs.attack_paths.config import PROVIDER_ELEMENT_ID_PROPERTY
|
||||
|
||||
# `id(n) = $value` is Neptune's parameterized fast path; both endpoint
|
||||
# MATCHes resolve in O(1) via the system `~id`, so per-row work stays
|
||||
# bounded regardless of batch size
|
||||
query = f"""
|
||||
UNWIND $rows AS row
|
||||
MATCH (s) WHERE id(s) = row.start_element_id
|
||||
MATCH (e) WHERE id(e) = row.end_element_id
|
||||
MERGE (s)-[r:`{rel_type}` {{`{PROVIDER_ELEMENT_ID_PROPERTY}`: row.provider_element_id}}]->(e)
|
||||
SET r += row.props
|
||||
"""
|
||||
with self.get_session() as session:
|
||||
session.run(query, {"rows": rows}).consume()
|
||||
|
||||
# Test helpers
|
||||
|
||||
def get_writer(self) -> neo4j.Driver:
|
||||
return self._get_writer()
|
||||
|
||||
def get_reader(self) -> neo4j.Driver:
|
||||
return self._get_reader()
|
||||
|
||||
|
||||
# SigV4 auth provider
|
||||
|
||||
|
||||
class _NeptuneAuthToken(neo4j.Auth):
    """SigV4-signed `GET /opencypher` request packaged as a neo4j `Auth`.

    The signed headers are JSON-encoded and handed to the base class as the
    credentials of a "basic" auth token.
    """

    def __init__(self, region: str, url: str) -> None:
        resolved = BotoSession().get_credentials()
        if resolved is None:
            raise RuntimeError(
                "No AWS credentials available for Neptune SigV4 signing. "
                "Ensure the boto3 credential chain can resolve."
            )
        frozen = resolved.get_frozen_credentials()

        signed = AWSRequest(method="GET", url=url + "/opencypher")
        # The canonical Host used for signing has to be the full `host:port`;
        # Neptune listens on a non-default port (8182), so taking `.hostname`
        # would lose the port and invalidate the signature
        signed.headers.add_header("Host", urlsplit(url).netloc)
        SigV4Auth(frozen, "neptune-db", region).add_auth(signed)

        # Copy over only the headers that are actually present on the signed
        # request, in this fixed order
        wanted = ("Authorization", "X-Amz-Date", "X-Amz-Security-Token", "Host")
        payload = {}
        for name in wanted:
            if name in signed.headers:
                payload[name] = signed.headers[name]
        payload["HttpMethod"] = "GET"

        super().__init__("basic", "username", json.dumps(payload))
|
||||
|
||||
|
||||
def neptune_auth_provider(region: str, https_url: str) -> Callable[[], ExpiringAuth]:
    """Build the zero-argument credential factory handed to the neo4j driver.

    Every invocation of the returned callable signs a fresh token and stamps
    it with an expiry `SIGV4_TOKEN_LIFETIME_MINUTES` minutes from now (UTC).
    """

    def _provider() -> ExpiringAuth:
        signed_token = _NeptuneAuthToken(region, https_url)
        lifetime = datetime.timedelta(minutes=SIGV4_TOKEN_LIFETIME_MINUTES)
        deadline = datetime.datetime.now(datetime.UTC) + lifetime
        return ExpiringAuth(auth=signed_token, expires_at=deadline.timestamp())

    return _provider
|
||||
@@ -1,12 +1,11 @@
|
||||
import logging
|
||||
|
||||
from typing import Any, Iterable
|
||||
from collections.abc import Iterable
|
||||
from typing import Any
|
||||
|
||||
import neo4j
|
||||
|
||||
from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
|
||||
|
||||
from api.attack_paths import database as graph_database, AttackPathsQueryDefinition
|
||||
from api.attack_paths import AttackPathsQueryDefinition
|
||||
from api.attack_paths import database as graph_database
|
||||
from api.attack_paths import sink as sink_module
|
||||
from api.attack_paths.cypher_sanitizer import (
|
||||
inject_provider_label,
|
||||
validate_custom_query,
|
||||
@@ -16,7 +15,10 @@ from api.attack_paths.queries.schema import (
|
||||
RAW_SCHEMA_URL,
|
||||
get_cartography_schema_query,
|
||||
)
|
||||
from api.models import AttackPathsScan
|
||||
from config.custom_logging import BackendLogger
|
||||
from config.env import env
|
||||
from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
|
||||
from tasks.jobs.attack_paths.config import (
|
||||
INTERNAL_LABELS,
|
||||
INTERNAL_PROPERTIES,
|
||||
@@ -27,6 +29,10 @@ from tasks.jobs.attack_paths.config import (
|
||||
logger = logging.getLogger(BackendLogger.API)
|
||||
|
||||
|
||||
def _custom_query_timeout_ms() -> int:
    """Return the custom-query read timeout in milliseconds.

    Reads `ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS` (default 30) on every
    call and converts it from seconds.
    """
    timeout_seconds = env.int("ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30)
    return timeout_seconds * 1000
|
||||
|
||||
|
||||
# Predefined query helpers
|
||||
|
||||
|
||||
@@ -103,13 +109,13 @@ def execute_query(
|
||||
definition: AttackPathsQueryDefinition,
|
||||
parameters: dict[str, Any],
|
||||
provider_id: str,
|
||||
scan: AttackPathsScan,
|
||||
) -> dict[str, Any]:
|
||||
try:
|
||||
graph = graph_database.execute_read_query(
|
||||
database=database_name,
|
||||
cypher=definition.cypher,
|
||||
parameters=parameters,
|
||||
)
|
||||
# TODO: drop after Neptune cutover
|
||||
# Route reads by the scan row's recorded sink, not by current settings.
|
||||
backend = sink_module.get_backend_for_scan(scan)
|
||||
graph = backend.execute_read_query(database_name, definition.cypher, parameters)
|
||||
return _serialize_graph(graph, provider_id)
|
||||
|
||||
except graph_database.WriteQueryNotAllowedException:
|
||||
@@ -143,22 +149,31 @@ def execute_custom_query(
|
||||
database_name: str,
|
||||
cypher: str,
|
||||
provider_id: str,
|
||||
scan: AttackPathsScan,
|
||||
) -> dict[str, Any]:
|
||||
# Defense-in-depth for custom queries:
|
||||
# 1. neo4j.READ_ACCESS — prevents mutations at the driver level
|
||||
# 2. inject_provider_label() — regex-based label injection scopes node patterns
|
||||
# 3. _serialize_graph() — post-query filter drops nodes without the provider label
|
||||
# 1. `neo4j.READ_ACCESS` — prevents mutations at the driver level
|
||||
# 2. `inject_provider_label()` — regex-based label injection scopes node patterns
|
||||
# 3. `_serialize_graph()` — post-query filter drops nodes without the provider label
|
||||
# 4. `USING QUERY:TIMEOUTMILLISECONDS` on Neptune — server-side runaway cutoff
|
||||
#
|
||||
# Layer 2 is best-effort (regex can't fully parse Cypher);
|
||||
# layer 3 is the safety net that guarantees provider isolation.
|
||||
validate_custom_query(cypher)
|
||||
cypher = inject_provider_label(cypher, provider_id)
|
||||
|
||||
# TODO: drop after Neptune cutover
|
||||
backend = sink_module.get_backend_for_scan(scan)
|
||||
|
||||
# Neptune enforces a cluster-level query timeout; prepending the hint
|
||||
# makes the limit explicit and matches the client-side read timeout.
|
||||
# Applies only when the scan's graph lives in Neptune.
|
||||
if getattr(scan, "sink_backend", None) == "neptune":
|
||||
timeout_ms = _custom_query_timeout_ms()
|
||||
cypher = f"USING QUERY:TIMEOUTMILLISECONDS {timeout_ms}\n{cypher}"
|
||||
|
||||
try:
|
||||
graph = graph_database.execute_read_query(
|
||||
database=database_name,
|
||||
cypher=cypher,
|
||||
)
|
||||
graph = backend.execute_read_query(database_name, cypher, None)
|
||||
serialized = _serialize_graph(graph, provider_id)
|
||||
return _truncate_graph(serialized)
|
||||
|
||||
@@ -181,10 +196,11 @@ def execute_custom_query(
|
||||
|
||||
|
||||
def get_cartography_schema(
|
||||
database_name: str, provider_id: str
|
||||
database_name: str, provider_id: str, scan: AttackPathsScan
|
||||
) -> dict[str, str] | None:
|
||||
try:
|
||||
with graph_database.get_session(
|
||||
backend = sink_module.get_backend_for_scan(scan)
|
||||
with backend.get_session(
|
||||
database_name, default_access_mode=neo4j.READ_ACCESS
|
||||
) as session:
|
||||
result = session.run(get_cartography_schema_query(provider_id))
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
from typing import Optional, Tuple
|
||||
from math import isfinite
|
||||
from uuid import UUID
|
||||
|
||||
from api.db_router import MainRouter
|
||||
from api.models import TenantAPIKey, TenantAPIKeyManager
|
||||
from cryptography.fernet import InvalidToken
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.utils import timezone
|
||||
from drf_simple_apikey.backends import APIKeyAuthentication as BaseAPIKeyAuth
|
||||
from drf_simple_apikey.crypto import get_crypto
|
||||
from drf_simple_apikey.settings import package_settings
|
||||
from rest_framework.authentication import BaseAuthentication
|
||||
from rest_framework.exceptions import AuthenticationFailed
|
||||
from rest_framework.request import Request
|
||||
from rest_framework_simplejwt.authentication import JWTAuthentication
|
||||
|
||||
from api.db_router import MainRouter
|
||||
from api.models import TenantAPIKey, TenantAPIKeyManager
|
||||
|
||||
|
||||
class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
|
||||
model = TenantAPIKey
|
||||
@@ -23,18 +24,49 @@ class TenantAPIKeyAuthentication(BaseAPIKeyAuth):
|
||||
def _authenticate_credentials(self, request, key):
|
||||
"""
|
||||
Override to use admin connection, bypassing RLS during authentication.
|
||||
Delegates to parent after temporarily routing model queries to admin DB.
|
||||
"""
|
||||
# Temporarily point the model's manager to admin database
|
||||
original_objects = self.model.objects
|
||||
self.model.objects = self.model.objects.using(MainRouter.admin_db)
|
||||
try:
|
||||
payload = self.key_crypto.decrypt(key)
|
||||
except ValueError:
|
||||
raise AuthenticationFailed("Invalid API Key.")
|
||||
|
||||
if not isinstance(payload, dict):
|
||||
raise AuthenticationFailed("Invalid API Key.")
|
||||
|
||||
payload_pk = payload.get("_pk")
|
||||
payload_exp = payload.get("_exp")
|
||||
if (
|
||||
not isinstance(payload_pk, str)
|
||||
or isinstance(payload_exp, bool)
|
||||
or not isinstance(payload_exp, (int, float))
|
||||
or not isfinite(payload_exp)
|
||||
):
|
||||
raise AuthenticationFailed("Invalid API Key.")
|
||||
|
||||
try:
|
||||
# Call parent method which will now use admin database
|
||||
return super()._authenticate_credentials(request, key)
|
||||
finally:
|
||||
# Restore original manager
|
||||
self.model.objects = original_objects
|
||||
api_key_pk = UUID(payload_pk)
|
||||
except ValueError:
|
||||
raise AuthenticationFailed("Invalid API Key.")
|
||||
|
||||
if payload_exp < timezone.now().timestamp():
|
||||
raise AuthenticationFailed("API Key has already expired.")
|
||||
|
||||
try:
|
||||
api_key = self.model.objects.using(MainRouter.admin_db).get(id=api_key_pk)
|
||||
except ObjectDoesNotExist:
|
||||
raise AuthenticationFailed("No entity matching this api key.")
|
||||
|
||||
if api_key.revoked:
|
||||
raise AuthenticationFailed("This API Key has been revoked.")
|
||||
|
||||
client_ip = request.META.get(package_settings.IP_ADDRESS_HEADER)
|
||||
if api_key.blacklisted_ips and client_ip in api_key.blacklisted_ips:
|
||||
raise AuthenticationFailed("Access denied from blacklisted IP.")
|
||||
|
||||
if api_key.whitelisted_ips and client_ip not in api_key.whitelisted_ips:
|
||||
raise AuthenticationFailed("Access restricted to specific IP addresses.")
|
||||
|
||||
return api_key.entity, key
|
||||
|
||||
def authenticate(self, request: Request):
|
||||
prefixed_key = self.get_key(request)
|
||||
@@ -81,7 +113,7 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):
|
||||
jwt_auth = JWTAuthentication()
|
||||
api_key_auth = TenantAPIKeyAuthentication()
|
||||
|
||||
def authenticate(self, request: Request) -> Optional[Tuple[object, dict]]:
|
||||
def authenticate(self, request: Request) -> tuple[object, dict] | None:
|
||||
auth_header = request.headers.get("Authorization", "")
|
||||
|
||||
# Prioritize JWT authentication if both are present
|
||||
@@ -93,3 +125,30 @@ class CombinedJWTOrAPIKeyAuthentication(BaseAuthentication):
|
||||
|
||||
# Default fallback
|
||||
return self.jwt_auth.authenticate(request)
|
||||
|
||||
|
||||
class SSEAuthentication(CombinedJWTOrAPIKeyAuthentication):
    """JWT/API-Key auth that also accepts `?access_token=<jwt>`.

    The browser `EventSource` API takes only a URL and `withCredentials`, so
    it has no way to send an `Authorization` header. SSE endpoints therefore
    also accept the JWT in the `access_token` query parameter (the name
    RFC 6750 Section 2.3 defines for bearer tokens), which keeps browser SSE
    clients on the same auth stack as the rest of the API.
    """

    def authenticate(self, request: Request):
        # An Authorization header always wins; the query string is only
        # consulted when the header is absent
        has_header = bool(request.headers.get("Authorization", ""))
        query_token = (
            None if has_header else request.query_params.get("access_token")
        )

        if has_header or not query_token:
            # Either a header is present, or there is neither header nor
            # query token; in the latter case the parent class raises the
            # canonical AuthenticationFailed.
            return super().authenticate(request)

        token = self.jwt_auth.get_validated_token(query_token)
        return self.jwt_auth.get_user(token), token
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
from api.authentication import CombinedJWTOrAPIKeyAuthentication
|
||||
from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
|
||||
from api.db_utils import POSTGRES_USER_VAR, rls_transaction
|
||||
from api.filters import CustomDjangoFilterBackend
|
||||
from api.models import Role, UserRoleRelationship
|
||||
from api.rbac.permissions import HasPermissions
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from rest_framework import permissions
|
||||
@@ -8,13 +14,6 @@ from rest_framework.response import Response
|
||||
from rest_framework_json_api import filters
|
||||
from rest_framework_json_api.views import ModelViewSet
|
||||
|
||||
from api.authentication import CombinedJWTOrAPIKeyAuthentication
|
||||
from api.db_router import MainRouter, reset_read_db_alias, set_read_db_alias
|
||||
from api.db_utils import POSTGRES_USER_VAR, rls_transaction
|
||||
from api.filters import CustomDjangoFilterBackend
|
||||
from api.models import Role, UserRoleRelationship
|
||||
from api.rbac.permissions import HasPermissions
|
||||
|
||||
|
||||
class BaseViewSet(ModelViewSet):
|
||||
authentication_classes = [CombinedJWTOrAPIKeyAuthentication]
|
||||
|
||||
@@ -112,14 +112,14 @@ def get_compliance_frameworks(provider_type: Provider.ProviderChoices) -> list[s
|
||||
"""List compliance framework identifiers available for `provider_type`.
|
||||
|
||||
Includes both per-provider frameworks and universal top-level frameworks
|
||||
(e.g. ``dora``, ``csa_ccm_4.0``).
|
||||
(e.g. ``dora_2022_2554``, ``csa_ccm_4.0``).
|
||||
|
||||
Args:
|
||||
provider_type (Provider.ProviderChoices): The cloud provider type
|
||||
(e.g., "aws", "azure", "gcp", "m365").
|
||||
|
||||
Returns:
|
||||
list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora").
|
||||
list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora_2022_2554").
|
||||
"""
|
||||
global AVAILABLE_COMPLIANCE_FRAMEWORKS
|
||||
if provider_type not in AVAILABLE_COMPLIANCE_FRAMEWORKS:
|
||||
@@ -352,7 +352,7 @@ def generate_compliance_overview_template(
|
||||
total_requirements += 1
|
||||
provider_check_list = list(requirement.checks.get(provider_type, []))
|
||||
total_checks = len(provider_check_list)
|
||||
checks_dict = {check: None for check in provider_check_list}
|
||||
checks_dict = dict.fromkeys(provider_check_list)
|
||||
|
||||
req_status_val = "MANUAL" if total_checks == 0 else "PASS"
|
||||
|
||||
|
||||
@@ -3,8 +3,14 @@ import secrets
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from api.db_router import (
|
||||
READ_REPLICA_ALIAS,
|
||||
get_read_db_alias,
|
||||
reset_read_db_alias,
|
||||
set_read_db_alias,
|
||||
)
|
||||
from celery.utils.log import get_task_logger
|
||||
from config.env import env
|
||||
from django.conf import settings
|
||||
@@ -22,13 +28,6 @@ from psycopg2 import sql as psycopg2_sql
|
||||
from psycopg2.extensions import AsIs, new_type, register_adapter, register_type
|
||||
from rest_framework_json_api.serializers import ValidationError
|
||||
|
||||
from api.db_router import (
|
||||
READ_REPLICA_ALIAS,
|
||||
get_read_db_alias,
|
||||
reset_read_db_alias,
|
||||
set_read_db_alias,
|
||||
)
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
DB_USER = settings.DATABASES["default"]["USER"] if not settings.TESTING else "test"
|
||||
@@ -170,7 +169,7 @@ def one_week_from_now():
|
||||
"""
|
||||
Return a datetime object with a date one week from now.
|
||||
"""
|
||||
return datetime.now(timezone.utc) + timedelta(days=7)
|
||||
return datetime.now(UTC) + timedelta(days=7)
|
||||
|
||||
|
||||
def generate_random_token(length: int = 14, symbols: str | None = None) -> str:
|
||||
@@ -405,10 +404,10 @@ def _should_create_index_on_partition(
|
||||
# Unknown month abbreviation, include it to be safe
|
||||
return True
|
||||
|
||||
partition_date = datetime(year, month, 1, tzinfo=timezone.utc)
|
||||
partition_date = datetime(year, month, 1, tzinfo=UTC)
|
||||
|
||||
# Get current month start
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.now(UTC)
|
||||
current_month_start = now.replace(
|
||||
day=1, hour=0, minute=0, second=0, microsecond=0
|
||||
)
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
import uuid
|
||||
from functools import wraps
|
||||
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.db import DatabaseError, connection, transaction
|
||||
from rest_framework_json_api.serializers import ValidationError
|
||||
|
||||
from api.db_router import READ_REPLICA_ALIAS
|
||||
from api.db_utils import POSTGRES_TENANT_VAR, SET_CONFIG_QUERY, rls_transaction
|
||||
from api.exceptions import ProviderDeletedException
|
||||
from api.models import Provider, Scan
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.db import DatabaseError, connection, transaction
|
||||
from rest_framework_json_api.serializers import ValidationError
|
||||
|
||||
|
||||
def set_tenant(func=None, *, keep_tenant=False):
|
||||
|
||||
+144
-32
@@ -1,19 +1,4 @@
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
|
||||
from dateutil.parser import parse
|
||||
from django.conf import settings
|
||||
from django.db.models import F, Q
|
||||
from django_filters.rest_framework import (
|
||||
BaseInFilter,
|
||||
BooleanFilter,
|
||||
CharFilter,
|
||||
ChoiceFilter,
|
||||
DateFilter,
|
||||
FilterSet,
|
||||
UUIDFilter,
|
||||
)
|
||||
from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
|
||||
from rest_framework_json_api.serializers import ValidationError
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
|
||||
from api.constants import SEVERITY_ORDER
|
||||
from api.db_utils import (
|
||||
@@ -68,6 +53,20 @@ from api.uuid_utils import (
|
||||
uuid7_start,
|
||||
)
|
||||
from api.v1.serializers import TaskBase
|
||||
from dateutil.parser import parse
|
||||
from django.conf import settings
|
||||
from django.db.models import F, Q
|
||||
from django_filters.rest_framework import (
|
||||
BaseInFilter,
|
||||
BooleanFilter,
|
||||
CharFilter,
|
||||
ChoiceFilter,
|
||||
DateFilter,
|
||||
FilterSet,
|
||||
UUIDFilter,
|
||||
)
|
||||
from rest_framework_json_api.django_filters.backends import DjangoFilterBackend
|
||||
from rest_framework_json_api.serializers import ValidationError
|
||||
|
||||
|
||||
class CustomDjangoFilterBackend(DjangoFilterBackend):
|
||||
@@ -102,7 +101,7 @@ class BaseProviderFilter(FilterSet):
|
||||
"""
|
||||
Abstract base filter for models with direct FK to Provider.
|
||||
|
||||
Provides standard provider_id and provider_type filters.
|
||||
Provides standard provider_id, provider_type, and provider_groups filters.
|
||||
Subclasses must define Meta.model.
|
||||
"""
|
||||
|
||||
@@ -116,6 +115,16 @@ class BaseProviderFilter(FilterSet):
|
||||
choices=Provider.ProviderChoices.choices,
|
||||
lookup_expr="in",
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
@@ -126,7 +135,7 @@ class BaseScanProviderFilter(FilterSet):
|
||||
"""
|
||||
Abstract base filter for models with FK to Scan (and Scan has FK to Provider).
|
||||
|
||||
Provides standard provider_id and provider_type filters via scan relationship.
|
||||
Provides standard provider_id, provider_type, and provider_groups filters via scan relationship.
|
||||
Subclasses must define Meta.model.
|
||||
"""
|
||||
|
||||
@@ -140,6 +149,16 @@ class BaseScanProviderFilter(FilterSet):
|
||||
choices=Provider.ProviderChoices.choices,
|
||||
lookup_expr="in",
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
@@ -160,6 +179,16 @@ class CommonFindingFilters(FilterSet):
|
||||
provider_type__in = ChoiceInFilter(
|
||||
choices=Provider.ProviderChoices.choices, field_name="scan__provider__provider"
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
provider_uid = CharFilter(field_name="scan__provider__uid", lookup_expr="exact")
|
||||
provider_uid__in = CharInFilter(field_name="scan__provider__uid", lookup_expr="in")
|
||||
provider_uid__icontains = CharFilter(
|
||||
@@ -370,6 +399,12 @@ class ProviderFilter(FilterSet):
|
||||
choices=Provider.ProviderChoices.choices,
|
||||
lookup_expr="in",
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider_groups__id", lookup_expr="exact", distinct=True
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider_groups__id", lookup_expr="in", distinct=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = Provider
|
||||
@@ -395,6 +430,16 @@ class ProviderRelationshipFilterSet(FilterSet):
|
||||
provider_type__in = ChoiceInFilter(
|
||||
choices=Provider.ProviderChoices.choices, field_name="provider__provider"
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
provider_uid = CharFilter(field_name="provider__uid", lookup_expr="exact")
|
||||
provider_uid__in = CharInFilter(field_name="provider__uid", lookup_expr="in")
|
||||
provider_uid__icontains = CharFilter(
|
||||
@@ -552,12 +597,12 @@ class ResourceFilter(ProviderRelationshipFilterSet):
|
||||
gte_date = (
|
||||
parse(self.data.get("updated_at__gte")).date()
|
||||
if self.data.get("updated_at__gte")
|
||||
else datetime.now(timezone.utc).date()
|
||||
else datetime.now(UTC).date()
|
||||
)
|
||||
lte_date = (
|
||||
parse(self.data.get("updated_at__lte")).date()
|
||||
if self.data.get("updated_at__lte")
|
||||
else datetime.now(timezone.utc).date()
|
||||
else datetime.now(UTC).date()
|
||||
)
|
||||
|
||||
if abs(lte_date - gte_date) > timedelta(
|
||||
@@ -702,9 +747,9 @@ class FindingFilter(CommonFindingFilters):
|
||||
lte_date = cleaned.get("inserted_at__lte") or exact_date
|
||||
|
||||
if gte_date is None:
|
||||
gte_date = datetime.now(timezone.utc).date()
|
||||
gte_date = datetime.now(UTC).date()
|
||||
if lte_date is None:
|
||||
lte_date = datetime.now(timezone.utc).date()
|
||||
lte_date = datetime.now(UTC).date()
|
||||
|
||||
if abs(lte_date - gte_date) > timedelta(
|
||||
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
|
||||
@@ -798,7 +843,7 @@ class FindingFilter(CommonFindingFilters):
|
||||
def maybe_date_to_datetime(value):
|
||||
dt = value
|
||||
if isinstance(value, date):
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
|
||||
return dt
|
||||
|
||||
|
||||
@@ -887,9 +932,9 @@ class FindingGroupFilter(CommonFindingFilters):
|
||||
lte_date = cleaned.get("inserted_at__lte") or exact_date
|
||||
|
||||
if gte_date is None:
|
||||
gte_date = datetime.now(timezone.utc).date()
|
||||
gte_date = datetime.now(UTC).date()
|
||||
if lte_date is None:
|
||||
lte_date = datetime.now(timezone.utc).date()
|
||||
lte_date = datetime.now(UTC).date()
|
||||
|
||||
if abs(lte_date - gte_date) > timedelta(
|
||||
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
|
||||
@@ -931,7 +976,7 @@ class FindingGroupFilter(CommonFindingFilters):
|
||||
"""Convert date to datetime if needed."""
|
||||
dt = value
|
||||
if isinstance(value, date):
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
|
||||
return dt
|
||||
|
||||
|
||||
@@ -1001,6 +1046,16 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
|
||||
field_name="provider__provider", choices=Provider.ProviderChoices.choices
|
||||
)
|
||||
provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = FindingGroupDailySummary
|
||||
@@ -1035,9 +1090,9 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
|
||||
lte_date = cleaned.get("inserted_at__lte") or exact_date
|
||||
|
||||
if gte_date is None:
|
||||
gte_date = datetime.now(timezone.utc).date()
|
||||
gte_date = datetime.now(UTC).date()
|
||||
if lte_date is None:
|
||||
lte_date = datetime.now(timezone.utc).date()
|
||||
lte_date = datetime.now(UTC).date()
|
||||
|
||||
if abs(lte_date - gte_date) > timedelta(
|
||||
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
|
||||
@@ -1076,7 +1131,7 @@ class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
|
||||
def _maybe_date_to_datetime(value):
|
||||
dt = value
|
||||
if isinstance(value, date):
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=timezone.utc)
|
||||
dt = datetime.combine(value, datetime.min.time(), tzinfo=UTC)
|
||||
return dt
|
||||
|
||||
|
||||
@@ -1101,6 +1156,16 @@ class LatestFindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
|
||||
field_name="provider__provider", choices=Provider.ProviderChoices.choices
|
||||
)
|
||||
provider_type__in = CharInFilter(field_name="provider__provider", lookup_expr="in")
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = FindingGroupDailySummary
|
||||
@@ -1280,12 +1345,19 @@ class RoleFilter(FilterSet):
|
||||
}
|
||||
|
||||
|
||||
class ComplianceOverviewFilter(FilterSet):
|
||||
class ComplianceOverviewFilter(BaseScanProviderFilter):
|
||||
"""
|
||||
Keep provider filters in the schema while runtime filtering resolves scans first.
|
||||
|
||||
Compliance overview provider filters are applied to the latest completed scans
|
||||
in the viewset, then this filterset handles the remaining compliance fields.
|
||||
"""
|
||||
|
||||
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
|
||||
scan_id = UUIDFilter(field_name="scan_id", required=True)
|
||||
scan_id = UUIDFilter(field_name="scan_id")
|
||||
region = CharFilter(field_name="region")
|
||||
|
||||
class Meta:
|
||||
class Meta(BaseScanProviderFilter.Meta):
|
||||
model = ComplianceRequirementOverview
|
||||
fields = {
|
||||
"inserted_at": ["date", "gte", "lte"],
|
||||
@@ -1306,6 +1378,16 @@ class ScanSummaryFilter(FilterSet):
|
||||
provider_type__in = ChoiceInFilter(
|
||||
field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
region = CharFilter(field_name="region")
|
||||
|
||||
class Meta:
|
||||
@@ -1329,6 +1411,16 @@ class DailySeveritySummaryFilter(FilterSet):
|
||||
provider_type__in = ChoiceInFilter(
|
||||
field_name="provider__provider", choices=Provider.ProviderChoices.choices
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
date_from = DateFilter(method="filter_noop")
|
||||
date_to = DateFilter(method="filter_noop")
|
||||
|
||||
@@ -1585,6 +1677,16 @@ class ThreatScoreSnapshotFilter(FilterSet):
|
||||
choices=Provider.ProviderChoices.choices,
|
||||
lookup_expr="in",
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
compliance_id = CharFilter(field_name="compliance_id", lookup_expr="exact")
|
||||
compliance_id__in = CharInFilter(field_name="compliance_id", lookup_expr="in")
|
||||
|
||||
@@ -1628,6 +1730,16 @@ class ResourceGroupOverviewFilter(FilterSet):
|
||||
choices=Provider.ProviderChoices.choices,
|
||||
lookup_expr="in",
|
||||
)
|
||||
provider_groups = UUIDFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="exact",
|
||||
distinct=True,
|
||||
)
|
||||
provider_groups__in = UUIDInFilter(
|
||||
field_name="scan__provider__provider_groups__id",
|
||||
lookup_expr="in",
|
||||
distinct=True,
|
||||
)
|
||||
resource_group = CharFilter(field_name="resource_group", lookup_expr="exact")
|
||||
resource_group__in = CharInFilter(field_name="resource_group", lookup_expr="in")
|
||||
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
Format (draft-inadarei-api-health-check-06).
|
||||
|
||||
Liveness reports only process status. Readiness verifies that PostgreSQL,
|
||||
Valkey and Neo4j are reachable and returns per-dependency detail when any
|
||||
of them is unreachable.
|
||||
Valkey and the attack-paths graph store (Neo4j or Neptune, per
|
||||
``ATTACK_PATHS_SINK_DATABASE``) are reachable and returns per-dependency
|
||||
detail when any of them is unreachable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -11,8 +12,10 @@ from __future__ import annotations
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from concurrent.futures import TimeoutError as FuturesTimeoutError
|
||||
from contextlib import suppress
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
import redis
|
||||
@@ -37,9 +40,28 @@ STATUS_FAIL = "fail"
|
||||
STATUS_WARN = "warn"
|
||||
|
||||
# Short socket timeout so a stuck Valkey cannot stall the probe.
|
||||
# Neo4j inherits its driver-level ``connection_acquisition_timeout``.
|
||||
VALKEY_PROBE_TIMEOUT_SECONDS = 2
|
||||
|
||||
# Probe-scoped budget for the graph database.
|
||||
# ``Driver.verify_connectivity()`` takes no timeout; its only bound is the
|
||||
# driver-level ``connection_acquisition_timeout`` (60s on Neptune). The
|
||||
# probe needs its own budget, independent of the workload driver, so a
|
||||
# graph-database outage cannot pin a worker thread (and the readiness lock)
|
||||
# for a minute.
|
||||
GRAPH_DB_PROBE_TIMEOUT_SECONDS = 5
|
||||
|
||||
# Bounded pool that enforces ``GRAPH_DB_PROBE_TIMEOUT_SECONDS``. If the
|
||||
# graph database is unreachable the probe call blocks until the driver's
|
||||
# own acquisition timeout fires; we abandon the future after the budget and
|
||||
# report ``fail``. Orphaned tasks are capped by ``max_workers`` plus the 3s
|
||||
# readiness cache plus the per-IP throttle, so they cannot pile up: worst
|
||||
# case during a graph-database outage is every readiness call failing fast
|
||||
# in ``GRAPH_DB_PROBE_TIMEOUT_SECONDS`` with at most 2 background threads
|
||||
# stuck for <= the driver acquisition timeout.
|
||||
_graph_db_probe_executor = ThreadPoolExecutor(
|
||||
max_workers=2, thread_name_prefix="health-graph-db-probe"
|
||||
)
|
||||
|
||||
# Brief cache window so high-frequency probes (ALB target groups, scrapers)
|
||||
# do not stampede the actual dependency checks.
|
||||
CACHE_CONTROL_HEADER = "max-age=3, must-revalidate"
|
||||
@@ -62,11 +84,7 @@ class HealthJSONRenderer(JSONRenderer):
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return (
|
||||
datetime.now(timezone.utc)
|
||||
.isoformat(timespec="milliseconds")
|
||||
.replace("+00:00", "Z")
|
||||
)
|
||||
return datetime.now(UTC).isoformat(timespec="milliseconds").replace("+00:00", "Z")
|
||||
|
||||
|
||||
def _measure(name: str, check_fn) -> tuple[dict[str, Any], float]:
|
||||
@@ -113,11 +131,24 @@ def _probe_valkey() -> None:
|
||||
client.close()
|
||||
|
||||
|
||||
def _probe_neo4j() -> None:
|
||||
# Lazy import: avoids pulling attack_paths into the boot import graph.
|
||||
from api.attack_paths.database import get_driver
|
||||
def _graph_db_component_id() -> str:
    """Name of the configured graph database, used as the ``componentId`` value.

    Reads ``settings.ATTACK_PATHS_SINK_DATABASE`` and normalises it by
    trimming surrounding whitespace and lower-casing.
    """
    configured_sink = settings.ATTACK_PATHS_SINK_DATABASE
    trimmed = configured_sink.strip()
    return trimmed.lower()
|
||||
|
||||
get_driver().verify_connectivity()
|
||||
|
||||
def _probe_graph_db() -> None:
    """Check graph-database connectivity within the probe's own time budget.

    The connectivity check runs on ``_graph_db_probe_executor`` and is waited
    on for at most ``GRAPH_DB_PROBE_TIMEOUT_SECONDS``.

    Raises:
        TimeoutError: the check did not finish within the budget.
        Exception: whatever ``verify_connectivity`` itself raised, unchanged
            (including a timeout raised inside the check).
    """
    # Lazy import: avoids pulling attack_paths into the boot import graph
    from api.attack_paths.database import verify_connectivity

    future = _graph_db_probe_executor.submit(verify_connectivity)
    try:
        future.result(timeout=GRAPH_DB_PROBE_TIMEOUT_SECONDS)
    except FuturesTimeoutError as exc:
        # On Python 3.11+ ``concurrent.futures.TimeoutError`` is the builtin
        # ``TimeoutError``, so a timeout raised *inside* the check is caught
        # here as well. ``Future.result`` re-raises the task's own exception
        # object, so an identity match means the task failed by itself: let
        # that error through instead of relabelling it as a budget overrun.
        if future.done() and future.exception() is exc:
            raise
        # Do not wait for the abandoned task; it ends when the driver's own acquisition timeout fires
        future.cancel()
        raise TimeoutError(
            f"graph-db probe exceeded {GRAPH_DB_PROBE_TIMEOUT_SECONDS}s"
        ) from exc
|
||||
|
||||
|
||||
def _build_check_entry(
|
||||
@@ -180,14 +211,18 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
|
||||
):
|
||||
return snapshot[1], snapshot[2]
|
||||
|
||||
graph_db_component_id = _graph_db_component_id()
|
||||
|
||||
postgres_result, postgres_ms = _measure("postgres", _probe_postgres)
|
||||
valkey_result, valkey_ms = _measure("valkey", _probe_valkey)
|
||||
neo4j_result, neo4j_ms = _measure("neo4j", _probe_neo4j)
|
||||
graph_db_result, graph_db_ms = _measure(graph_db_component_id, _probe_graph_db)
|
||||
|
||||
entries = [
|
||||
_build_check_entry("postgres", "datastore", postgres_result, postgres_ms),
|
||||
_build_check_entry("valkey", "datastore", valkey_result, valkey_ms),
|
||||
_build_check_entry("neo4j", "datastore", neo4j_result, neo4j_ms),
|
||||
_build_check_entry(
|
||||
graph_db_component_id, "datastore", graph_db_result, graph_db_ms
|
||||
),
|
||||
]
|
||||
overall = _aggregate_status(entries)
|
||||
|
||||
@@ -195,7 +230,7 @@ def _readiness_payload() -> tuple[dict[str, Any], int]:
|
||||
payload["checks"] = {
|
||||
"postgres:responseTime": [entries[0]],
|
||||
"valkey:responseTime": [entries[1]],
|
||||
"neo4j:responseTime": [entries[2]],
|
||||
"graphdb:responseTime": [entries[2]],
|
||||
}
|
||||
|
||||
http_status = (
|
||||
@@ -237,10 +272,10 @@ class LivenessView(APIView):
|
||||
class ReadinessView(APIView):
|
||||
"""Readiness probe.
|
||||
|
||||
Returns 200 when PostgreSQL, Valkey and Neo4j all respond, or 503 with
|
||||
per-dependency detail when any of them is unreachable. Per-IP throttle
|
||||
plus the short in-process result cache cap the real dependency hits
|
||||
regardless of inbound traffic shape.
|
||||
Returns 200 when PostgreSQL, Valkey and the attack-paths graph store
|
||||
all respond, or 503 with per-dependency detail when any of them is
|
||||
unreachable. Per-IP throttle plus the short in-process result cache cap
|
||||
the real dependency hits regardless of inbound traffic shape.
|
||||
"""
|
||||
|
||||
authentication_classes: list = []
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import random
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
from math import ceil
|
||||
from uuid import uuid4
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from tqdm import tqdm
|
||||
|
||||
from api.db_utils import rls_transaction
|
||||
from api.models import (
|
||||
Finding,
|
||||
@@ -16,7 +13,9 @@ from api.models import (
|
||||
Scan,
|
||||
StatusChoices,
|
||||
)
|
||||
from django.core.management.base import BaseCommand
|
||||
from prowler.lib.check.models import CheckMetadata
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@@ -116,7 +115,7 @@ class Command(BaseCommand):
|
||||
trigger="manual",
|
||||
state="executing",
|
||||
progress=0,
|
||||
started_at=datetime.now(timezone.utc),
|
||||
started_at=datetime.now(UTC),
|
||||
)
|
||||
scan_state = "completed"
|
||||
|
||||
@@ -272,10 +271,8 @@ class Command(BaseCommand):
|
||||
self.stdout.write(self.style.ERROR(f"Failed to populate test data: {e}"))
|
||||
scan_state = "failed"
|
||||
finally:
|
||||
scan.completed_at = datetime.now(timezone.utc)
|
||||
scan.duration = int(
|
||||
(datetime.now(timezone.utc) - scan.started_at).total_seconds()
|
||||
)
|
||||
scan.completed_at = datetime.now(UTC)
|
||||
scan.duration = int((datetime.now(UTC) - scan.started_at).total_seconds())
|
||||
scan.progress = 100
|
||||
scan.state = scan_state
|
||||
scan.unique_resource_count = num_resources
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from tasks.jobs.orphan_recovery import reconcile_orphans
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,31 @@ import logging
|
||||
import time
|
||||
|
||||
from config.custom_logging import BackendLogger
|
||||
from django.core.handlers.asgi import ASGIRequest
|
||||
from django.db import connections
|
||||
|
||||
|
||||
class CloseDBConnectionsMiddleware:
    """
    Release stale database connections once an ASGI request has been handled.

    Rationale (as stated by the original author): under the ASGI worker,
    connections opened by sync views are not released by Django's normal
    request-boundary cleanup, so they pile up idle until Postgres has no free
    slots. Only ``ASGIRequest`` instances trigger the cleanup; the sync WSGI
    test client manages its own connections and is left untouched.
    """

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        try:
            response = self.get_response(request)
        finally:
            # Runs whether the downstream handler returned or raised.
            if isinstance(request, ASGIRequest):
                self._close_stale_connections()
        return response

    @staticmethod
    def _close_stale_connections():
        """Ask each initialized connection outside an atomic block to close if unusable or obsolete."""
        for db_connection in connections.all(initialized_only=True):
            # Connections inside an atomic block are skipped.
            if db_connection.in_atomic_block:
                continue
            db_connection.close_if_unusable_or_obsolete()
|
||||
|
||||
|
||||
def extract_auth_info(request) -> dict:
|
||||
|
||||
@@ -1,26 +1,13 @@
|
||||
import uuid
|
||||
from functools import partial
|
||||
|
||||
import api.rls
|
||||
import django.contrib.auth.models
|
||||
import django.contrib.postgres.indexes
|
||||
import django.contrib.postgres.search
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
from psqlextra.backend.migrations.operations.add_default_partition import (
|
||||
PostgresAddDefaultPartition,
|
||||
)
|
||||
from psqlextra.backend.migrations.operations.create_partitioned_model import (
|
||||
PostgresCreatePartitionedModel,
|
||||
)
|
||||
from psqlextra.manager.manager import PostgresManager
|
||||
from psqlextra.models.partitioned import PostgresPartitionedModel
|
||||
from psqlextra.types import PostgresPartitioningMethod
|
||||
from uuid6 import uuid7
|
||||
|
||||
import api.rls
|
||||
from api.db_utils import (
|
||||
DB_PROWLER_PASSWORD,
|
||||
DB_PROWLER_USER,
|
||||
@@ -53,6 +40,18 @@ from api.models import (
|
||||
StateChoices,
|
||||
StatusChoices,
|
||||
)
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
from psqlextra.backend.migrations.operations.add_default_partition import (
|
||||
PostgresAddDefaultPartition,
|
||||
)
|
||||
from psqlextra.backend.migrations.operations.create_partitioned_model import (
|
||||
PostgresCreatePartitionedModel,
|
||||
)
|
||||
from psqlextra.manager.manager import PostgresManager
|
||||
from psqlextra.models.partitioned import PostgresPartitionedModel
|
||||
from psqlextra.types import PostgresPartitioningMethod
|
||||
from uuid6 import uuid7
|
||||
|
||||
DB_NAME = settings.DATABASES["default"]["NAME"]
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from api.db_utils import DB_PROWLER_USER
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import DB_PROWLER_USER
|
||||
|
||||
DB_NAME = settings.DATABASES["default"]["NAME"]
|
||||
|
||||
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_router import MainRouter
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def create_admin_role(apps, schema_editor):
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
from django_celery_beat.models import PeriodicTask
|
||||
|
||||
from api.db_utils import rls_transaction
|
||||
from api.models import Scan, StateChoices
|
||||
from django.db import migrations, models
|
||||
from django_celery_beat.models import PeriodicTask
|
||||
|
||||
|
||||
def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
|
||||
@@ -17,11 +16,11 @@ def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
|
||||
tenant_id = task_kwargs["tenant_id"]
|
||||
provider_id = task_kwargs["provider_id"]
|
||||
|
||||
current_time = datetime.now(timezone.utc)
|
||||
current_time = datetime.now(UTC)
|
||||
scheduled_time_today = datetime.combine(
|
||||
current_time.date(),
|
||||
daily_scheduled_scan_task.start_time.time(),
|
||||
tzinfo=timezone.utc,
|
||||
tzinfo=UTC,
|
||||
)
|
||||
|
||||
if current_time < scheduled_time_today:
|
||||
|
||||
@@ -2,10 +2,9 @@
|
||||
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import IntegrationTypeEnum, PostgresEnumMigration, register_enum
|
||||
from api.models import Integration
|
||||
from django.db import migrations
|
||||
|
||||
IntegrationTypeEnumMigration = PostgresEnumMigration(
|
||||
enum_name="integration_type",
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from api.rls import RowLevelSecurityConstraint
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.5 on 2025-03-25 11:29
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.7 on 2025-04-16 08:47
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
import uuid6
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from api.rls import RowLevelSecurityConstraint
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -2,13 +2,12 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -2,10 +2,9 @@
|
||||
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import PostgresEnumMigration, ProcessorTypeEnum, register_enum
|
||||
from api.models import Processor
|
||||
from django.db import migrations
|
||||
|
||||
ProcessorTypeEnumMigration = PostgresEnumMigration(
|
||||
enum_name="processor_type",
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from api.rls import RowLevelSecurityConstraint
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from functools import partial
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.7 on 2025-07-09 14:44
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,15 +2,14 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
import drf_simple_apikey.models
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -4,15 +4,14 @@ import json
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from api.db_router import MainRouter
|
||||
from config.custom_logging import BackendLogger
|
||||
from cryptography.fernet import Fernet
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
from api.db_router import MainRouter
|
||||
|
||||
logger = logging.getLogger(BackendLogger.API)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.7 on 2025-10-14 00:00
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,14 +2,13 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.contrib.postgres.fields
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.10 on 2025-09-09 09:25
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django 5.1.13 on 2025-11-05 08:37
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
|
||||
import uuid
|
||||
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.rls
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
# Generated by Django 5.1.14 on 2025-12-10
|
||||
|
||||
from django.db import migrations
|
||||
from tasks.tasks import backfill_daily_severity_summaries_task
|
||||
|
||||
from api.db_router import MainRouter
|
||||
from api.rls import Tenant
|
||||
from django.db import migrations
|
||||
from tasks.tasks import backfill_daily_severity_summaries_task
|
||||
|
||||
|
||||
def trigger_backfill_task(apps, schema_editor):
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import uuid
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Generated by Django migration for Alibaba Cloud provider support
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
import api.db_utils
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import uuid
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
import api.db_utils
|
||||
import api.rls
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user