Compare commits

..

1 Commits

Author SHA1 Message Date
Prowler Bot baaf56ea5e chore(api): Update prowler dependency to v5.27 for release 5.27.0 (#11219)
Co-authored-by: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
2026-05-19 12:17:44 +02:00
800 changed files with 43059 additions and 43356 deletions
-17
View File
@@ -1,17 +0,0 @@
{
"name": "prowler-plugins",
"description": "Prowler Cloud Security for Claude Code",
"owner": {
"name": "Prowler",
"email": "support@prowler.com"
},
"plugins": [
{
"name": "prowler",
"source": "./claude_plugins/prowler",
"description": "Prowler for Claude Code — cloud security and compliance skills powered by the Prowler MCP server. Bundles compliance triage and remediation; more skills coming.",
"category": "security",
"homepage": "https://prowler.com"
}
]
}
+1 -8
View File
@@ -11,14 +11,7 @@ envs = "wt step copy-ignored"
[[pre-start]]
deps = "uv sync"
# Block 3: prepare pnpm via corepack.
[[pre-start]]
corepack-enable = "corepack enable"
[[pre-start]]
corepack-install = "cd ui && corepack install"
# Block 4: reminder - last visible output before `wt switch` returns.
# Block 3: reminder - last visible output before `wt switch` returns.
# Hooks can't mutate the parent shell, so venv activation is manual.
[[pre-start]]
reminder = "echo '>> Reminder: activate the venv in this shell with: source .venv/bin/activate'"
+1 -1
View File
@@ -145,7 +145,7 @@ SENTRY_RELEASE=local
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.30.0
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.27.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
+22 -22
View File
@@ -6,17 +6,17 @@
version: 2
updates:
# v5
# - package-ecosystem: "pip"
# directory: "/"
# schedule:
# interval: "monthly"
# open-pull-requests-limit: 25
# target-branch: master
# labels:
# - "dependencies"
# - "pip"
# cooldown:
# default-days: 7
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 25
target-branch: master
labels:
- "dependencies"
- "pip"
cooldown:
default-days: 7
# Dependabot Updates are temporarily disabled - 2025/03/19
# - package-ecosystem: "pip"
@@ -66,17 +66,17 @@ updates:
cooldown:
default-days: 7
# - package-ecosystem: "pre-commit"
# directory: "/"
# schedule:
# interval: "monthly"
# open-pull-requests-limit: 25
# target-branch: master
# labels:
# - "dependencies"
# - "pre-commit"
# cooldown:
# default-days: 7
- package-ecosystem: "pre-commit"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 25
target-branch: master
labels:
- "dependencies"
- "pre-commit"
cooldown:
default-days: 7
# Dependabot Updates are temporarily disabled - 2025/04/15
# v4.6
-140
View File
@@ -1,140 +0,0 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:best-practices",
":enablePreCommit",
":semanticCommits",
":enableVulnerabilityAlertsWithLabel(security)",
"docker:enableMajor",
"helpers:pinGitHubActionDigestsToSemver",
"helpers:disableTypesNodeMajor",
"security:openssf-scorecard",
"customManagers:githubActionsVersions",
"customManagers:dockerfileVersions"
],
"timezone": "Europe/Madrid",
"baseBranchPatterns": [
"master"
],
"labels": [
"dependencies"
],
"dependencyDashboardTitle": "Dependency Dashboard",
"prConcurrentLimit": 20,
"prHourlyLimit": 10,
"vulnerabilityAlerts": {
"prHourlyLimit": 0,
"prConcurrentLimit": 0
},
"configMigration": true,
"minimumReleaseAge": "7 days",
"rangeStrategy": "pin",
"packageRules": [
{
"description": "Patches: 1st of every month, Madrid overnight window (22:00-06:00)",
"matchUpdateTypes": [
"patch"
],
"schedule": [
"* 22-23,0-5 1 * *"
],
"enabled": false
},
{
"description": "Minors: 8th of every 3 months, Madrid overnight window (22:00-06:00)",
"matchUpdateTypes": [
"minor"
],
"schedule": [
"* 22-23,0-5 8 */3 *"
],
"enabled": false
},
{
"description": "Majors: 15th of every 3 months, Madrid overnight window",
"matchUpdateTypes": [
"major"
],
"schedule": [
"* 22-23,0-5 15 */3 *"
],
"enabled": false
},
{
"description": "GitHub Actions - single grouped PR, no changelog, scope=ci",
"matchManagers": [
"github-actions"
],
"groupName": "github-actions",
"semanticCommitScope": "ci",
"addLabels": [
"no-changelog"
]
},
{
"description": "Docker images - single grouped PR, no changelog, scope=docker",
"matchManagers": [
"dockerfile",
"docker-compose"
],
"groupName": "docker",
"semanticCommitScope": "docker",
"addLabels": [
"no-changelog"
]
},
{
"description": "Pre-commit hooks - single grouped PR, scope=pre-commit",
"matchManagers": [
"pre-commit"
],
"groupName": "pre-commit hooks",
"semanticCommitScope": "pre-commit",
"addLabels": [
"no-changelog"
]
},
{
"description": "UI - scope=ui",
"matchFileNames": [
"ui/**"
],
"semanticCommitScope": "ui"
},
{
"description": "API - scope=api",
"matchFileNames": [
"api/**"
],
"semanticCommitScope": "api"
},
{
"description": "MCP server - scope=mcp",
"matchFileNames": [
"mcp_server/**"
],
"semanticCommitScope": "mcp"
},
{
"description": "Python SDK (root) - scope=sdk",
"matchFileNames": [
"pyproject.toml",
"poetry.lock",
"util/prowler-bulk-provisioning/**"
],
"semanticCommitScope": "sdk"
},
{
"description": "UI devDependencies - no changelog",
"matchFileNames": [
"ui/**"
],
"matchDepTypes": [
"devDependencies"
],
"addLabels": [
"no-changelog"
]
}
]
}
+2 -2
View File
@@ -35,7 +35,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -53,7 +53,7 @@ jobs:
- name: Check for API changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
api/**
+3 -3
View File
@@ -44,7 +44,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -61,12 +61,12 @@ jobs:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/init@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/api-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/analyze@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
category: '/language:${{ matrix.language }}'
+13 -19
View File
@@ -46,7 +46,7 @@ jobs:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
@@ -65,7 +65,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -108,7 +108,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -122,7 +122,6 @@ jobs:
github.com:443
powershellinfraartifacts-gkhedzdeaghdezhr.z01.azurefd.net:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
pypi.org:443
registry-1.docker.io:443
release-assets.githubusercontent.com:443
@@ -133,18 +132,14 @@ jobs:
with:
persist-credentials: false
- name: Refresh prowler SDK pin to current branch tip
- name: Pin prowler SDK to latest master commit
if: github.event_name == 'push'
run: |
# api/pyproject.toml has `@master` on master and `@v5.X` on release
# branches (set by prepare-release.yml). uv lock --upgrade-package
# re-resolves whichever ref is present against the current branch tip
# and writes the SHA into api/uv.lock. The Dockerfile runs
# `uv sync --locked`, which is what actually drives the install.
pip install --no-cache-dir "uv==0.11.14"
(cd api && uv lock --upgrade-package prowler)
LATEST_SHA=$(git ls-remote https://github.com/prowler-cloud/prowler.git refs/heads/master | cut -f1)
sed -i "s|prowler-cloud/prowler.git@master|prowler-cloud/prowler.git@${LATEST_SHA}|" api/pyproject.toml
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -155,7 +150,7 @@ jobs:
- name: Build and push API container for ${{ matrix.arch }}
id: container-push
if: github.event_name == 'push' || github.event_name == 'release' || github.event_name == 'workflow_dispatch'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.WORKING_DIRECTORY }}
push: true
@@ -175,7 +170,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -184,9 +179,8 @@ jobs:
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -236,7 +230,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -283,7 +277,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+5 -6
View File
@@ -36,7 +36,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -50,7 +50,7 @@ jobs:
- name: Check if Dockerfile changed
id: dockerfile-changed
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: api/Dockerfile
@@ -72,7 +72,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -83,7 +83,6 @@ jobs:
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
debian.map.fastlydns.net:80
release-assets.githubusercontent.com:443
objects.githubusercontent.com:443
@@ -104,7 +103,7 @@ jobs:
- name: Check for API changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: api/**
files_ignore: |
@@ -119,7 +118,7 @@ jobs:
- name: Build container
if: steps.check-changes.outputs.any_changed == 'true'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.API_WORKING_DIR }}
push: false
+2 -2
View File
@@ -50,7 +50,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -73,7 +73,7 @@ jobs:
- name: Check for API changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
api/**
+2 -2
View File
@@ -78,7 +78,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -102,7 +102,7 @@ jobs:
- name: Check for API changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
api/**
+1 -1
View File
@@ -30,7 +30,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+7 -40
View File
@@ -29,7 +29,7 @@ jobs:
patch_version: ${{ steps.detect.outputs.patch_version }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -75,7 +75,7 @@ jobs:
pull-requests: write
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -139,17 +139,6 @@ jobs:
sed -i "s|version = \"${CURRENT_API_VERSION}\"|version = \"${NEXT_API_VERSION}\"|" api/pyproject.toml
sed -i "s| version: ${CURRENT_API_VERSION}| version: ${NEXT_API_VERSION}|" api/src/backend/api/specs/v1.yaml
- name: Regenerate lockfiles after version bump
run: |
set -e
# The bumps above edit pyproject.toml / api/pyproject.toml but leave
# uv.lock / api/uv.lock stale, which makes `uv sync --locked` fail in
# the container builds. Refresh both with the uv version the images
# pin (plain `uv lock`, no --upgrade: only the version line changes).
pip install --no-cache-dir "uv==0.11.14"
uv lock
(cd api && uv lock)
- name: Bump UI version (.env)
run: |
set -e
@@ -166,7 +155,7 @@ jobs:
run: git --no-pager diff
- name: Create PR for next versions to master
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
@@ -202,7 +191,7 @@ jobs:
pull-requests: write
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -251,17 +240,6 @@ jobs:
sed -i "s|version = \"${CURRENT_API_VERSION}\"|version = \"${FIRST_API_PATCH_VERSION}\"|" api/pyproject.toml
sed -i "s| version: ${CURRENT_API_VERSION}| version: ${FIRST_API_PATCH_VERSION}|" api/src/backend/api/specs/v1.yaml
- name: Regenerate lockfiles after version bump
run: |
set -e
# The bumps above edit pyproject.toml / api/pyproject.toml but leave
# uv.lock / api/uv.lock stale, which makes `uv sync --locked` fail in
# the container builds. Refresh both with the uv version the images
# pin (plain `uv lock`, no --upgrade: only the version line changes).
pip install --no-cache-dir "uv==0.11.14"
uv lock
(cd api && uv lock)
- name: Bump UI version (.env)
run: |
set -e
@@ -271,7 +249,7 @@ jobs:
run: git --no-pager diff
- name: Create PR for first patch versions to version branch
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
@@ -307,7 +285,7 @@ jobs:
pull-requests: write
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -363,17 +341,6 @@ jobs:
sed -i "s|version = \"${CURRENT_API_VERSION}\"|version = \"${NEXT_API_PATCH_VERSION}\"|" api/pyproject.toml
sed -i "s| version: ${CURRENT_API_VERSION}| version: ${NEXT_API_PATCH_VERSION}|" api/src/backend/api/specs/v1.yaml
- name: Regenerate lockfiles after version bump
run: |
set -e
# The bumps above edit pyproject.toml / api/pyproject.toml but leave
# uv.lock / api/uv.lock stale, which makes `uv sync --locked` fail in
# the container builds. Refresh both with the uv version the images
# pin (plain `uv lock`, no --upgrade: only the version line changes).
pip install --no-cache-dir "uv==0.11.14"
uv lock
(cd api && uv lock)
- name: Bump UI version (.env)
run: |
set -e
@@ -383,7 +350,7 @@ jobs:
run: git --no-pager diff
- name: Create PR for next patch versions to version branch
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
+2 -2
View File
@@ -36,7 +36,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -51,6 +51,6 @@ jobs:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@a16621b09c6db4281f81a93cb393b05dcd7b7165 # v0.5.5
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
with:
token: ${{ github.token }}
+1 -1
View File
@@ -22,7 +22,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+1 -1
View File
@@ -26,7 +26,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+1 -1
View File
@@ -25,7 +25,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+2 -2
View File
@@ -25,7 +25,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
# We can't block as Trufflehog needs to verify secrets against vendors
egress-policy: audit
@@ -44,6 +44,6 @@ jobs:
- name: Scan diff for secrets with TruffleHog
# Action auto-injects --since-commit/--branch from event payload; passing them in extra_args produces duplicate flags.
uses: trufflesecurity/trufflehog@37b77001d0174ebec2fcca2bd83ff83a6d45a3ab # v3.95.3
uses: trufflesecurity/trufflehog@ef6e76c3c4023279497fab4721ffa071a722fd05 # v3.92.4
with:
extra_args: --results=verified,unknown
+1 -1
View File
@@ -33,7 +33,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+1 -1
View File
@@ -26,7 +26,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+1 -1
View File
@@ -22,7 +22,7 @@ jobs:
issues: write
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+12 -12
View File
@@ -66,12 +66,12 @@ jobs:
title: ${{ steps.compute-text.outputs.title }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Check workflow file timestamps
@@ -135,12 +135,12 @@ jobs:
secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Checkout repository
@@ -870,12 +870,12 @@ jobs:
total_count: ${{ steps.missing_tool.outputs.total_count }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Download agent output artifact
@@ -982,12 +982,12 @@ jobs:
success: ${{ steps.parse_results.outputs.success }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Download agent artifacts
@@ -1091,12 +1091,12 @@ jobs:
activated: ${{ (steps.check_membership.outputs.is_team_member == 'true') && (steps.check_rate_limit.outputs.rate_limit_ok == 'true') }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Add eyes reaction for immediate feedback
@@ -1164,12 +1164,12 @@ jobs:
process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Setup Scripts
uses: github/gh-aw/actions/setup@4d44d0e89851a877f4ddc0cb6c0197e42b1016c5 # v0.73.0
uses: github/gh-aw/actions/setup@9382be3ca9ac18917e111a99d4e6bbff58d0dccc # v0.43.23
with:
destination: /opt/gh-aw/actions
- name: Download agent output artifact
+3 -3
View File
@@ -27,12 +27,12 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
- name: Apply labels to PR
uses: actions/labeler@f27b608878404679385c85cfa523b85ccb86e213 # v6.1.0
uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
with:
sync-labels: true
@@ -46,7 +46,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
-60
View File
@@ -1,60 +0,0 @@
name: 'Docs: Markdown Lint'
on:
push:
branches:
- 'master'
- 'v5.*'
pull_request:
branches:
- 'master'
- 'v5.*'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
markdown-lint:
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
api.github.com:443
github.com:443
registry.npmjs.org:443
release-assets.githubusercontent.com:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version-file: ui/.nvmrc
- name: Setup pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
package_json_file: ui/package.json
run_install: false
- name: Run markdownlint
# Pin must match .pre-commit-config.yaml so prek and CI behave identically.
# pnpm dlx doesn't accept --ignore-scripts as a flag; the env var
# disables postinstall scripts on transitives the same way.
env:
pnpm_config_ignore_scripts: 'true'
run: pnpm dlx markdownlint-cli@0.45.0 '**/*.md'
+9 -11
View File
@@ -45,7 +45,7 @@ jobs:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
@@ -64,7 +64,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -106,7 +106,7 @@ jobs:
packages: write
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -114,7 +114,6 @@ jobs:
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
ghcr.io:443
pkg-containers.githubusercontent.com:443
files.pythonhosted.org:443
@@ -126,7 +125,7 @@ jobs:
persist-credentials: false
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -137,7 +136,7 @@ jobs:
- name: Build and push MCP container for ${{ matrix.arch }}
id: container-push
if: github.event_name == 'push' || github.event_name == 'release' || github.event_name == 'workflow_dispatch'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.WORKING_DIRECTORY }}
push: true
@@ -165,19 +164,18 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
github.com:443
release-assets.githubusercontent.com:443
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -227,7 +225,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -274,7 +272,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+5 -5
View File
@@ -36,7 +36,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -50,7 +50,7 @@ jobs:
- name: Check if Dockerfile changed
id: dockerfile-changed
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: mcp_server/Dockerfile
@@ -71,7 +71,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -99,7 +99,7 @@ jobs:
- name: Check for MCP changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: mcp_server/**
files_ignore: |
@@ -112,7 +112,7 @@ jobs:
- name: Build MCP container
if: steps.check-changes.outputs.any_changed == 'true'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.MCP_WORKING_DIR }}
push: false
+3 -3
View File
@@ -29,7 +29,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -67,7 +67,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -113,7 +113,7 @@ jobs:
- name: Publish prowler-mcp package to PyPI
if: steps.pypi-check.outputs.skip != 'true'
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
packages-dir: ${{ env.WORKING_DIRECTORY }}/dist/
print-hash: true
-75
View File
@@ -1,75 +0,0 @@
name: 'MCP: Security'
on:
push:
branches:
- 'master'
- 'v5.*'
paths:
- 'mcp_server/pyproject.toml'
- 'mcp_server/uv.lock'
- '.github/workflows/mcp-security.yml'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
pull_request:
branches:
- 'master'
- 'v5.*'
paths:
- 'mcp_server/pyproject.toml'
- 'mcp_server/uv.lock'
- '.github/workflows/mcp-security.yml'
- '.github/actions/osv-scanner/**'
- '.github/scripts/osv-scan.sh'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
mcp-security-scans:
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
contents: read
pull-requests: write # osv-scanner action posts/updates a PR comment with findings
steps:
- name: Harden Runner
uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1
with:
egress-policy: block
allowed-endpoints: >
github.com:443
api.github.com:443
objects.githubusercontent.com:443
release-assets.githubusercontent.com:443
api.osv.dev:443
api.deps.dev:443
osv-vulnerabilities.storage.googleapis.com:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# zizmor: ignore[artipacked]
persist-credentials: true # Required by tj-actions/changed-files to fetch PR branch
- name: Check for MCP dependency changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: |
mcp_server/pyproject.toml
mcp_server/uv.lock
.github/workflows/mcp-security.yml
.github/actions/osv-scanner/**
.github/scripts/osv-scan.sh
- name: Dependency vulnerability scan with osv-scanner
if: steps.check-changes.outputs.any_changed == 'true'
uses: ./.github/actions/osv-scanner
with:
lockfile: mcp_server/uv.lock
@@ -48,7 +48,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -61,7 +61,7 @@ jobs:
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
- name: Build ${{ matrix.component }} container (linux/arm64)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ matrix.context }}
file: ${{ matrix.dockerfile }}
@@ -83,7 +83,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+2 -2
View File
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -52,7 +52,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
api/**
@@ -35,7 +35,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -56,7 +56,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
prowler/providers/**/services/**/*.metadata.json
+2 -2
View File
@@ -28,7 +28,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -47,7 +47,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: '**'
+1 -1
View File
@@ -26,7 +26,7 @@ jobs:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+2 -2
View File
@@ -29,7 +29,7 @@ jobs:
pull-requests: write
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -338,7 +338,7 @@ jobs:
- name: Create PR for API dependency update
if: ${{ env.PATCH_VERSION == '0' }}
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
commit-message: 'chore(api): update prowler dependency to ${{ env.BRANCH_NAME }} for release ${{ env.PROWLER_VERSION }}'
@@ -1,57 +0,0 @@
name: 'CI: Renovate Config Validate'
on:
pull_request:
branches:
- 'master'
paths:
- '.github/renovate.json'
- '.pre-commit-config.yaml'
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
env:
# renovate: datasource=pypi depName=prek
PREK_VERSION: '0.4.0'
jobs:
validate:
name: Validate Renovate config
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
with:
egress-policy: block
allowed-endpoints: >
api.github.com:443
github.com:443
objects.githubusercontent.com:443
codeload.github.com:443
release-assets.githubusercontent.com:443
pypi.org:443
files.pythonhosted.org:443
registry.npmjs.org:443
nodejs.org:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Set up uv
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
- name: Install prek
run: uv tool install "prek==${PREK_VERSION}"
- name: Validate Renovate config
run: prek run renovate-config-validator --files .github/renovate.json
@@ -25,7 +25,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+2 -2
View File
@@ -32,7 +32,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -48,7 +48,7 @@ jobs:
- name: Check for SDK changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: ./**
files_ignore: |
+3 -3
View File
@@ -51,7 +51,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -66,12 +66,12 @@ jobs:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/init@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/sdk-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/analyze@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
category: '/language:${{ matrix.language }}'
+12 -14
View File
@@ -60,7 +60,7 @@ jobs:
contents: read
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -98,7 +98,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -141,7 +141,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -149,7 +149,6 @@ jobs:
public.ecr.aws:443
registry-1.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
auth.docker.io:443
debian.map.fastlydns.net:80
github.com:443
@@ -168,13 +167,13 @@ jobs:
persist-credentials: false
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to Public ECR
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: public.ecr.aws
username: ${{ secrets.PUBLIC_ECR_AWS_ACCESS_KEY_ID }}
@@ -188,7 +187,7 @@ jobs:
- name: Build and push SDK container for ${{ matrix.arch }}
id: container-push
if: github.event_name == 'push' || github.event_name == 'release' || github.event_name == 'workflow_dispatch'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: .
file: ${{ env.DOCKERFILE_PATH }}
@@ -209,7 +208,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -217,20 +216,19 @@ jobs:
auth.docker.io:443
public.ecr.aws:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
github.com:443
release-assets.githubusercontent.com:443
api.ecr-public.us-east-1.amazonaws.com:443
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to Public ECR
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: public.ecr.aws
username: ${{ secrets.PUBLIC_ECR_AWS_ACCESS_KEY_ID }}
@@ -267,7 +265,7 @@ jobs:
# Push to toniblyx/prowler only for current version (latest/stable/release tags)
- name: Login to DockerHub (toniblyx)
if: needs.setup.outputs.latest_tag == 'latest'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.TONIBLYX_DOCKERHUB_USERNAME }}
password: ${{ secrets.TONIBLYX_DOCKERHUB_PASSWORD }}
@@ -292,7 +290,7 @@ jobs:
# Re-login as prowlercloud for cleanup of intermediate tags
- name: Login to DockerHub (prowlercloud)
if: always()
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -320,7 +318,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+5 -6
View File
@@ -41,7 +41,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -55,7 +55,7 @@ jobs:
- name: Check if Dockerfile changed
id: dockerfile-changed
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: Dockerfile
@@ -77,7 +77,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -85,7 +85,6 @@ jobs:
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
api.github.com:443
mirror.gcr.io:443
check.trivy.dev:443
@@ -109,7 +108,7 @@ jobs:
- name: Check for SDK changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: ./**
files_ignore: |
@@ -138,7 +137,7 @@ jobs:
- name: Build SDK container
if: steps.check-changes.outputs.any_changed == 'true'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: .
push: false
+5 -5
View File
@@ -28,7 +28,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -66,7 +66,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -85,7 +85,7 @@ jobs:
run: uv build
- name: Publish Prowler package to PyPI
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
print-hash: true
@@ -102,7 +102,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -129,6 +129,6 @@ jobs:
run: uv build
- name: Publish prowler-cloud package to PyPI
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
print-hash: true
@@ -27,7 +27,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -47,7 +47,7 @@ jobs:
run: pip install boto3
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@d979d5b3a71173a29b74b5b88418bfda9437d885 # v6.1.1
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: ${{ secrets.DEV_IAM_ROLE_ARN }}
@@ -58,7 +58,7 @@ jobs:
- name: Create pull request
id: create-pr
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
author: 'prowler-bot <179230569+prowler-bot@users.noreply.github.com>'
@@ -25,7 +25,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -55,7 +55,7 @@ jobs:
- name: Create pull request
id: create-pr
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
author: 'prowler-bot <179230569+prowler-bot@users.noreply.github.com>'
+2 -2
View File
@@ -47,7 +47,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -69,7 +69,7 @@ jobs:
- name: Check for SDK changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files:
./**
+18 -67
View File
@@ -32,7 +32,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -46,7 +46,6 @@ jobs:
schema.ocsf.io:443
registry-1.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
powershellinfraartifacts-gkhedzdeaghdezhr.z01.azurefd.net:443
o26192.ingest.us.sentry.io:443
management.azure.com:443
@@ -70,7 +69,7 @@ jobs:
- name: Check for SDK changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: ./**
files_ignore: |
@@ -103,7 +102,7 @@ jobs:
- name: Check if AWS files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-aws
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/aws/**
@@ -233,7 +232,7 @@ jobs:
- name: Check if Azure files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-azure
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/azure/**
@@ -257,7 +256,7 @@ jobs:
- name: Check if GCP files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-gcp
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/gcp/**
@@ -281,7 +280,7 @@ jobs:
- name: Check if Kubernetes files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-kubernetes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/kubernetes/**
@@ -305,7 +304,7 @@ jobs:
- name: Check if GitHub files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-github
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/github/**
@@ -329,7 +328,7 @@ jobs:
- name: Check if Okta files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-okta
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/okta/**
@@ -353,7 +352,7 @@ jobs:
- name: Check if NHN files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-nhn
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/nhn/**
@@ -377,7 +376,7 @@ jobs:
- name: Check if M365 files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-m365
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/m365/**
@@ -401,7 +400,7 @@ jobs:
- name: Check if IaC files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-iac
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/iac/**
@@ -425,7 +424,7 @@ jobs:
- name: Check if MongoDB Atlas files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-mongodbatlas
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/mongodbatlas/**
@@ -449,7 +448,7 @@ jobs:
- name: Check if OCI files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-oraclecloud
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/oraclecloud/**
@@ -473,7 +472,7 @@ jobs:
- name: Check if OpenStack files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-openstack
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/openstack/**
@@ -497,7 +496,7 @@ jobs:
- name: Check if Google Workspace files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-googleworkspace
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/googleworkspace/**
@@ -521,7 +520,7 @@ jobs:
- name: Check if Vercel files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-vercel
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/**/vercel/**
@@ -541,59 +540,11 @@ jobs:
flags: prowler-py${{ matrix.python-version }}-vercel
files: ./vercel_coverage.xml
# Scaleway Provider
- name: Check if Scaleway files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-scaleway
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: |
./prowler/**/scaleway/**
./tests/**/scaleway/**
./uv.lock
- name: Run Scaleway tests
if: steps.changed-scaleway.outputs.any_changed == 'true'
run: uv run pytest -n auto --cov=./prowler/providers/scaleway --cov-report=xml:scaleway_coverage.xml tests/providers/scaleway
- name: Upload Scaleway coverage to Codecov
if: steps.changed-scaleway.outputs.any_changed == 'true'
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: prowler-py${{ matrix.python-version }}-scaleway
files: ./scaleway_coverage.xml
# StackIT Provider
- name: Check if StackIT files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-stackit
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: |
./prowler/**/stackit/**
./tests/**/stackit/**
./uv.lock
- name: Run StackIT tests
if: steps.changed-stackit.outputs.any_changed == 'true'
run: uv run pytest -n auto --cov=./prowler/providers/stackit --cov-report=xml:stackit_coverage.xml tests/providers/stackit
- name: Upload StackIT coverage to Codecov
if: steps.changed-stackit.outputs.any_changed == 'true'
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: prowler-py${{ matrix.python-version }}-stackit
files: ./stackit_coverage.xml
# Lib
- name: Check if Lib files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-lib
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/lib/**
@@ -617,7 +568,7 @@ jobs:
- name: Check if Config files changed
if: steps.check-changes.outputs.any_changed == 'true'
id: changed-config
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
./prowler/config/**
+2 -2
View File
@@ -52,7 +52,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -68,7 +68,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
- name: Setup Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+3 -3
View File
@@ -47,7 +47,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -62,12 +62,12 @@ jobs:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/init@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/ui-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/analyze@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
category: '/language:${{ matrix.language }}'
+9 -11
View File
@@ -48,7 +48,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -67,7 +67,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -110,13 +110,12 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
registry-1.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
auth.docker.io:443
registry.npmjs.org:443
dl-cdn.alpinelinux.org:443
@@ -130,7 +129,7 @@ jobs:
persist-credentials: false
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -141,7 +140,7 @@ jobs:
- name: Build and push UI container for ${{ matrix.arch }}
id: container-push
if: github.event_name == 'push' || github.event_name == 'release' || github.event_name == 'workflow_dispatch'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
@@ -164,7 +163,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -173,10 +172,9 @@ jobs:
registry-1.docker.io:443
auth.docker.io:443
production.cloudflare.docker.com:443
production.cloudfront.docker.com:443
- name: Login to DockerHub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -226,7 +224,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -273,7 +271,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
+5 -5
View File
@@ -36,7 +36,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -50,7 +50,7 @@ jobs:
- name: Check if Dockerfile changed
id: dockerfile-changed
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: ui/Dockerfile
@@ -72,7 +72,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -100,7 +100,7 @@ jobs:
- name: Check for UI changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: ui/**
files_ignore: |
@@ -114,7 +114,7 @@ jobs:
- name: Build UI container
if: steps.check-changes.outputs.any_changed == 'true'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
with:
context: ${{ env.UI_WORKING_DIR }}
target: prod
+5 -5
View File
@@ -85,7 +85,7 @@ jobs:
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
@@ -172,7 +172,7 @@ jobs:
- name: Setup Node.js
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version-file: 'ui/.nvmrc'
node-version: '24.13.0'
- name: Setup pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
@@ -184,7 +184,7 @@ jobs:
run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
- name: Setup pnpm and Next.js cache
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: |
${{ env.STORE_PATH }}
@@ -204,7 +204,7 @@ jobs:
run: pnpm run build
- name: Cache Playwright browsers
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
id: playwright-cache
with:
path: ~/.cache/ms-playwright
@@ -295,7 +295,7 @@ jobs:
contents: read
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: audit
+2 -2
View File
@@ -39,7 +39,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -59,7 +59,7 @@ jobs:
- name: Check for UI dependency changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
ui/package.json
+9 -8
View File
@@ -16,6 +16,7 @@ concurrency:
env:
UI_WORKING_DIR: ./ui
NODE_VERSION: "24.13.0"
permissions: {}
@@ -31,7 +32,7 @@ jobs:
steps:
- name: Harden Runner
uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
with:
egress-policy: block
allowed-endpoints: >
@@ -53,7 +54,7 @@ jobs:
- name: Check for UI changes
id: check-changes
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
ui/**
@@ -66,7 +67,7 @@ jobs:
- name: Get changed source files for targeted tests
id: changed-source
if: steps.check-changes.outputs.any_changed == 'true'
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
ui/**/*.ts
@@ -82,7 +83,7 @@ jobs:
- name: Check for critical path changes (run all tests)
id: critical-changes
if: steps.check-changes.outputs.any_changed == 'true'
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files: |
ui/lib/**
@@ -92,11 +93,11 @@ jobs:
ui/vitest.config.ts
ui/vitest.setup.ts
- name: Setup Node.js
- name: Setup Node.js ${{ env.NODE_VERSION }}
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version-file: 'ui/.nvmrc'
node-version: ${{ env.NODE_VERSION }}
- name: Setup pnpm
if: steps.check-changes.outputs.any_changed == 'true'
@@ -112,7 +113,7 @@ jobs:
- name: Setup pnpm and Next.js cache
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: |
${{ env.STORE_PATH }}
@@ -161,7 +162,7 @@ jobs:
- name: Cache Playwright browsers
if: steps.check-changes.outputs.any_changed == 'true'
id: playwright-cache
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-chromium-${{ hashFiles('ui/pnpm-lock.yaml') }}
+1
View File
@@ -60,6 +60,7 @@ htmlcov/
**/mcp-config.json
**/mcpServers.json
.mcp/
.mcp.json
# AI Coding Assistants - Cursor
.cursorignore
-10
View File
@@ -1,10 +0,0 @@
{
"extends": "markdownlint/style/prettier",
"first-line-h1": false,
"no-duplicate-heading": {
"siblings_only": true
},
"no-inline-html": false,
"line-length": false,
"no-bare-urls": false
}
-16
View File
@@ -1,16 +0,0 @@
node_modules/
ui/node_modules/
.git/
.venv/
**/.venv/
dist/
build/
htmlcov/
.next/
ui/.next/
ui/out/
contrib/
# Auto-generated content (keepachangelog format legitimately repeats section headings).
# Revisit with the team — see beads task on markdownlint rule triage.
**/CHANGELOG.md
-15
View File
@@ -49,14 +49,6 @@ repos:
files: ^\.github/(workflows|actions)/.+\.ya?ml$|^\.github/dependabot\.ya?ml$
priority: 30
## RENOVATE
- repo: https://github.com/renovatebot/pre-commit-hooks
rev: 43.150.0
hooks:
- id: renovate-config-validator
files: ^\.github/renovate\.json$
priority: 10
## BASH
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.11.0
@@ -133,13 +125,6 @@ repos:
pass_filenames: false
priority: 50
## MARKDOWN
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.45.0
hooks:
- id: markdownlint
priority: 30
## CONTAINERS
- repo: https://github.com/hadolint/hadolint
rev: v2.14.0
-1
View File
@@ -1,3 +1,2 @@
.envrc
ui/.env.local
openspec/
-2
View File
@@ -11,7 +11,6 @@
Use these skills for detailed patterns on-demand:
### Generic Skills (Any Project)
| Skill | Description | URL |
|-------|-------------|-----|
| `typescript` | Const types, flat interfaces, utility types | [SKILL.md](skills/typescript/SKILL.md) |
@@ -29,7 +28,6 @@ Use these skills for detailed patterns on-demand:
| `tdd` | Test-Driven Development workflow | [SKILL.md](skills/tdd/SKILL.md) |
### Prowler-Specific Skills
| Skill | Description | URL |
|-------|-------------|-----|
| `prowler` | Project overview, component navigation | [SKILL.md](skills/prowler/SKILL.md) |
+3 -4
View File
@@ -1,4 +1,4 @@
# Do you want to learn on how to
# Do you want to learn on how to...
- [Contribute with your code or fixes to Prowler](https://docs.prowler.com/developer-guide/introduction)
- [Create a new provider](https://docs.prowler.com/developer-guide/provider)
@@ -32,6 +32,5 @@ Provider-specific developer notes:
Want some swag as appreciation for your contribution?
## Prowler Developer Guide
<https://goto.prowler.com/devguide>
# Prowler Developer Guide
https://goto.prowler.com/devguide
+7 -7
View File
@@ -76,11 +76,11 @@ USER prowler
WORKDIR /home/prowler
# Copy necessary files
COPY --chown=prowler:prowler prowler/ /home/prowler/prowler/
COPY --chown=prowler:prowler dashboard/ /home/prowler/dashboard/
COPY --chown=prowler:prowler pyproject.toml uv.lock /home/prowler/
COPY --chown=prowler:prowler README.md /home/prowler/
COPY --chown=prowler:prowler prowler/providers/m365/lib/powershell/m365_powershell.py /home/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py
COPY prowler/ /home/prowler/prowler/
COPY dashboard/ /home/prowler/dashboard/
COPY pyproject.toml uv.lock /home/prowler/
COPY README.md /home/prowler/
COPY prowler/providers/m365/lib/powershell/m365_powershell.py /home/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py
# Install Python dependencies
ENV HOME='/home/prowler'
@@ -89,7 +89,7 @@ ENV PATH="${HOME}/.local/bin:${PATH}"
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir uv==0.11.14
RUN uv sync --locked --compile-bytecode && \
RUN uv sync --compile-bytecode && \
rm -rf ~/.cache/uv
# Install PowerShell modules
@@ -100,4 +100,4 @@ RUN pip uninstall dash-html-components -y && \
pip uninstall dash-core-components -y
USER prowler
ENTRYPOINT ["/home/prowler/.venv/bin/prowler"]
ENTRYPOINT [".venv/bin/prowler"]
+22 -35
View File
@@ -1,6 +1,6 @@
<p align="center">
<img align="center" alt="Prowler logo" src="https://github.com/prowler-cloud/prowler/blob/master/docs/img/prowler-logo-black.png#gh-light-mode-only" width="50%" height="50%">
<img align="center" alt="Prowler logo" src="https://github.com/prowler-cloud/prowler/blob/master/docs/img/prowler-logo-white.png#gh-dark-mode-only" width="50%" height="50%">
<img align="center" src="https://github.com/prowler-cloud/prowler/blob/master/docs/img/prowler-logo-black.png#gh-light-mode-only" width="50%" height="50%">
<img align="center" src="https://github.com/prowler-cloud/prowler/blob/master/docs/img/prowler-logo-white.png#gh-dark-mode-only" width="50%" height="50%">
</p>
<p align="center">
<b><i>Prowler</b> is the Open Cloud Security Platform trusted by thousands to automate security and compliance in any cloud environment. With hundreds of ready-to-use checks and compliance frameworks, Prowler delivers real-time, customizable monitoring and seamless integrations, making cloud security simple, scalable, and cost-effective for organizations of any size.
@@ -22,8 +22,8 @@
<a href="https://pypistats.org/packages/prowler"><img alt="PyPI Downloads" src="https://img.shields.io/pypi/dw/prowler.svg?label=downloads"></a>
<a href="https://hub.docker.com/r/toniblyx/prowler"><img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/toniblyx/prowler"></a>
<a href="https://gallery.ecr.aws/prowler-cloud/prowler"><img width="120" height="19" alt="AWS ECR Gallery" src="https://user-images.githubusercontent.com/3985464/151531396-b6535a68-c907-44eb-95a1-a09508178616.png"></a>
<a href="https://codecov.io/gh/prowler-cloud/prowler"><img alt="Codecov coverage" src="https://codecov.io/gh/prowler-cloud/prowler/graph/badge.svg?token=OflBGsdpDl"/></a>
<a href="https://insights.linuxfoundation.org/project/prowler-cloud-prowler"><img alt="Linux Foundation insights health score" src="https://insights.linuxfoundation.org/api/badge/health-score?project=prowler-cloud-prowler"/></a>
<a href="https://codecov.io/gh/prowler-cloud/prowler"><img src="https://codecov.io/gh/prowler-cloud/prowler/graph/badge.svg?token=OflBGsdpDl"/></a>
<a href="https://insights.linuxfoundation.org/project/prowler-cloud-prowler"><img src="https://insights.linuxfoundation.org/api/badge/health-score?project=prowler-cloud-prowler"/></a>
</p>
<p align="center">
<a href="https://github.com/prowler-cloud/prowler/releases"><img alt="Version" src="https://img.shields.io/github/v/release/prowler-cloud/prowler"></a>
@@ -36,7 +36,7 @@
</p>
<hr>
<p align="center">
<img align="center" alt="Prowler Cloud demo" src="/docs/img/prowler-cloud.gif" width="100%" height="100%">
<img align="center" src="/docs/img/prowler-cloud.gif" width="100%" height="100%">
</p>
# Description
@@ -122,7 +122,6 @@ Every AWS provider scan will enqueue an Attack Paths ingestion job automatically
| Vercel | 26 | 6 | 0 | 8 | Official | UI, API, CLI |
| Okta | 1 | 1 | 0 | 1 | Official | CLI |
| Scaleway [Contact us](https://prowler.com/contact) | 1 | 1 | 0 | 1 | Unofficial | CLI |
| StackIT [Contact us](https://prowler.com/contact) | 4 | 1 | 0 | 1 | Unofficial | CLI |
| NHN | 6 | 2 | 1 | 0 | Unofficial | CLI |
> [!Note]
@@ -147,13 +146,11 @@ Prowler App offers flexible installation methods tailored to various environment
### Docker Compose
#### Requirements
**Requirements**
- `Docker Compose` installed: https://docs.docker.com/compose/install/.
* `Docker Compose` installed: https://docs.docker.com/compose/install/.
#### Commands
_macOS/Linux:_
**Commands**
``` console
VERSION=$(curl -s https://api.github.com/repos/prowler-cloud/prowler/releases/latest | jq -r .tag_name)
@@ -163,16 +160,6 @@ curl -sLO "https://raw.githubusercontent.com/prowler-cloud/prowler/refs/tags/${V
docker compose up -d
```
_Windows PowerShell:_
``` powershell
$VERSION = (Invoke-RestMethod -Uri "https://api.github.com/repos/prowler-cloud/prowler/releases/latest").tag_name
Invoke-WebRequest -Uri "https://raw.githubusercontent.com/prowler-cloud/prowler/refs/tags/$VERSION/docker-compose.yml" -OutFile "docker-compose.yml"
# Environment variables can be customized in the .env file. Using default values in production environments is not recommended.
Invoke-WebRequest -Uri "https://raw.githubusercontent.com/prowler-cloud/prowler/refs/tags/$VERSION/.env" -OutFile ".env"
docker compose up -d
```
> [!WARNING]
> 🔒 For a secure setup, the API auto-generates a unique key pair, `DJANGO_TOKEN_SIGNING_KEY` and `DJANGO_TOKEN_VERIFYING_KEY`, and stores it in `~/.config/prowler-api` (non-container) or the bound Docker volume in `_data/api` (container). Never commit or reuse static/default keys. To rotate keys, delete the stored key files and restart the API.
@@ -188,14 +175,14 @@ You can find more information in the [Troubleshooting](./docs/troubleshooting.md
### From GitHub
#### Requirements
**Requirements**
- `git` installed.
- `uv` installed: [uv installation](https://docs.astral.sh/uv/getting-started/installation/).
- `pnpm` installed: [pnpm installation](https://pnpm.io/installation).
- `Docker Compose` installed: https://docs.docker.com/compose/install/.
* `git` installed.
* `uv` installed: [uv installation](https://docs.astral.sh/uv/getting-started/installation/).
* `pnpm` installed: [pnpm installation](https://pnpm.io/installation).
* `Docker Compose` installed: https://docs.docker.com/compose/install/.
#### Commands to run the API
**Commands to run the API**
``` console
git clone https://github.com/prowler-cloud/prowler
@@ -212,7 +199,7 @@ gunicorn -c config/guniconf.py config.wsgi:application
> After completing the setup, access the API documentation at http://localhost:8080/api/v1/docs.
#### Commands to run the API Worker
**Commands to run the API Worker**
``` console
git clone https://github.com/prowler-cloud/prowler
@@ -225,7 +212,7 @@ cd src/backend
python -m celery -A config.celery worker -l info -E
```
#### Commands to run the API Scheduler
**Commands to run the API Scheduler**
``` console
git clone https://github.com/prowler-cloud/prowler
@@ -238,7 +225,7 @@ cd src/backend
python -m celery -A config.celery beat -l info --scheduler django_celery_beat.schedulers:DatabaseScheduler
```
#### Commands to run the UI
**Commands to run the UI**
``` console
git clone https://github.com/prowler-cloud/prowler
@@ -250,7 +237,7 @@ pnpm start
> Once configured, access the Prowler App at http://localhost:3000. Sign up using your email and password to get started.
#### Pre-commit Hooks Setup
**Pre-commit Hooks Setup**
Some pre-commit hooks require tools installed on your system:
@@ -270,14 +257,14 @@ prowler -v
### Containers
#### Available Versions of Prowler CLI
**Available Versions of Prowler CLI**
The following versions of Prowler CLI are available, depending on your requirements:
- `latest`: Synchronizes with the `master` branch. Note that this version is not stable.
- `v4-latest`: Synchronizes with the `v4` branch. Note that this version is not stable.
- `v3-latest`: Synchronizes with the `v3` branch. Note that this version is not stable.
- `<x.y.z>` (release): Stable releases corresponding to specific versions. See the [complete list of Prowler releases](https://github.com/prowler-cloud/prowler/releases).
- `<x.y.z>` (release): Stable releases corresponding to specific versions. You can find the complete list of releases [here](https://github.com/prowler-cloud/prowler/releases).
- `stable`: Always points to the latest release.
- `v4-stable`: Always points to the latest release for v4.
- `v3-stable`: Always points to the latest release for v3.
@@ -306,7 +293,7 @@ python prowler-cli.py -v
# 🛡️ GitHub Action
The official **Prowler GitHub Action** runs Prowler scans in your GitHub workflows using the official [`prowlercloud/prowler`](https://hub.docker.com/r/prowlercloud/prowler) Docker image. Scans run on any [supported provider](https://docs.prowler.com/user-guide/providers/), with optional [`--push-to-cloud`](https://docs.prowler.com/user-guide/tutorials/prowler-import-findings) to send findings to Prowler Cloud and optional SARIF upload so findings show up in the repo's **Security → Code scanning** tab and as inline PR annotations.
The official **Prowler GitHub Action** runs Prowler scans in your GitHub workflows using the official [`prowlercloud/prowler`](https://hub.docker.com/r/prowlercloud/prowler) Docker image. Scans run on any [supported provider](https://docs.prowler.com/user-guide/providers/), with optional [`--push-to-cloud`](https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings) to send findings to Prowler Cloud and optional SARIF upload so findings show up in the repo's **Security → Code scanning** tab and as inline PR annotations.
```yaml
name: Prowler IaC Scan
@@ -351,7 +338,7 @@ Full configuration, per-provider authentication, and SARIF examples: [Prowler Gi
## Prowler CLI
### Running Prowler
**Running Prowler**
Prowler can be executed across various environments, offering flexibility to meet your needs. It can be run from:
+3 -3
View File
@@ -22,7 +22,7 @@ inputs:
required: false
default: json-ocsf
push-to-cloud:
description: Push scan findings to Prowler Cloud. Requires the PROWLER_CLOUD_API_KEY environment variable. See https://docs.prowler.com/user-guide/tutorials/prowler-import-findings#using-the-cli
description: Push scan findings to Prowler Cloud. Requires the PROWLER_CLOUD_API_KEY environment variable. See https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings#using-the-cli
required: false
default: "false"
flags:
@@ -167,7 +167,7 @@ runs:
- name: Upload SARIF to GitHub Code Scanning
if: always() && inputs.upload-sarif == 'true' && steps.find-sarif.outputs.sarif_path != ''
uses: github/codeql-action/upload-sarif@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
with:
sarif_file: ${{ steps.find-sarif.outputs.sarif_path }}
category: ${{ inputs.sarif-category }}
@@ -299,7 +299,7 @@ runs:
echo ""
echo "**Get started in 3 steps:**"
echo "1. Create an account at [cloud.prowler.com](https://cloud.prowler.com)"
echo "2. Generate a Prowler Cloud API key ([docs](https://docs.prowler.com/user-guide/tutorials/prowler-import-findings#using-the-cli))"
echo "2. Generate a Prowler Cloud API key ([docs](https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings#using-the-cli))"
echo "3. Add \`PROWLER_CLOUD_API_KEY\` to your GitHub secrets and set \`push-to-cloud: true\` on this action"
echo ""
echo "See [prowler.com/pricing](https://prowler.com/pricing) for plan details."
+4 -4
View File
@@ -10,7 +10,7 @@
> - [`jsonapi`](../skills/jsonapi/SKILL.md) - Strict JSON:API v1.1 spec compliance
> - [`pytest`](../skills/pytest/SKILL.md) - Generic pytest patterns
## Auto-invoke Skills
### Auto-invoke Skills
When performing these actions, ALWAYS invoke the corresponding skill FIRST:
@@ -81,7 +81,7 @@ When performing these actions, ALWAYS invoke the corresponding skill FIRST:
## DECISION TREES
### Serializer Selection
```text
```
Read → <Model>Serializer
Create → <Model>CreateSerializer
Update → <Model>UpdateSerializer
@@ -89,7 +89,7 @@ Nested read → <Model>IncludeSerializer
```
### Task vs View
```text
```
< 100ms → View
> 100ms or external API → Celery task
Needs retry → Celery task
@@ -105,7 +105,7 @@ Django 5.1.x | DRF 3.15.x | djangorestframework-jsonapi 7.x | Celery 5.4.x | Pos
## PROJECT STRUCTURE
```text
```
api/src/backend/
├── api/ # Main Django app
│ ├── v1/ # API version 1 (views, serializers, urls)
+1 -54
View File
@@ -2,60 +2,6 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.31.0] (Prowler UNRELEASED)
### 🚀 Added
- Automatic recovery of allowlisted idempotent background tasks whose worker died during a deploy or crash: stuck scan and summary tasks are detected and re-run instead of staying pending forever, with a `reconcile_orphan_tasks` management command for on-demand recovery [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416)
- Jira integration no longer creates duplicate issues on a retried send; findings already ticketed are skipped [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416)
- DORA compliance framework support [(#11131)](https://github.com/prowler-cloud/prowler/pull/11131)
### 🔄 Changed
- Allowlisted idempotent background tasks are no longer lost when a worker is stopped or crashes mid-task; tasks with external side effects are marked terminal instead of blindly re-running [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416)
- A recovered scan rewrites its findings, summaries, attack surface, and compliance data instead of appending to the previous run, so recovery never leaves stale or duplicate materialized rows [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416)
### 🐞 Fixed
- Workers now shut down gracefully on deploy or restart, finishing or re-queueing in-flight tasks instead of being force-killed and leaving them stuck [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416)
---
## [1.30.1] (Prowler v5.29.1)
### 🐞 Fixed
- `GET /api/v1/findings` N+1 query loading `resources__tags` when listing findings [(#11420)](https://github.com/prowler-cloud/prowler/pull/11420)
- Clean up the scan tmp output directory when `scan-report` fails so partial files do not accumulate and fill the worker disk (`No space left on device`) [(#11421)](https://github.com/prowler-cloud/prowler/pull/11421)
---
## [1.30.0] (Prowler v5.29.0)
### 🔄 Changed
- Scan finding ingestion: bulk-resolve `Resource`/`ResourceTag` rows, replace per-mapping `SELECT FOR UPDATE` with deferred `ResourceTagMapping.bulk_create(ignore_conflicts=True)`, wrap each micro-batch in a single `rls_transaction`, and raise `SCAN_DB_BATCH_SIZE` to 1000 [(#11249)](https://github.com/prowler-cloud/prowler/pull/11249)
- Faster `GET /api/v1/finding-groups/latest` aggregation on tenants where one recent scan holds most findings [(#11380)](https://github.com/prowler-cloud/prowler/pull/11380)
---
## [1.29.1] (Prowler v5.28.1)
### 🐞 Fixed
- `finding-groups` slow response with finding-level filters such as `region`; check title and description are now read from the daily summaries, which drops sorting by `check_title` [(#11326)](https://github.com/prowler-cloud/prowler/pull/11326)
---
## [1.29.0] (Prowler v5.28.0)
### 🚀 Added
- `okta` provider support [(#11184)](https://github.com/prowler-cloud/prowler/pull/11184)
- `resource.metadata` attribute included in `/api/v1/findings?include=resources` [(#11187)](https://github.com/prowler-cloud/prowler/pull/11187)
---
## [1.28.0] (Prowler v5.27.0)
### 🚀 Added
@@ -74,6 +20,7 @@ All notable changes to the **Prowler API** are documented in this file.
- `perform_scan_task` and `perform_scheduled_scan_task` now short-circuit with a warning and `return None` when the target provider no longer exists, instead of letting `handle_provider_deletion` raise `ProviderDeletedException`. `perform_scheduled_scan_task` also removes any orphan `PeriodicTask` it finds so beat stops re-firing scans for deleted providers. Prevents queued messages for deleted providers from being recorded as `FAILURE` [(#11185)](https://github.com/prowler-cloud/prowler/pull/11185)
- Attack Paths: `BEDROCK-001` and `BEDROCK-002` now target roles trusting `bedrock-agentcore.amazonaws.com` instead of `bedrock.amazonaws.com`, eliminating false positives against regular Bedrock service roles (Agents, Knowledge Bases, model invocation) [(#11141)](https://github.com/prowler-cloud/prowler/pull/11141)
---
## [1.27.1] (Prowler v5.26.1)
+4 -4
View File
@@ -89,7 +89,7 @@ WORKDIR /home/prowler
# Ensure output directory exists
RUN mkdir -p /tmp/prowler_api_output
COPY --chown=prowler:prowler pyproject.toml uv.lock ./
COPY pyproject.toml uv.lock ./
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir uv==0.11.14
@@ -97,13 +97,13 @@ RUN pip install --no-cache-dir --upgrade pip && \
ENV PATH="/home/prowler/.local/bin:$PATH"
# Add `--no-install-project` to avoid installing the current project as a package
RUN uv sync --locked --no-install-project && \
RUN uv sync --no-install-project && \
rm -rf ~/.cache/uv
RUN .venv/bin/python .venv/lib/python3.12/site-packages/prowler/providers/m365/lib/powershell/m365_powershell.py
COPY --chown=prowler:prowler src/backend/ ./backend/
COPY --chown=prowler:prowler docker-entrypoint.sh ./docker-entrypoint.sh
COPY src/backend/ ./backend/
COPY docker-entrypoint.sh ./docker-entrypoint.sh
WORKDIR /home/prowler/backend
+29 -29
View File
@@ -2,7 +2,7 @@
This repository contains the JSON API and Task Runner components for Prowler, which facilitate a complete backend that interacts with the Prowler SDK and is used by the Prowler UI.
## Components
# Components
The Prowler API is composed of the following components:
- The JSON API, which is an API built with Django Rest Framework.
@@ -10,13 +10,13 @@ The Prowler API is composed of the following components:
- The PostgreSQL database, which is used to store the data.
- The Valkey database, which is an in-memory database used as a message broker for the Celery workers.
### Note about Valkey
## Note about Valkey
[Valkey](https://valkey.io/) is an open source (BSD) high performance key/value datastore.
Valkey exposes a Redis 7.2 compliant API. Any service that exposes the Redis API can be used with Prowler API.
## Modify environment variables
# Modify environment variables
Under the root path of the project, you can find a file called `.env`. This file shows all the environment variables that the project uses. You should review it and set the values for the variables you want to change.
@@ -24,7 +24,7 @@ If you dont set `DJANGO_TOKEN_SIGNING_KEY` or `DJANGO_TOKEN_VERIFYING_KEY`, t
**Important note**: Every Prowler version (or repository branches and tags) could have different variables set in its `.env` file. Please use the `.env` file that corresponds with each version.
### Local deployment
## Local deployment
Keep in mind that if you export the `.env` file for a local deployment, you must do so within the context of the virtual environment, not before. Otherwise, the variables will not be loaded properly.
To do this, you can run:
@@ -34,12 +34,12 @@ set -a
source .env
```
## 🚀 Production deployment
### Docker deployment
# 🚀 Production deployment
## Docker deployment
This method requires `docker` and `docker compose`.
#### Clone the repository
### Clone the repository
```console
# HTTPS
@@ -50,13 +50,13 @@ git clone git@github.com:prowler-cloud/api.git
```
#### Build the base image
### Build the base image
```console
docker compose --profile prod build
```
#### Run the production service
### Run the production service
This command will start the Django production server and the Celery worker and also the Valkey and PostgreSQL databases.
@@ -68,7 +68,7 @@ You can access the server in `http://localhost:8080`.
> **NOTE:** notice how the port is different. When developing using docker, the port will be `8080` to prevent conflicts.
#### View the Production Server Logs
### View the Production Server Logs
To view the logs for any component (e.g., Django, Celery worker), you can use the following command with a wildcard. This command will follow logs for any container that matches the specified pattern:
@@ -133,13 +133,13 @@ gunicorn -c config/guniconf.py config.wsgi:application
> By default, the Gunicorn server will try to use as many workers as your machine can handle. You can manually change that in the `src/backend/config/guniconf.py` file.
## 🧪 Development guide
# 🧪 Development guide
### Local deployment
## Local deployment
To use this method, you'll need to set up a Python virtual environment (version ">=3.11,<3.13") and keep dependencies updated. Additionally, ensure that `uv` and `docker compose` are installed.
#### Clone the repository
### Clone the repository
```console
# HTTPS
@@ -150,7 +150,7 @@ git clone git@github.com:prowler-cloud/api.git
```
#### Start the PostgreSQL Database and Valkey
### Start the PostgreSQL Database and Valkey
The PostgreSQL database (version 16.3) and Valkey (version 7) are required for the development environment. To make development easier, we have provided a `docker-compose` file that will start these components for you.
@@ -161,7 +161,7 @@ The PostgreSQL database (version 16.3) and Valkey (version 7) are required for t
docker compose up postgres valkey -d
```
#### Install the Python dependencies
### Install the Python dependencies
> You must have uv installed
@@ -169,7 +169,7 @@ docker compose up postgres valkey -d
uv sync
```
#### Apply migrations
### Apply migrations
For migrations, you need to force the `admin` database router. Assuming you have the correct environment variables and Python virtual environment, run:
@@ -178,7 +178,7 @@ cd src/backend
python manage.py migrate --database admin
```
#### Run the Django development server
### Run the Django development server
```console
cd src/backend
@@ -188,7 +188,7 @@ python manage.py runserver
You can access the server in `http://localhost:8000`.
All changes in the code will be automatically reloaded in the server.
#### Run the Celery worker
### Run the Celery worker
```console
python -m celery -A config.celery worker -l info -E
@@ -196,11 +196,11 @@ python -m celery -A config.celery worker -l info -E
The Celery worker does not detect and reload changes in the code, so you need to restart it manually when you make changes.
### Docker deployment
## Docker deployment
This method requires `docker` and `docker compose`.
#### Clone the repository
### Clone the repository
```console
# HTTPS
@@ -211,13 +211,13 @@ git clone git@github.com:prowler-cloud/api.git
```
#### Build the base image
### Build the base image
```console
docker compose --profile dev build
```
#### Run the development service
### Run the development service
This command will start the Django development server and the Celery worker and also the Valkey and PostgreSQL databases.
@@ -230,7 +230,7 @@ All changes in the code will be automatically reloaded in the server.
> **NOTE:** notice how the port is different. When developing using docker, the port will be `8080` to prevent conflicts.
#### View the development server logs
### View the development server logs
To view the logs for any component (e.g., Django, Celery worker), you can use the following command with a wildcard. This command will follow logs for any container that matches the specified pattern:
@@ -238,7 +238,7 @@ To view the logs for any component (e.g., Django, Celery worker), you can use th
docker logs -f $(docker ps --format "{{.Names}}" | grep 'api-')
```
### Applying migrations
## Applying migrations
For migrations, you need to force the `admin` database router. Assuming you have the correct environment variables and Python virtual environment, run:
@@ -247,7 +247,7 @@ cd src/backend
uv run python manage.py migrate --database admin
```
### Apply fixtures
## Apply fixtures
Fixtures are used to populate the database with initial development data.
@@ -258,7 +258,7 @@ uv run python manage.py loaddata api/fixtures/0_dev_users.json --database admin
> The default credentials are `dev@prowler.com:Thisisapassword123@` or `dev2@prowler.com:Thisisapassword123@`
### Run tests
## Run tests
Note that the tests will fail if you use the same `.env` file as the development environment.
@@ -269,7 +269,7 @@ cd src/backend
uv run pytest
```
## Custom commands
# Custom commands
Django provides a way to create custom commands that can be run from the command line.
@@ -281,7 +281,7 @@ To run a custom command, you need to be in the `prowler/api/src/backend` directo
uv run python manage.py <command_name>
```
### Generate dummy data
## Generate dummy data
```console
python manage.py findings --tenant
@@ -298,7 +298,7 @@ This command creates, for a given tenant, a provider, scan and a set of findings
>
> The last step is required to access the findings details, since the UI needs that to print all the information.
#### Example
### Example
```console
~/backend $ uv run python manage.py findings --tenant
+4 -4
View File
@@ -22,12 +22,12 @@ apply_fixtures() {
# Start the Django development server on 0.0.0.0, port ${DJANGO_PORT} (default 8080).
#
# `exec` replaces this shell with the server process instead of running it as a
# child, so the function never returns and nothing placed after the `exec` line
# would run. When this script is the container entrypoint, that also means
# signals such as SIGTERM are delivered to the server itself rather than to a
# wrapper shell.
start_dev_server() {
    echo "Starting the development server..."
    exec uv run python manage.py runserver 0.0.0.0:"${DJANGO_PORT:-8080}"
}
# Start Gunicorn with the project configuration (config/guniconf.py).
#
# `exec` replaces this shell with the Gunicorn process, so the function never
# returns and a second invocation after it would be unreachable. When this
# script is the container entrypoint, stop signals reach Gunicorn directly.
start_prod_server() {
    echo "Starting the Gunicorn server..."
    exec uv run gunicorn -c config/guniconf.py config.wsgi:application
}
resolve_worker_hostname() {
@@ -47,7 +47,7 @@ resolve_worker_hostname() {
start_worker() {
echo "Starting the worker..."
exec uv run python -m celery -A config.celery worker \
uv run python -m celery -A config.celery worker \
-n "$(resolve_worker_hostname)" \
-l "${DJANGO_LOGGING_LEVEL:-info}" \
-Q celery,scans,scan-reports,deletion,backfill,overview,integrations,compliance,attack-paths-scans \
@@ -56,7 +56,7 @@ start_worker() {
# Start Celery Beat using the database-backed scheduler from django_celery_beat.
# Log level comes from ${DJANGO_LOGGING_LEVEL} (default "info").
#
# `exec` replaces this shell with the Beat process, so the function never
# returns and a second invocation after it would be unreachable. When this
# script is the container entrypoint, stop signals reach Beat directly.
start_worker_beat() {
    echo "Starting the worker-beat..."
    exec uv run python -m celery -A config.celery beat -l "${DJANGO_LOGGING_LEVEL:-info}" --scheduler django_celery_beat.schedulers:DatabaseScheduler
}
manage_db_partitions() {
-86
View File
@@ -1,86 +0,0 @@
# Orphan Celery task recovery
When a worker is terminated mid-task (a deploy, an OOM kill, a node eviction), the
task it was running can be left non-terminal forever: the `Scan` stays `EXECUTING`,
the `TaskResult` stays `STARTED`, and nothing re-runs it. This page describes the
mechanisms that detect and recover allowlisted idempotent orphans so users never
see a stuck scan and pending-task alerts do not fire.
## How recovery works
1. **Durable delivery.** The broker is configured so a task message is acknowledged
only after the task finishes (`task_acks_late`), one task is reserved at a time
(`worker_prefetch_multiplier = 1`), and an abruptly-lost worker re-queues its task
(`task_reject_on_worker_lost`). On `SIGTERM` the worker is given a soft-shutdown
window (`worker_soft_shutdown_timeout`) to finish or re-queue in-flight work
before it is force-killed.
2. **Periodic watchdog.** A Beat task, `reconcile-orphan-tasks`, runs every 2 minutes
(a `django_celery_beat` interval task created by migration). For each
in-flight task result with an allowlisted idempotent task name, it pings the
worker recorded on the task's `TaskResult`:
- worker responds -> the task is still running, leave it alone;
- worker is gone (and the scan started longer ago than the grace window; see
`--grace-minutes` below) -> it is a real orphan: the stale task is revoked and
marked terminal (clearing the pending/started alert), and the scan is
re-enqueued from scratch.
The re-run is safe because only tasks with proven idempotency are allowlisted.
Scan persistence, for example, clears the scan's prior findings and materialized
summary/compliance rows before re-writing them. Jira sends are allowlisted too:
each finding is reserved in a dispatch table before the external call, so a re-run
skips already-ticketed findings (the worst case is one finding missed if a worker
is hard-killed mid-send, never a duplicate issue). Other external side effects stay
terminal: the S3 upload rebuilds from worker-local files that do not survive a
crash, and report/Security Hub recovery is out of scope.
3. **Recovery cap.** Each automatic re-enqueue increments `Scan.recovery_count`.
After `--max-attempts` recoveries (default 3) the scan is marked `FAILED` instead
of re-enqueued, so a task that repeatedly kills its worker cannot loop forever.
A Postgres advisory lock ensures that, even with multiple API/worker replicas, only
one reconciliation runs at a time; the others no-op.
## On-demand command
The same logic is available as a management command, useful right after a deploy or
for manual intervention:
```bash
python manage.py reconcile_orphan_tasks # recover now
python manage.py reconcile_orphan_tasks --dry-run # report orphans, change nothing
python manage.py reconcile_orphan_tasks --grace-minutes 5 --max-attempts 3
```
## Configuration
All settings have safe defaults; override via environment variables.
| Env var | Default | Purpose |
| --- | --- | --- |
| `DJANGO_CELERY_WORKER_PREFETCH_MULTIPLIER` | `1` | Tasks reserved per worker process. |
| `DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT` | `60` | Seconds the worker drains/re-queues on `SIGTERM` before force-kill. |
| `DJANGO_CELERY_TASK_TIME_LIMIT` | `21600` (6h) | Hard limit for most tasks; connection checks are capped at 120s. |
| `DJANGO_CELERY_TASK_SOFT_TIME_LIMIT` | hard - 600 | Soft limit; raises `SoftTimeLimitExceeded` for cleanup. |
| `DJANGO_CELERY_LONG_TASK_TIME_LIMIT` | `172800` (48h) | Hard limit for scans and provider/tenant deletions, which can legitimately run for more than a day. |
| `DJANGO_CELERY_LONG_TASK_SOFT_TIME_LIMIT` | long hard - 600 | Soft limit for the long-running tasks above. |
`task_acks_late` and `task_reject_on_worker_lost` are enabled in `config/celery.py`.
## Deployment requirement
Two conditions must both hold for the soft shutdown to actually drain work:
1. **The worker must receive `SIGTERM`.** The container entrypoint `exec`s the
Celery process so it runs as PID 1; otherwise `SIGTERM` from `docker stop`/ECS
hits the entrypoint shell, never reaches Celery, and the worker is hard-killed
(SIGKILL) at the grace deadline without draining. Custom entrypoints must
preserve the `exec`.
2. **The orchestrator must give the worker enough time** before force-killing it.
Set the stop grace period to exceed `DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT`
plus a margin:
- **docker-compose:** `stop_grace_period` on the worker services (set to `120s`).
- **AWS ECS:** the worker container `stopTimeout` (configured in the deployment
repository).
If either condition is missing, long tasks are still recovered by the watchdog,
but they are cut mid-run on every deploy instead of draining.
+2 -2
View File
@@ -43,7 +43,7 @@ dependencies = [
"defusedxml==0.7.1",
"gunicorn==23.0.0",
"lxml==6.1.0",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.27",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (==1.3.0)",
"sentry-sdk[django] (==2.56.0)",
@@ -68,7 +68,7 @@ name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.31.0"
version = "1.28.0"
[tool.uv]
# Transitive pins matching master to avoid silent drift; bump deliberately.
+36 -47
View File
@@ -1,9 +1,7 @@
from collections.abc import Iterable, Mapping
from api.models import Provider
from prowler.lib.check.compliance_models import (
get_bulk_compliance_frameworks_universal,
)
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.check.models import CheckMetadata
AVAILABLE_COMPLIANCE_FRAMEWORKS = {}
@@ -96,22 +94,25 @@ PROWLER_CHECKS = LazyChecksMapping()
def get_compliance_frameworks(provider_type: Provider.ProviderChoices) -> list[str]:
"""List compliance framework identifiers available for `provider_type`.
"""List compliance frameworks the API can load for `provider_type`.
Includes both per-provider frameworks and universal top-level frameworks
(e.g. ``dora``, ``csa_ccm_4.0``).
The list is sourced from `Compliance.get_bulk` so that the names
returned here are guaranteed to be loadable by the bulk loader. This
prevents downstream key mismatches (e.g. CSV report generation iterating
framework names and looking them up in the bulk dict).
Args:
provider_type (Provider.ProviderChoices): The cloud provider type
(e.g., "aws", "azure", "gcp", "m365").
provider_type (Provider.ProviderChoices): The cloud provider type for which to retrieve
available compliance frameworks (e.g., "aws", "azure", "gcp", "m365").
Returns:
list[str]: Framework identifiers (e.g., "cis_1.4_aws", "dora").
list[str]: A list of framework identifiers (e.g., "cis_1.4_aws", "mitre_attack_azure") available
for the given provider.
"""
global AVAILABLE_COMPLIANCE_FRAMEWORKS
if provider_type not in AVAILABLE_COMPLIANCE_FRAMEWORKS:
AVAILABLE_COMPLIANCE_FRAMEWORKS[provider_type] = list(
get_bulk_compliance_frameworks_universal(provider_type).keys()
Compliance.get_bulk(provider_type).keys()
)
return AVAILABLE_COMPLIANCE_FRAMEWORKS[provider_type]
@@ -138,14 +139,18 @@ def get_prowler_provider_compliance(provider_type: Provider.ProviderChoices) ->
"""
Retrieve the Prowler compliance data for a specified provider type.
This function fetches the compliance frameworks and their associated
requirements for the given cloud provider.
Args:
provider_type (Provider.ProviderChoices): The provider type
(e.g., 'aws', 'azure') for which to retrieve compliance data.
Returns:
dict: Mapping of framework name to `ComplianceFramework` for the provider.
dict: A dictionary mapping compliance framework names to their respective
Compliance objects for the specified provider.
"""
return get_bulk_compliance_frameworks_universal(provider_type)
return Compliance.get_bulk(provider_type)
def _load_provider_assets(provider_type: Provider.ProviderChoices) -> tuple[dict, dict]:
@@ -204,8 +209,8 @@ def load_prowler_checks(
for compliance_name, compliance_data in prowler_compliance.get(
provider_type, {}
).items():
for requirement in compliance_data.requirements:
for check in requirement.checks.get(provider_type, []):
for requirement in compliance_data.Requirements:
for check in requirement.Checks:
try:
checks[provider_type][check].add(compliance_name)
except KeyError:
@@ -285,40 +290,24 @@ def generate_compliance_overview_template(
requirements_status = {"passed": 0, "failed": 0, "manual": 0}
total_requirements = 0
for requirement in compliance_data.requirements:
for requirement in compliance_data.Requirements:
total_requirements += 1
provider_check_list = list(requirement.checks.get(provider_type, []))
total_checks = len(provider_check_list)
checks_dict = {check: None for check in provider_check_list}
total_checks = len(requirement.Checks)
checks_dict = {check: None for check in requirement.Checks}
req_status_val = "MANUAL" if total_checks == 0 else "PASS"
# MITRE attrs are wrapped under `_raw_attributes` by the
# universal adapter — unwrap so consumers see the flat list.
requirement_attributes = requirement.attributes
if (
isinstance(requirement_attributes, dict)
and "_raw_attributes" in requirement_attributes
):
attributes_payload = list(requirement_attributes["_raw_attributes"])
elif isinstance(requirement_attributes, dict):
attributes_payload = (
[dict(requirement_attributes)] if requirement_attributes else []
)
else:
attributes_payload = [
dict(attribute) for attribute in requirement_attributes
]
# Build requirement dictionary
requirement_dict = {
"name": requirement.name or requirement.id,
"description": requirement.description,
"tactics": requirement.tactics or [],
"subtechniques": requirement.sub_techniques or [],
"platforms": requirement.platforms or [],
"technique_url": requirement.technique_url or "",
"attributes": attributes_payload,
"name": requirement.Name or requirement.Id,
"description": requirement.Description,
"tactics": getattr(requirement, "Tactics", []),
"subtechniques": getattr(requirement, "SubTechniques", []),
"platforms": getattr(requirement, "Platforms", []),
"technique_url": getattr(requirement, "TechniqueURL", ""),
"attributes": [
dict(attribute) for attribute in requirement.Attributes
],
"checks": checks_dict,
"checks_status": {
"pass": 0,
@@ -336,15 +325,15 @@ def generate_compliance_overview_template(
requirements_status["passed"] += 1
# Add requirement to compliance requirements
compliance_requirements[requirement.id] = requirement_dict
compliance_requirements[requirement.Id] = requirement_dict
# Build compliance dictionary
compliance_dict = {
"framework": compliance_data.framework,
"name": compliance_data.name,
"version": compliance_data.version,
"framework": compliance_data.Framework,
"name": compliance_data.Name,
"version": compliance_data.Version,
"provider": provider_type,
"description": compliance_data.description,
"description": compliance_data.Description,
"requirements": compliance_requirements,
"requirements_status": requirements_status,
"total_requirements": total_requirements,
@@ -1,49 +0,0 @@
from django.core.management.base import BaseCommand
from tasks.jobs.orphan_recovery import reconcile_orphans
class Command(BaseCommand):
    """Run orphan-task reconciliation on demand and print a one-line summary."""

    help = (
        "Recover orphaned allowlisted Celery tasks whose worker is gone and mark "
        "other stale task results terminal. Single-flight via a Postgres advisory lock."
    )

    def add_arguments(self, parser):
        """Register ``--grace-minutes``, ``--max-attempts`` and ``--dry-run``."""
        # Integer options share the same shape, so declare them as data:
        # (flag, default, help text), registered in this order.
        integer_flags = (
            (
                "--grace-minutes",
                2,
                "Skip tasks started within this window (worker may still register).",
            ),
            (
                "--max-attempts",
                3,
                "Give up re-running a task after this many recovery attempts (scans are marked FAILED).",
            ),
        )
        for flag, default_value, description in integer_flags:
            parser.add_argument(
                flag, type=int, default=default_value, help=description
            )

        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Detect and report orphans without revoking or re-enqueuing.",
        )

    def handle(self, *args, **options):
        """Call ``reconcile_orphans`` with the parsed options and report the outcome.

        Writes a "skipped" line when the result has no truthy ``acquired`` entry;
        otherwise writes the recovered / failed / skipped counts.
        """
        outcome = reconcile_orphans(
            grace_minutes=options["grace_minutes"],
            max_attempts=options["max_attempts"],
            dry_run=options["dry_run"],
        )

        if outcome.get("acquired"):
            summary = (
                "Orphan reconcile complete: "
                f"recovered={len(outcome.get('recovered', []))} "
                f"failed={len(outcome.get('failed', []))} "
                f"skipped(in-flight)={len(outcome.get('skipped', []))}"
            )
            self.stdout.write(self.style.SUCCESS(summary))
        else:
            self.stdout.write("Reconcile skipped: another run holds the lock.")
@@ -1,41 +0,0 @@
from django.db import migrations
import api.db_utils
class Migration(migrations.Migration):
    """Register ``okta`` as a provider type on the field and on the DB enum."""

    dependencies = [
        ("api", "0092_findings_arrays_gin_index_parent"),
    ]

    operations = [
        # Update the field's declared choices so they include ("okta", "Okta").
        migrations.AlterField(
            model_name="provider",
            name="provider",
            field=api.db_utils.ProviderEnumField(
                choices=[
                    ("aws", "AWS"),
                    ("azure", "Azure"),
                    ("gcp", "GCP"),
                    ("kubernetes", "Kubernetes"),
                    ("m365", "M365"),
                    ("github", "GitHub"),
                    ("mongodbatlas", "MongoDB Atlas"),
                    ("iac", "IaC"),
                    ("oraclecloud", "Oracle Cloud Infrastructure"),
                    ("alibabacloud", "Alibaba Cloud"),
                    ("cloudflare", "Cloudflare"),
                    ("openstack", "OpenStack"),
                    ("image", "Image"),
                    ("googleworkspace", "Google Workspace"),
                    ("vercel", "Vercel"),
                    ("okta", "Okta"),
                ],
                default="aws",
            ),
        ),
        # Add the value to the `provider` enum type in the database.
        # IF NOT EXISTS keeps the statement from failing if the value is
        # already present. The reverse is a no-op, so rolling this migration
        # back leaves 'okta' in the enum.
        migrations.RunSQL(
            "ALTER TYPE provider ADD VALUE IF NOT EXISTS 'okta';",
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
@@ -1,17 +0,0 @@
# Generated by Django 5.1.15 on 2026-05-30 17:38
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``recovery_count`` integer column to ``scan``."""

    dependencies = [
        ("api", "0093_okta_provider"),
    ]

    operations = [
        # default=0 gives the new column a value for rows that already exist.
        migrations.AddField(
            model_name="scan",
            name="recovery_count",
            field=models.IntegerField(default=0),
        ),
    ]
@@ -1,49 +0,0 @@
from django.db import migrations
# Used both as the PeriodicTask row's `name` and as the Celery task name it runs.
TASK_NAME = "reconcile-orphan-tasks"
# Interval between runs, in minutes.
INTERVAL_MINUTES = 2
def create_periodic_task(apps, schema_editor):
    """Create (or refresh) the periodic task that runs orphan reconciliation.

    Uses the historical ``django_celery_beat`` models obtained from ``apps``.
    An existing interval schedule with the same period is reused; the periodic
    task is looked up by name and its task, interval and enabled flag are set.

    Args:
        apps: Historical app registry passed in by ``RunPython``.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    interval_cls = apps.get_model("django_celery_beat", "IntervalSchedule")
    periodic_cls = apps.get_model("django_celery_beat", "PeriodicTask")

    interval_lookup = {"every": INTERVAL_MINUTES, "period": "minutes"}
    interval = interval_cls.objects.get_or_create(**interval_lookup)[0]

    task_defaults = {
        "task": TASK_NAME,
        "interval": interval,
        "enabled": True,
    }
    periodic_cls.objects.update_or_create(name=TASK_NAME, defaults=task_defaults)
def delete_periodic_task(apps, schema_editor):
    """Reverse of ``create_periodic_task``: remove the task and its unused interval.

    Args:
        apps: Historical app registry passed in by ``RunPython``.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    interval_cls = apps.get_model("django_celery_beat", "IntervalSchedule")
    periodic_cls = apps.get_model("django_celery_beat", "PeriodicTask")

    periodic_cls.objects.filter(name=TASK_NAME).delete()

    # Only drop matching interval rows that no periodic task references, so a
    # schedule shared with another task is left in place.
    unreferenced_intervals = interval_cls.objects.filter(
        every=INTERVAL_MINUTES,
        period="minutes",
        periodictask__isnull=True,
    )
    unreferenced_intervals.delete()
class Migration(migrations.Migration):
    """Data migration that installs the ``reconcile-orphan-tasks`` periodic task."""

    dependencies = [
        ("api", "0094_scan_recovery_count"),
        # The django_celery_beat tables must exist before rows are written to them.
        ("django_celery_beat", "0019_alter_periodictasks_options"),
    ]

    operations = [
        # Forward creates the task; reverse deletes it.
        migrations.RunPython(create_periodic_task, delete_periodic_task),
    ]
@@ -1,64 +0,0 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
    """Create the ``jira_issue_dispatches`` table with its uniqueness and RLS constraints."""

    dependencies = [
        ("api", "0095_reconcile_orphan_tasks_periodic_task"),
    ]

    operations = [
        migrations.CreateModel(
            name="JiraIssueDispatch",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("inserted_at", models.DateTimeField(auto_now_add=True)),
                # Plain UUID column, not a foreign key to the finding.
                ("finding_id", models.UUIDField()),
                (
                    "integration",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="jira_dispatches",
                        to="api.integration",
                    ),
                ),
                (
                    "tenant",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
                    ),
                ),
            ],
            options={
                "db_table": "jira_issue_dispatches",
                "abstract": False,
            },
        ),
        # At most one row per (tenant, integration, finding).
        migrations.AddConstraint(
            model_name="jiraissuedispatch",
            constraint=models.UniqueConstraint(
                fields=("tenant_id", "integration_id", "finding_id"),
                name="unique_jira_issue_dispatch",
            ),
        ),
        # Row-level security keyed on tenant_id, covering all four statement types.
        migrations.AddConstraint(
            model_name="jiraissuedispatch",
            constraint=api.rls.RowLevelSecurityConstraint(
                "tenant_id",
                name="rls_on_jiraissuedispatch",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ),
    ]
-59
View File
@@ -296,7 +296,6 @@ class Provider(RowLevelSecurityProtectedModel):
IMAGE = "image", _("Image")
GOOGLEWORKSPACE = "googleworkspace", _("Google Workspace")
VERCEL = "vercel", _("Vercel")
OKTA = "okta", _("Okta")
@staticmethod
def validate_aws_uid(value):
@@ -355,26 +354,6 @@ class Provider(RowLevelSecurityProtectedModel):
pointer="/data/attributes/uid",
)
@staticmethod
def validate_okta_uid(value):
if not re.match(
r"^[a-z0-9][a-z0-9-]*\.("
r"okta\.com|oktapreview\.com|okta-emea\.com|"
r"okta-gov\.com|okta\.mil|okta-miltest\.com|trex-govcloud\.com"
r")$",
value,
):
raise ModelValidationError(
detail=(
"Okta provider ID must be a valid Okta-managed org domain "
"(e.g., acme.okta.com, also .oktapreview.com / .okta-emea.com "
"/ .okta-gov.com / .okta.mil / .okta-miltest.com / "
".trex-govcloud.com), without scheme or path."
),
code="okta-uid",
pointer="/data/attributes/uid",
)
@staticmethod
def validate_kubernetes_uid(value):
if not re.match(
@@ -501,12 +480,6 @@ class Provider(RowLevelSecurityProtectedModel):
def clean(self):
    """Run base-model cleaning, then the validator matching this provider type.

    For Okta providers with a non-empty uid, the uid is stripped and lowercased
    first. The validator is looked up by name as ``validate_<provider>_uid``.
    """
    super().clean()

    if self.provider == self.ProviderChoices.OKTA:
        if self.uid:
            # The SDK lowercases the org domain before connecting. Normalising
            # here keeps the API from rejecting Acme.okta.com when the SDK would
            # accept it, and keeps stored uids aligned with the authenticated
            # org domain.
            self.uid = self.uid.strip().lower()

    uid_validator = getattr(self, f"validate_{self.provider}_uid")
    uid_validator(self.uid)
def save(self, *args, **kwargs):
@@ -666,9 +639,6 @@ class Scan(RowLevelSecurityProtectedModel):
state = StateEnumField(choices=StateChoices.choices, default=StateChoices.AVAILABLE)
unique_resource_count = models.IntegerField(default=0)
progress = models.IntegerField(default=0)
# Incremented by the scan-specific orphan-recovery path each time this scan is
# re-pointed to a fresh task; for observability (the retry cap is a Valkey counter).
recovery_count = models.IntegerField(default=0)
scanner_args = models.JSONField(default=dict)
duration = models.IntegerField(null=True, blank=True)
scheduled_at = models.DateTimeField(null=True, blank=True)
@@ -2001,35 +1971,6 @@ class IntegrationProviderRelationship(RowLevelSecurityProtectedModel):
]
class JiraIssueDispatch(RowLevelSecurityProtectedModel):
    """Tracks findings already sent to a Jira integration.

    Lets the Jira task be re-run safely (e.g. by orphan recovery): findings with
    an existing dispatch row are skipped, so no duplicate issues are created.
    Each row records one (tenant, integration, finding) combination.
    """

    id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
    # Set once when the row is created.
    inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
    # Deleting the integration deletes its dispatch rows (CASCADE).
    integration = models.ForeignKey(
        Integration, on_delete=models.CASCADE, related_name="jira_dispatches"
    )
    # Stored as a plain UUID rather than a ForeignKey to the finding.
    # NOTE(review): presumably so rows do not depend on the finding row's
    # lifetime; confirm with the Jira task that writes them.
    finding_id = models.UUIDField()

    class Meta(RowLevelSecurityProtectedModel.Meta):
        db_table = "jira_issue_dispatches"

        constraints = [
            # One dispatch row per (tenant, integration, finding).
            models.UniqueConstraint(
                fields=["tenant_id", "integration_id", "finding_id"],
                name="unique_jira_issue_dispatch",
            ),
            # Row-level security keyed on tenant_id for all four statement types.
            RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_%(class)s",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ]
class SAMLToken(models.Model):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
+2 -227
View File
@@ -1,7 +1,7 @@
openapi: 3.0.3
info:
title: Prowler API
version: 1.31.0
version: 1.28.0
description: |-
Prowler API specification.
@@ -373,7 +373,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -390,7 +389,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -414,7 +412,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -433,7 +430,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -1457,7 +1453,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -1474,7 +1469,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -1497,7 +1491,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -1516,7 +1509,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -2005,7 +1997,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -2022,7 +2013,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -2045,7 +2035,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -2064,7 +2053,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -2596,7 +2584,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -2613,7 +2600,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -2636,7 +2622,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -2655,7 +2640,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -3150,7 +3134,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -3167,7 +3150,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -3191,7 +3173,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -3210,7 +3191,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -3760,7 +3740,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -3777,7 +3756,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -3801,7 +3779,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -3820,7 +3797,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -4278,7 +4254,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -4295,7 +4270,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -4319,7 +4293,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -4338,7 +4311,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -4794,7 +4766,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -4811,7 +4782,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -4835,7 +4805,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -4854,7 +4823,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -5298,7 +5266,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -5315,7 +5282,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -5339,7 +5305,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -5358,7 +5323,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -7192,7 +7156,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -7209,7 +7172,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -7233,7 +7195,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -7252,7 +7213,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- name: filter[search]
@@ -7375,7 +7335,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -7392,7 +7351,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -7416,7 +7374,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -7435,7 +7392,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- name: filter[search]
@@ -7547,7 +7503,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -7564,7 +7519,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -7587,7 +7541,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -7606,7 +7559,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- name: filter[search]
@@ -7750,7 +7702,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -7767,7 +7718,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -7791,7 +7741,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -7810,7 +7759,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -7967,7 +7915,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -7984,7 +7931,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -8008,7 +7954,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -8027,7 +7972,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -8178,7 +8122,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -8195,7 +8138,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -8218,7 +8160,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -8237,7 +8178,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- name: filter[search]
@@ -8430,7 +8370,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -8447,7 +8386,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -8471,7 +8409,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -8490,7 +8427,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -8612,7 +8548,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -8629,7 +8564,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -8653,7 +8587,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -8672,7 +8605,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -8818,7 +8750,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -8835,7 +8766,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -8859,7 +8789,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -8878,7 +8807,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -9665,7 +9593,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -9682,7 +9609,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider__in]
schema:
@@ -9706,7 +9632,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -9725,7 +9650,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -9749,7 +9673,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -9766,7 +9689,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -9790,7 +9712,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -9809,7 +9730,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- name: filter[search]
@@ -10480,7 +10400,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -10497,7 +10416,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -10521,7 +10439,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -10540,7 +10457,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -11035,7 +10951,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -11052,7 +10967,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -11076,7 +10990,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -11095,7 +11008,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -11403,7 +11315,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -11420,7 +11331,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -11444,7 +11354,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -11463,7 +11372,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -11777,7 +11685,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -11794,7 +11701,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -11818,7 +11724,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -11837,7 +11742,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -12676,7 +12580,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
* `aws` - AWS
* `azure` - Azure
@@ -12693,7 +12596,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
- in: query
name: filter[provider_type__in]
schema:
@@ -12717,7 +12619,6 @@ paths:
- openstack
- oraclecloud
- vercel
- okta
description: |-
Multiple values may be separated by commas.
@@ -12736,7 +12637,6 @@ paths:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
explode: false
style: form
- in: query
@@ -13137,59 +13037,8 @@ paths:
responses:
'200':
description: CSV file containing the compliance report
'202':
description: The task is in progress
'403':
description: There is a problem with credentials
'404':
description: Compliance report not found, or the scan has no reports yet
/api/v1/scans/{id}/compliance/{name}/ocsf:
get:
operationId: scans_compliance_ocsf_retrieve
description: Download a specific compliance report as an OCSF JSON file. Only
universal frameworks that declare an output configuration produce this artifact
(currently 'dora' and 'csa_ccm_4.0'); any other framework returns 404.
summary: Retrieve compliance report as OCSF JSON
parameters:
- in: query
name: fields[scan-reports]
schema:
type: array
items:
type: string
enum:
- id
- name
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
- in: path
name: id
schema:
type: string
format: uuid
description: A UUID string identifying this scan.
required: true
- in: path
name: name
schema:
type: string
description: The compliance report name, like 'dora'
required: true
tags:
- Scan
security:
- JWT or API Key: []
responses:
'200':
description: OCSF JSON file containing the compliance report
'202':
description: The task is in progress
'403':
description: There is a problem with credentials
'404':
description: Compliance report not found, the framework does not provide
an OCSF export, or the scan has no reports yet
description: Compliance report not found
/api/v1/scans/{id}/csa:
get:
operationId: scans_csa_retrieve
@@ -20266,23 +20115,6 @@ components:
required:
- clouds_yaml_content
- clouds_yaml_cloud
- type: object
title: Okta OAuth Credentials
properties:
okta_client_id:
type: string
description: Client ID of the Okta API Services app used for OAuth 2.0 private-key JWT authentication.
okta_private_key:
type: string
description: PEM-encoded private key whose matching public key (JWK) is registered on the Okta service app.
okta_scopes:
type: array
items:
type: string
description: OAuth scopes to request. Optional; defaults to the minimum set required to run the currently enabled Okta checks.
required:
- okta_client_id
- okta_private_key
- type: object
title: Vercel API Token
properties:
@@ -21295,7 +21127,6 @@ components:
- image
- googleworkspace
- vercel
- okta
type: string
description: |-
* `aws` - AWS
@@ -21313,7 +21144,6 @@ components:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
x-spec-enum-id: 91f917e0c3ab97e8
uid:
type: string
@@ -21435,7 +21265,6 @@ components:
- image
- googleworkspace
- vercel
- okta
type: string
x-spec-enum-id: 91f917e0c3ab97e8
description: |-
@@ -21456,7 +21285,6 @@ components:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
uid:
type: string
title: Unique identifier for the provider, set by the provider
@@ -21509,7 +21337,6 @@ components:
- image
- googleworkspace
- vercel
- okta
type: string
x-spec-enum-id: 91f917e0c3ab97e8
description: |-
@@ -21530,7 +21357,6 @@ components:
* `image` - Image
* `googleworkspace` - Google Workspace
* `vercel` - Vercel
* `okta` - Okta
uid:
type: string
minLength: 3
@@ -22380,23 +22206,6 @@ components:
required:
- clouds_yaml_content
- clouds_yaml_cloud
- type: object
title: Okta OAuth Credentials
properties:
okta_client_id:
type: string
description: Client ID of the Okta API Services app used for OAuth 2.0 private-key JWT authentication.
okta_private_key:
type: string
description: PEM-encoded private key whose matching public key (JWK) is registered on the Okta service app.
okta_scopes:
type: array
items:
type: string
description: OAuth scopes to request. Optional; defaults to the minimum set required to run the currently enabled Okta checks.
required:
- okta_client_id
- okta_private_key
- type: object
title: Vercel API Token
properties:
@@ -22822,23 +22631,6 @@ components:
required:
- clouds_yaml_content
- clouds_yaml_cloud
- type: object
title: Okta OAuth Credentials
properties:
okta_client_id:
type: string
description: Client ID of the Okta API Services app used for OAuth 2.0 private-key JWT authentication.
okta_private_key:
type: string
description: PEM-encoded private key whose matching public key (JWK) is registered on the Okta service app.
okta_scopes:
type: array
items:
type: string
description: OAuth scopes to request. Optional; defaults to the minimum set required to run the currently enabled Okta checks.
required:
- okta_client_id
- okta_private_key
- type: object
title: Vercel API Token
properties:
@@ -23274,23 +23066,6 @@ components:
required:
- clouds_yaml_content
- clouds_yaml_cloud
- type: object
title: Okta OAuth Credentials
properties:
okta_client_id:
type: string
description: Client ID of the Okta API Services app used for OAuth 2.0 private-key JWT authentication.
okta_private_key:
type: string
description: PEM-encoded private key whose matching public key (JWK) is registered on the Okta service app.
okta_scopes:
type: array
items:
type: string
description: OAuth scopes to request. Optional; defaults to the minimum set required to run the currently enabled Okta checks.
required:
- okta_client_id
- okta_private_key
- type: object
title: Vercel API Token
properties:
+40 -51
View File
@@ -12,9 +12,7 @@ from api.compliance import (
load_prowler_checks,
)
from api.models import Provider
from prowler.lib.check.compliance_models import (
get_bulk_compliance_frameworks_universal,
)
from prowler.lib.check.compliance_models import Compliance
class TestCompliance:
@@ -30,16 +28,16 @@ class TestCompliance:
assert set(checks) == {"check1", "check2", "check3"}
mock_check_metadata.get_bulk.assert_called_once_with(provider_type)
@patch("api.compliance.get_bulk_compliance_frameworks_universal")
def test_get_prowler_provider_compliance(self, mock_get_bulk):
@patch("api.compliance.Compliance")
def test_get_prowler_provider_compliance(self, mock_compliance):
provider_type = Provider.ProviderChoices.AWS
mock_get_bulk.return_value = {
mock_compliance.get_bulk.return_value = {
"compliance1": MagicMock(),
"compliance2": MagicMock(),
}
compliance_data = get_prowler_provider_compliance(provider_type)
assert compliance_data == mock_get_bulk.return_value
mock_get_bulk.assert_called_once_with(provider_type)
assert compliance_data == mock_compliance.get_bulk.return_value
mock_compliance.get_bulk.assert_called_once_with(provider_type)
@patch("api.compliance.get_prowler_provider_checks")
@patch("api.models.Provider.ProviderChoices")
@@ -53,9 +51,9 @@ class TestCompliance:
prowler_compliance = {
"aws": {
"compliance1": MagicMock(
requirements=[
Requirements=[
MagicMock(
checks={"aws": ["check1", "check2"]},
Checks=["check1", "check2"],
),
],
),
@@ -169,38 +167,35 @@ class TestCompliance:
def test_generate_compliance_overview_template(self, mock_provider_choices):
mock_provider_choices.values = ["aws"]
# ``name`` is a reserved MagicMock kwarg (it labels the mock for repr,
# it does NOT set a ``.name`` attribute), so it must be assigned
# explicitly after construction.
requirement1 = MagicMock(
id="requirement1",
description="Description of requirement 1",
attributes=[],
checks={"aws": ["check1", "check2"]},
tactics=["tactic1"],
sub_techniques=["subtechnique1"],
platforms=["platform1"],
technique_url="https://example.com",
Id="requirement1",
Name="Requirement 1",
Description="Description of requirement 1",
Attributes=[],
Checks=["check1", "check2"],
Tactics=["tactic1"],
SubTechniques=["subtechnique1"],
Platforms=["platform1"],
TechniqueURL="https://example.com",
)
requirement1.name = "Requirement 1"
requirement2 = MagicMock(
id="requirement2",
description="Description of requirement 2",
attributes=[],
checks={"aws": []},
tactics=[],
sub_techniques=[],
platforms=[],
technique_url="",
Id="requirement2",
Name="Requirement 2",
Description="Description of requirement 2",
Attributes=[],
Checks=[],
Tactics=[],
SubTechniques=[],
Platforms=[],
TechniqueURL="",
)
requirement2.name = "Requirement 2"
compliance1 = MagicMock(
requirements=[requirement1, requirement2],
framework="Framework 1",
version="1.0",
description="Description of compliance1",
Requirements=[requirement1, requirement2],
Framework="Framework 1",
Version="1.0",
Description="Description of compliance1",
Name="Compliance 1",
)
compliance1.name = "Compliance 1"
prowler_compliance = {"aws": {"compliance1": compliance1}}
template = generate_compliance_overview_template(prowler_compliance)
@@ -276,28 +271,24 @@ def reset_compliance_cache():
class TestGetComplianceFrameworks:
def test_returns_keys_from_compliance_get_bulk(self, reset_compliance_cache):
with patch(
"api.compliance.get_bulk_compliance_frameworks_universal"
) as mock_get_bulk:
mock_get_bulk.return_value = {
with patch("api.compliance.Compliance") as mock_compliance:
mock_compliance.get_bulk.return_value = {
"cis_1.4_aws": MagicMock(),
"mitre_attack_aws": MagicMock(),
}
result = get_compliance_frameworks(Provider.ProviderChoices.AWS)
assert sorted(result) == ["cis_1.4_aws", "mitre_attack_aws"]
mock_get_bulk.assert_called_once_with(Provider.ProviderChoices.AWS)
mock_compliance.get_bulk.assert_called_once_with(Provider.ProviderChoices.AWS)
def test_caches_result_per_provider(self, reset_compliance_cache):
with patch(
"api.compliance.get_bulk_compliance_frameworks_universal"
) as mock_get_bulk:
mock_get_bulk.return_value = {"cis_1.4_aws": MagicMock()}
with patch("api.compliance.Compliance") as mock_compliance:
mock_compliance.get_bulk.return_value = {"cis_1.4_aws": MagicMock()}
get_compliance_frameworks(Provider.ProviderChoices.AWS)
get_compliance_frameworks(Provider.ProviderChoices.AWS)
# Cached after first call.
assert mock_get_bulk.call_count == 1
assert mock_compliance.get_bulk.call_count == 1
@pytest.mark.parametrize(
"provider_type",
@@ -305,19 +296,17 @@ class TestGetComplianceFrameworks:
)
def test_listing_is_subset_of_bulk(self, reset_compliance_cache, provider_type):
"""Regression for CLOUD-API-40S: every name returned by
``get_compliance_frameworks`` must be loadable via
``get_bulk_compliance_frameworks_universal``.
``get_compliance_frameworks`` must be loadable via ``Compliance.get_bulk``.
A divergence here is what produced ``KeyError: 'csa_ccm_4.0'`` in
``generate_outputs_task`` after universal/multi-provider compliance
JSONs were introduced at the top-level ``prowler/compliance/`` path.
"""
bulk_keys = set(get_bulk_compliance_frameworks_universal(provider_type).keys())
bulk_keys = set(Compliance.get_bulk(provider_type).keys())
listed = set(get_compliance_frameworks(provider_type))
missing = listed - bulk_keys
assert not missing, (
f"get_compliance_frameworks({provider_type!r}) returned names not "
f"loadable by get_bulk_compliance_frameworks_universal: "
f"{sorted(missing)}"
f"loadable by Compliance.get_bulk: {sorted(missing)}"
)
-31
View File
@@ -31,7 +31,6 @@ from prowler.providers.image.image_provider import ImageProvider
from prowler.providers.kubernetes.kubernetes_provider import KubernetesProvider
from prowler.providers.m365.m365_provider import M365Provider
from prowler.providers.mongodbatlas.mongodbatlas_provider import MongodbatlasProvider
from prowler.providers.okta.okta_provider import OktaProvider
from prowler.providers.openstack.openstack_provider import OpenstackProvider
from prowler.providers.oraclecloud.oraclecloud_provider import OraclecloudProvider
from prowler.providers.vercel.vercel_provider import VercelProvider
@@ -131,7 +130,6 @@ class TestReturnProwlerProvider:
(Provider.ProviderChoices.OPENSTACK.value, OpenstackProvider),
(Provider.ProviderChoices.IMAGE.value, ImageProvider),
(Provider.ProviderChoices.VERCEL.value, VercelProvider),
(Provider.ProviderChoices.OKTA.value, OktaProvider),
],
)
def test_return_prowler_provider(self, provider_type, expected_provider):
@@ -240,31 +238,6 @@ class TestProwlerProviderConnectionTest:
raise_on_exception=False,
)
@patch("api.utils.return_prowler_provider")
def test_prowler_provider_connection_test_okta_provider(
self, mock_return_prowler_provider
):
"""Test connection test for Okta provider passes org domain and provider_id."""
provider = MagicMock()
provider.uid = "acme.okta.com"
provider.provider = Provider.ProviderChoices.OKTA.value
provider.secret.secret = {
"okta_client_id": "0oa123456789abcdef",
"okta_private_key": "-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----",
"okta_scopes": ["okta.policies.read"],
}
mock_return_prowler_provider.return_value = MagicMock()
prowler_provider_connection_test(provider)
mock_return_prowler_provider.return_value.test_connection.assert_called_once_with(
okta_client_id="0oa123456789abcdef",
okta_private_key="-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----",
okta_scopes=["okta.policies.read"],
okta_org_domain="acme.okta.com",
provider_id="acme.okta.com",
raise_on_exception=False,
)
@patch("api.utils.return_prowler_provider")
def test_prowler_provider_connection_test_image_provider_no_creds(
self, mock_return_prowler_provider
@@ -335,10 +308,6 @@ class TestGetProwlerProviderKwargs:
Provider.ProviderChoices.VERCEL.value,
{"team_id": "provider_uid"},
),
(
Provider.ProviderChoices.OKTA.value,
{"okta_org_domain": "provider_uid"},
),
],
)
def test_get_prowler_provider_kwargs(self, provider_type, expected_extra_kwargs):
+22 -392
View File
@@ -24,11 +24,9 @@ from conftest import (
today_after_n_days,
)
from django.conf import settings
from django.db import connection
from django.db.models import Count
from django.http import JsonResponse
from django.test import RequestFactory
from django.test.utils import CaptureQueriesContext
from django.urls import reverse
from django_celery_results.models import TaskResult
from rest_framework import status
@@ -66,7 +64,6 @@ from api.models import (
ProviderSecret,
Resource,
ResourceFindingMapping,
ResourceTag,
Role,
RoleProviderGroupRelationship,
SAMLConfiguration,
@@ -1628,21 +1625,6 @@ class TestProviderViewSet:
"uid": "C12",
"alias": "Google Workspace Minimum Length",
},
{
"provider": "okta",
"uid": "acme.okta.com",
"alias": "Okta Org",
},
{
"provider": "okta",
"uid": "agency.okta-gov.com",
"alias": "Okta Gov Org",
},
{
"provider": "okta",
"uid": "agency.okta.mil",
"alias": "Okta Mil Org",
},
]
),
)
@@ -2161,24 +2143,6 @@ class TestProviderViewSet:
"googleworkspace-uid",
"uid",
),
(
{
"provider": "okta",
"uid": "https://acme.okta.com",
"alias": "test",
},
"okta-uid",
"uid",
),
(
{
"provider": "okta",
"uid": "acme.example.com",
"alias": "test",
},
"okta-uid",
"uid",
),
]
),
)
@@ -2199,25 +2163,6 @@ class TestProviderViewSet:
== f"/data/attributes/{error_pointer}"
)
@pytest.mark.parametrize(
"input_uid,stored_uid",
[
("Acme.okta.com", "acme.okta.com"),
(" ACME.OKTA.COM ", "acme.okta.com"),
("Agency.Okta-Gov.com", "agency.okta-gov.com"),
],
)
def test_providers_create_okta_uid_normalized(
self, authenticated_client, input_uid, stored_uid
):
response = authenticated_client.post(
reverse("provider-list"),
data={"provider": "okta", "uid": input_uid, "alias": "Okta"},
format="json",
)
assert response.status_code == status.HTTP_201_CREATED
assert Provider.objects.get().uid == stored_uid
def test_providers_partial_update(self, authenticated_client, providers_fixture):
provider1, *_ = providers_fixture
new_alias = "This is the new name"
@@ -2375,17 +2320,17 @@ class TestProviderViewSet:
),
("alias", "aws_testing_1", 1),
("alias.icontains", "aws", 2),
("inserted_at", TODAY, 14),
("inserted_at", TODAY, 13),
(
"inserted_at.gte",
"2024-01-01",
14,
13,
),
("inserted_at.lte", "2024-01-01", 0),
(
"updated_at.gte",
"2024-01-01",
14,
13,
),
("updated_at.lte", "2024-01-01", 0),
]
@@ -3018,19 +2963,6 @@ class TestProviderSecretViewSet:
"api_token": "fake-vercel-api-token-for-testing",
},
),
# Okta with inline private key credentials
(
Provider.ProviderChoices.OKTA.value,
ProviderSecret.TypeChoices.STATIC,
{
"okta_client_id": "0oa123456789abcdef",
"okta_private_key": "-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----",
"okta_scopes": [
"okta.policies.read",
"okta.groups.read",
],
},
),
],
)
def test_provider_secrets_create_valid(
@@ -3143,46 +3075,6 @@ class TestProviderSecretViewSet:
== f"/data/attributes/{error_pointer}"
)
def test_provider_secrets_invalid_create_okta_missing_private_key(
self,
providers_fixture,
authenticated_client,
):
okta_provider = next(
provider
for provider in providers_fixture
if provider.provider == Provider.ProviderChoices.OKTA.value
)
data = {
"data": {
"type": "provider-secrets",
"attributes": {
"name": "Okta Secret",
"secret_type": ProviderSecret.TypeChoices.STATIC,
"secret": {
"okta_client_id": "0oa123456789abcdef",
},
},
"relationships": {
"provider": {
"data": {"type": "providers", "id": str(okta_provider.id)}
}
},
}
}
response = authenticated_client.post(
reverse("providersecret-list"),
data=json.dumps(data),
content_type="application/vnd.api+json",
)
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert response.json()["errors"][0]["code"] == "required"
assert response.json()["errors"][0]["source"]["pointer"] == (
"/data/attributes/secret/okta_private_key"
)
def test_provider_secrets_partial_update(
self, authenticated_client, provider_secret_fixture
):
@@ -3859,20 +3751,16 @@ class TestScanViewSet:
scan.output_location = "dummy"
scan.save()
task_result = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan-report",
task_kwargs={"scan_id": str(scan.id)},
)
task = Task.objects.create(
tenant_id=scan.tenant_id,
task_runner_task=task_result,
)
dummy_task_data = {"id": str(task.id), "state": StateChoices.EXECUTING}
dummy_task = Task.objects.create(tenant_id=scan.tenant_id)
dummy_task.id = "dummy-task-id"
dummy_task_data = {"id": dummy_task.id, "state": StateChoices.EXECUTING}
with patch(
"api.v1.views.TaskSerializer",
return_value=type("DummySerializer", (), {"data": dummy_task_data}),
with (
patch("api.v1.views.Task.objects.get", return_value=dummy_task),
patch(
"api.v1.views.TaskSerializer",
return_value=type("DummySerializer", (), {"data": dummy_task_data}),
),
):
url = reverse("scan-report", kwargs={"pk": scan.id})
response = authenticated_client.get(url)
@@ -4193,88 +4081,6 @@ class TestScanViewSet:
assert resp.status_code == status.HTTP_302_FOUND
assert resp["Location"] == presigned_url
def test_compliance_s3_returns_latest_match(
self, authenticated_client, scans_fixture, monkeypatch
):
"""When several files match, the most recently modified one is served."""
scan = scans_fixture[0]
bucket = "bucket"
scan.output_location = f"s3://{bucket}/path/scan.zip"
scan.state = StateChoices.COMPLETED
scan.save()
monkeypatch.setattr(
"api.v1.views.env",
type("env", (), {"str": lambda self, *args, **kwargs: "test-bucket"})(),
)
old_key = "path/compliance/prowler-output-aws-20240101000000_cis_1.4_aws.csv"
latest_key = "path/compliance/prowler-output-aws-20240202000000_cis_1.4_aws.csv"
class FakeS3Client:
def list_objects_v2(self, Bucket, Prefix):
return {
"Contents": [
{
"Key": old_key,
"LastModified": datetime(2024, 1, 1, tzinfo=timezone.utc),
},
{
"Key": latest_key,
"LastModified": datetime(2024, 2, 2, tzinfo=timezone.utc),
},
]
}
def generate_presigned_url(self, ClientMethod, Params, ExpiresIn):
assert Params["Key"] == latest_key
return "https://test-bucket.s3.amazonaws.com/latest"
monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())
url = reverse("scan-compliance", kwargs={"pk": scan.id, "name": "cis_1.4_aws"})
resp = authenticated_client.get(url)
assert resp.status_code == status.HTTP_302_FOUND
assert resp["Location"].endswith("/latest")
def test_compliance_local_returns_latest_match(
self, authenticated_client, scans_fixture, monkeypatch
):
"""The local branch serves the most recently modified matching file."""
scan = scans_fixture[0]
scan.state = StateChoices.COMPLETED
with tempfile.TemporaryDirectory() as tmp:
comp_dir = Path(tmp) / "reports" / "compliance"
comp_dir.mkdir(parents=True, exist_ok=True)
old_file = comp_dir / "prowler-output-aws-20240101000000_cis_1.4_aws.csv"
old_file.write_bytes(b"old")
latest_file = comp_dir / "prowler-output-aws-20240202000000_cis_1.4_aws.csv"
latest_file.write_bytes(b"latest")
# Make `latest_file` newer regardless of creation order.
os.utime(old_file, (1_700_000_000, 1_700_000_000))
os.utime(latest_file, (1_700_000_100, 1_700_000_100))
scan.output_location = str(Path(tmp) / "reports" / "scan.zip")
scan.save()
monkeypatch.setattr(
glob,
"glob",
lambda p: [str(old_file), str(latest_file)],
)
url = reverse(
"scan-compliance", kwargs={"pk": scan.id, "name": "cis_1.4_aws"}
)
resp = authenticated_client.get(url)
assert resp.status_code == status.HTTP_200_OK
assert resp.content == b"latest"
assert resp["Content-Disposition"].endswith(
f'filename="{latest_file.name}"'
)
def test_compliance_s3_not_found(
self, authenticated_client, scans_fixture, monkeypatch
):
@@ -4383,24 +4189,18 @@ class TestScanViewSet:
assert cd.startswith('attachment; filename="')
assert cd.endswith(f'filename="{fname.name}"')
@patch("api.v1.views.Task.objects.get")
@patch("api.v1.views.TaskSerializer")
def test__get_task_status_returns_none_if_task_not_executing(
self, mock_task_serializer, authenticated_client, scans_fixture
self, mock_task_serializer, mock_task_get, authenticated_client, scans_fixture
):
scan = scans_fixture[0]
scan.state = StateChoices.COMPLETED
scan.output_location = "dummy"
scan.save()
task_result = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan-report",
task_kwargs={"scan_id": str(scan.id)},
)
task = Task.objects.create(
tenant_id=scan.tenant_id,
task_runner_task=task_result,
)
task = Task.objects.create(tenant_id=scan.tenant_id)
mock_task_get.return_value = task
mock_task_serializer.return_value.data = {
"id": str(task.id),
"state": StateChoices.COMPLETED,
@@ -4421,7 +4221,6 @@ class TestScanViewSet:
scan.save()
task_result = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan-report",
task_kwargs={"scan_id": str(scan.id)},
)
@@ -4442,51 +4241,6 @@ class TestScanViewSet:
assert response.status_code == status.HTTP_202_ACCEPTED
assert response.data["id"] == str(task.id)
@patch("api.v1.views.TaskSerializer")
def test__get_task_status_returns_latest_task(
self, mock_task_serializer, authenticated_client, scans_fixture
):
"""With several scan-report tasks for the scan, the most recent is used."""
scan = scans_fixture[0]
scan.state = StateChoices.COMPLETED
scan.output_location = "dummy"
scan.save()
old_task = Task.objects.create(
tenant_id=scan.tenant_id,
task_runner_task=TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan-report",
task_kwargs={"scan_id": str(scan.id)},
),
)
new_task = Task.objects.create(
tenant_id=scan.tenant_id,
task_runner_task=TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan-report",
task_kwargs={"scan_id": str(scan.id)},
),
)
# `inserted_at` is `auto_now_add`, and within the test transaction the DB
# `now()` is constant, so force distinct timestamps to make order_by stable.
base = datetime(2024, 1, 1, tzinfo=timezone.utc)
Task.objects.filter(pk=old_task.pk).update(inserted_at=base)
Task.objects.filter(pk=new_task.pk).update(
inserted_at=base + timedelta(hours=1)
)
mock_task_serializer.side_effect = lambda instance, *a, **k: SimpleNamespace(
data={"id": str(instance.id), "state": StateChoices.EXECUTING}
)
url = reverse("scan-report", kwargs={"pk": scan.id})
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_202_ACCEPTED
assert str(new_task.id) in response["Content-Location"]
assert str(old_task.id) not in response["Content-Location"]
@patch("api.v1.views.get_s3_client")
@patch("api.v1.views.sentry_sdk.capture_exception")
def test_compliance_list_objects_client_error(
@@ -7057,80 +6811,6 @@ class TestFindingViewSet:
== findings_fixture[0].status
)
def test_findings_list_resource_tags_no_n_plus_one(
self, authenticated_client, findings_fixture
):
"""Listing findings must load every resource's tags in a constant
number of queries, no matter how many findings/resources are returned.
This guards ``FindingViewSet._optimize_tags_loading`` against
regressions that would reintroduce one extra query per resource (the
N+1 the prefetch was added to remove).
"""
scan = findings_fixture[0].scan
tenant_id = findings_fixture[0].tenant_id
provider = scan.provider
def _create_finding_with_tagged_resource(index):
resource = Resource.objects.create(
tenant_id=tenant_id,
provider=provider,
uid=f"arn:aws:ec2:us-east-1:123456789012:instance/n-plus-one-{index}",
name=f"N+1 Instance {index}",
region="us-east-1",
service="ec2",
type="prowler-test",
)
resource.upsert_or_delete_tags(
[
ResourceTag.objects.create(
tenant_id=tenant_id,
key=f"key-{index}",
value=f"value-{index}",
)
]
)
finding = Finding.objects.create(
tenant_id=tenant_id,
uid=f"n_plus_one_finding_{index}",
scan=scan,
status=Status.FAIL,
status_extended="n+1 status",
impact=Severity.medium,
severity=Severity.medium,
check_id="test_check_id",
check_metadata={"CheckId": "test_check_id", "servicename": "ec2"},
first_seen_at="2024-01-02T00:00:00Z",
)
finding.add_resources([resource])
return finding
params = {"filter[inserted_at]": TODAY, "include": "resources"}
# Baseline: the two findings provided by the fixture.
with CaptureQueriesContext(connection) as baseline:
response = authenticated_client.get(reverse("finding-list"), params)
assert response.status_code == status.HTTP_200_OK
# Add more findings, each with its own resource carrying tags.
extra_findings = 5
for index in range(extra_findings):
_create_finding_with_tagged_resource(index)
with CaptureQueriesContext(connection) as scaled:
response = authenticated_client.get(reverse("finding-list"), params)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == len(findings_fixture) + extra_findings
# The query count must not grow with the number of findings/resources.
assert len(scaled.captured_queries) == len(baseline.captured_queries), (
"Resource tags are not being prefetched: "
f"{len(baseline.captured_queries)} queries for {len(findings_fixture)} "
f"findings vs {len(scaled.captured_queries)} for "
f"{len(findings_fixture) + extra_findings}. Likely an N+1 regression "
"in FindingViewSet._optimize_tags_loading."
)
@pytest.mark.parametrize(
"include_values, expected_resources",
[
@@ -7373,32 +7053,6 @@ class TestFindingViewSet:
"id"
] == str(finding_1.resources.first().id)
def test_findings_retrieve_include_resource_metadata(
self, authenticated_client, findings_fixture
):
finding_1, *_ = findings_fixture
resource = finding_1.resources.first()
resource.metadata = '{"VulnerabilityID": "CVE-2026-0001"}'
resource.details = "Python 3.12 base image"
resource.save()
response = authenticated_client.get(
reverse("finding-detail", kwargs={"pk": finding_1.id}),
{"include": "resources"},
)
assert response.status_code == status.HTTP_200_OK
included_resource = next(
item
for item in response.json()["included"]
if item["type"] == "resources" and item["id"] == str(resource.id)
)
assert (
included_resource["attributes"]["metadata"]
== '{"VulnerabilityID": "CVE-2026-0001"}'
)
assert included_resource["attributes"]["details"] == "Python 3.12 base image"
def test_findings_invalid_retrieve(self, authenticated_client):
response = authenticated_client.get(
reverse("finding-detail", kwargs={"pk": "random_id"}),
@@ -9560,16 +9214,6 @@ class TestComplianceOverviewViewSet:
assert "platforms" in attributes["attributes"]["technique_details"]
assert "technique_url" in attributes["attributes"]["technique_details"]
# Guard against the `_raw_attributes` wrapper leaking through —
# the UI reads metadata[i].Category / .AWSService directly.
metadata = attributes["attributes"]["metadata"]
assert isinstance(metadata, list) and len(metadata) > 0
first_attr = metadata[0]
assert isinstance(first_attr, dict)
assert "_raw_attributes" not in first_attr
assert "Category" in first_attr
assert "AWSService" in first_attr
def test_compliance_overview_attributes_missing_compliance_id(
self, authenticated_client
):
@@ -16146,12 +15790,6 @@ class TestFindingGroupViewSet:
assert attrs["fail_count"] == 0
assert attrs["resources_total"] == 1
assert attrs["resources_fail"] == 0
# check_title / check_description are resolved post-pagination from the
# summary table, not from the finding's check_metadata.
assert attrs["check_title"] == "Ensure EC2 instances do not have public IPs"
assert (
attrs["check_description"] == "EC2 instances should use private IPs only."
)
def test_finding_groups_status_pass_when_no_fail(
self, authenticated_client, finding_groups_fixture
@@ -17393,12 +17031,6 @@ class TestFindingGroupViewSet:
assert attrs["fail_count"] == 0
assert attrs["resources_total"] == 1
assert attrs["resources_fail"] == 0
# check_title / check_description are resolved post-pagination from the
# summary table, not from the finding's check_metadata.
assert attrs["check_title"] == "Ensure EC2 instances do not have public IPs"
assert (
attrs["check_description"] == "EC2 instances should use private IPs only."
)
def test_finding_groups_latest_status_in_filter(
self, authenticated_client, finding_groups_fixture
@@ -17656,20 +17288,18 @@ class TestFindingGroupViewSet:
check_ids = [item["id"] for item in data]
assert check_ids == sorted(check_ids)
def test_finding_groups_latest_sort_by_check_title_not_supported(
def test_finding_groups_latest_sort_by_check_title(
self, authenticated_client, finding_groups_fixture
):
"""check_title is not a sortable field for finding groups.
Titles live in the TOASTed check_metadata blob and are resolved after
pagination from the summary table, so they cannot drive DB-level
ordering. Requesting that sort is rejected.
"""
"""Test /latest supports sorting by check_title."""
response = authenticated_client.get(
reverse("finding-group-latest"),
{"sort": "check_title"},
)
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
check_titles = [item["attributes"]["check_title"] for item in data]
assert check_titles == sorted(check_titles)
@pytest.mark.parametrize(
"endpoint_name", ["finding-group-list", "finding-group-latest"]
-20
View File
@@ -37,7 +37,6 @@ if TYPE_CHECKING:
from prowler.providers.mongodbatlas.mongodbatlas_provider import (
MongodbatlasProvider,
)
from prowler.providers.okta.okta_provider import OktaProvider
from prowler.providers.openstack.openstack_provider import OpenstackProvider
from prowler.providers.oraclecloud.oraclecloud_provider import OraclecloudProvider
from prowler.providers.vercel.vercel_provider import VercelProvider
@@ -94,7 +93,6 @@ def return_prowler_provider(
| KubernetesProvider
| M365Provider
| MongodbatlasProvider
| OktaProvider
| OpenstackProvider
| OraclecloudProvider
| VercelProvider
@@ -183,10 +181,6 @@ def return_prowler_provider(
from prowler.providers.vercel.vercel_provider import VercelProvider
prowler_provider = VercelProvider
case Provider.ProviderChoices.OKTA.value:
from prowler.providers.okta.okta_provider import OktaProvider
prowler_provider = OktaProvider
case _:
raise ValueError(f"Provider type {provider.provider} not supported")
return prowler_provider
@@ -252,11 +246,6 @@ def get_prowler_provider_kwargs(
**prowler_provider_kwargs,
"team_id": provider.uid,
}
elif provider.provider == Provider.ProviderChoices.OKTA.value:
prowler_provider_kwargs = {
**prowler_provider_kwargs,
"okta_org_domain": provider.uid,
}
elif provider.provider == Provider.ProviderChoices.IMAGE.value:
# Detect whether uid is a registry URL (e.g. "docker.io/andoniaf") or
# a concrete image reference (e.g. "docker.io/andoniaf/myimage:latest").
@@ -301,7 +290,6 @@ def initialize_prowler_provider(
| KubernetesProvider
| M365Provider
| MongodbatlasProvider
| OktaProvider
| OpenstackProvider
| OraclecloudProvider
| VercelProvider
@@ -363,14 +351,6 @@ def prowler_provider_connection_test(provider: Provider) -> Connection:
"raise_on_exception": False,
}
return prowler_provider.test_connection(**vercel_kwargs)
elif provider.provider == Provider.ProviderChoices.OKTA.value:
okta_kwargs = {
**prowler_provider_kwargs,
"okta_org_domain": provider.uid,
"provider_id": provider.uid,
"raise_on_exception": False,
}
return prowler_provider.test_connection(**okta_kwargs)
elif provider.provider == Provider.ProviderChoices.IMAGE.value:
image_kwargs = {
"image": provider.uid,
@@ -404,26 +404,6 @@ from rest_framework_json_api import serializers
},
"required": ["clouds_yaml_content", "clouds_yaml_cloud"],
},
{
"type": "object",
"title": "Okta OAuth Credentials",
"properties": {
"okta_client_id": {
"type": "string",
"description": "Client ID of the Okta API Services app used for OAuth 2.0 private-key JWT authentication.",
},
"okta_private_key": {
"type": "string",
"description": "PEM-encoded private key whose matching public key (JWK) is registered on the Okta service app.",
},
"okta_scopes": {
"type": "array",
"items": {"type": "string"},
"description": "OAuth scopes to request. Optional; defaults to the minimum set required to run the currently enabled Okta checks.",
},
},
"required": ["okta_client_id", "okta_private_key"],
},
{
"type": "object",
"title": "Vercel API Token",
-13
View File
@@ -1397,7 +1397,6 @@ class ResourceIncludeSerializer(RLSSerializer):
"service",
"type_",
"tags",
"metadata",
"details",
"partition",
]
@@ -1405,7 +1404,6 @@ class ResourceIncludeSerializer(RLSSerializer):
"id": {"read_only": True},
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
"metadata": {"read_only": True},
"details": {"read_only": True},
"partition": {"read_only": True},
}
@@ -1545,8 +1543,6 @@ class BaseWriteProviderSecretSerializer(BaseWriteSerializer):
serializer = GCPProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.GOOGLEWORKSPACE.value:
serializer = GoogleWorkspaceProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.OKTA.value:
serializer = OktaProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.GITHUB.value:
serializer = GithubProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.IAC.value:
@@ -1692,15 +1688,6 @@ class GoogleWorkspaceProviderSecret(serializers.Serializer):
resource_name = "provider-secrets"
class OktaProviderSecret(serializers.Serializer):
# Validates the secret payload for an Okta provider. `#` comments are used
# instead of a docstring on purpose: schema generators may surface serializer
# docstrings as component descriptions.
#
# Required: the client ID and the private key (both plain string fields).
okta_client_id = serializers.CharField()
okta_private_key = serializers.CharField()
# Optional (required=False): a list of scope strings.
okta_scopes = serializers.ListField(child=serializers.CharField(), required=False)
class Meta:
# Same resource name literal as used for the provider-secret payloads.
resource_name = "provider-secrets"
class MongoDBAtlasProviderSecret(serializers.Serializer):
atlas_public_key = serializers.CharField()
atlas_private_key = serializers.CharField()
+69 -183
View File
@@ -116,7 +116,6 @@ from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset
from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
get_compliance_frameworks,
get_prowler_provider_compliance,
)
from api.constants import SEVERITY_ORDER
from api.db_router import MainRouter
@@ -1850,42 +1849,7 @@ class ProviderViewSet(DisablePaginationMixin, BaseRLSViewSet):
200: OpenApiResponse(
description="CSV file containing the compliance report"
),
202: OpenApiResponse(description="The task is in progress"),
403: OpenApiResponse(description="There is a problem with credentials"),
404: OpenApiResponse(
description="Compliance report not found, or the scan has no reports yet"
),
},
request=None,
),
compliance_ocsf=extend_schema(
tags=["Scan"],
summary="Retrieve compliance report as OCSF JSON",
description=(
"Download a specific compliance report as an OCSF JSON file. "
"Only universal frameworks that declare an output configuration "
"produce this artifact (currently 'dora' and 'csa_ccm_4.0'); any "
"other framework returns 404."
),
parameters=[
OpenApiParameter(
name="name",
type=str,
location=OpenApiParameter.PATH,
required=True,
description="The compliance report name, like 'dora'",
),
],
responses={
200: OpenApiResponse(
description="OCSF JSON file containing the compliance report"
),
202: OpenApiResponse(description="The task is in progress"),
403: OpenApiResponse(description="There is a problem with credentials"),
404: OpenApiResponse(
description="Compliance report not found, the framework does "
"not provide an OCSF export, or the scan has no reports yet"
),
404: OpenApiResponse(description="Compliance report not found"),
},
request=None,
),
@@ -2028,23 +1992,35 @@ class ScanViewSet(BaseRLSViewSet):
return queryset.select_related("provider", "task")
def get_serializer_class(self):
if self.action == "partial_update":
return ScanUpdateSerializer
action_defaults = {
"create": ScanCreateSerializer,
"report": ScanReportSerializer,
"compliance": ScanComplianceReportSerializer,
"compliance_ocsf": ScanComplianceReportSerializer,
}
response_only_actions = {"threatscore", "ens", "nis2", "csa", "cis"}
if self.action in action_defaults or self.action in response_only_actions:
if self.action == "create":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanCreateSerializer
elif self.action == "partial_update":
return ScanUpdateSerializer
elif self.action == "report":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanReportSerializer
elif self.action == "compliance":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanComplianceReportSerializer
elif self.action == "threatscore":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "ens":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "nis2":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "csa":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "cis":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
if self.action in action_defaults:
return action_defaults[self.action]
return super().get_serializer_class()
def partial_update(self, request, *args, **kwargs):
@@ -2083,17 +2059,12 @@ class ScanViewSet(BaseRLSViewSet):
if scan_instance.state == StateChoices.EXECUTING and scan_instance.task:
task = scan_instance.task
else:
# A scan can have several `scan-report` tasks (e.g. re-runs); take the
# most recent one. `.first()` also avoids `MultipleObjectsReturned`.
task = (
Task.objects.filter(
try:
task = Task.objects.get(
task_runner_task__task_name="scan-report",
task_runner_task__task_kwargs__contains=str(scan_instance.id),
)
.order_by("-inserted_at")
.first()
)
if task is None:
except Task.DoesNotExist:
return None
self.response_serializer_class = TaskSerializer
@@ -2168,32 +2139,27 @@ class ScanViewSet(BaseRLSViewSet):
status=status.HTTP_502_BAD_GATEWAY,
)
contents = resp.get("Contents", [])
matches = []
keys = []
for obj in contents:
key = obj["Key"]
key_basename = os.path.basename(key)
if any(ch in suffix for ch in ("*", "?", "[")):
if fnmatch.fnmatch(key_basename, suffix):
matches.append(obj)
keys.append(key)
elif key_basename == suffix:
matches.append(obj)
keys.append(key)
elif key.endswith(suffix):
# Backward compatibility if suffix already includes directories
matches.append(obj)
if not matches:
keys.append(key)
if not keys:
return Response(
{
"detail": f"No compliance file found for name '{os.path.splitext(suffix)[0]}'."
},
status=status.HTTP_404_NOT_FOUND,
)
# Return the most recently modified match (latest report) when
# several files share the prefix/suffix. `list_objects_v2` always
# returns `LastModified`; the fallback keeps ordering deterministic
# if it is ever absent.
key = max(matches, key=lambda o: (o.get("LastModified", ""), o["Key"]))[
"Key"
]
# path_pattern here is prefix, but in compliance we build correct suffix check before
key = keys[0]
else:
# path_pattern is exact key; HEAD before presigning to preserve the 404 contract.
key = path_pattern
@@ -2243,9 +2209,7 @@ class ScanViewSet(BaseRLSViewSet):
},
status=status.HTTP_404_NOT_FOUND,
)
# Return the most recently modified match (latest report) when the
# pattern resolves to several files.
filepath = max(files, key=os.path.getmtime)
filepath = files[0]
with open(filepath, "rb") as f:
content = f.read()
filename = os.path.basename(filepath)
@@ -2293,16 +2257,20 @@ class ScanViewSet(BaseRLSViewSet):
content, filename = loader
return self._serve_file(content, filename, "application/x-zip-compressed")
def _serve_compliance_artifact(self, scan, name, file_extension, content_type):
"""Resolve and serve a per-framework compliance artifact from disk/S3.
@action(
detail=True,
methods=["get"],
url_path="compliance/(?P<name>[^/]+)",
url_name="compliance",
)
def compliance(self, request, pk=None, name=None):
scan = self.get_object()
if name not in get_compliance_frameworks(scan.provider.provider):
return Response(
{"detail": f"Compliance '{name}' not found."},
status=status.HTTP_404_NOT_FOUND,
)
Shared by the CSV and OCSF compliance download actions. Both are
path-based (no query params) on purpose: ``get_object`` runs
``filter_queryset``, which triggers JSON:API's
``QueryParameterValidationFilter`` and 400s on any non-JSON:API
query param, so a ``?format=`` / ``?type=`` selector is not viable
here the format is encoded in the route instead.
"""
running_resp = self._get_task_status(scan)
if running_resp:
return running_resp
@@ -2319,66 +2287,25 @@ class ScanViewSet(BaseRLSViewSet):
bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/")
prefix = os.path.join(
os.path.dirname(key_prefix), "compliance", f"{name}.{file_extension}"
os.path.dirname(key_prefix), "compliance", f"{name}.csv"
)
loader = self._load_file(
prefix,
s3=True,
bucket=bucket,
list_objects=True,
content_type=content_type,
content_type="text/csv",
)
else:
base = os.path.dirname(scan.output_location)
pattern = os.path.join(base, "compliance", f"*_{name}.{file_extension}")
pattern = os.path.join(base, "compliance", f"*_{name}.csv")
loader = self._load_file(pattern, s3=False)
if isinstance(loader, HttpResponseBase):
return loader
content, filename = loader
return self._serve_file(content, filename, content_type)
@action(
    detail=True,
    methods=["get"],
    url_path="compliance/(?P<name>[^/]+)",
    url_name="compliance",
)
def compliance(self, request, pk=None, name=None):
    # Serve the CSV compliance report `name` for this scan.
    # Responds 404 when `name` is not a compliance framework known for the
    # scan's provider type; otherwise delegates to the shared artifact
    # helper with the CSV extension and content type.
    scan = self.get_object()
    known_frameworks = get_compliance_frameworks(scan.provider.provider)
    if name in known_frameworks:
        return self._serve_compliance_artifact(scan, name, "csv", "text/csv")
    return Response(
        {"detail": f"Compliance '{name}' not found."},
        status=status.HTTP_404_NOT_FOUND,
    )
@action(
    detail=True,
    methods=["get"],
    url_path="compliance/(?P<name>[^/]+)/ocsf",
    url_name="compliance-ocsf",
)
def compliance_ocsf(self, request, pk=None, name=None):
    # Serve the OCSF JSON compliance report `name` for this scan.
    # Two 404 guards run before delegating to the shared artifact helper:
    #   1. `name` must be a compliance framework known for the provider type;
    #   2. the framework object must exist in the provider compliance mapping
    #      and carry a truthy `outputs` attribute.
    scan = self.get_object()
    provider_type = scan.provider.provider

    if name not in get_compliance_frameworks(provider_type):
        return Response(
            {"detail": f"Compliance '{name}' not found."},
            status=status.HTTP_404_NOT_FOUND,
        )

    framework_obj = get_prowler_provider_compliance(provider_type).get(name)
    if not framework_obj or not getattr(framework_obj, "outputs", None):
        return Response(
            {"detail": f"Compliance '{name}' does not provide an OCSF export."},
            status=status.HTTP_404_NOT_FOUND,
        )

    return self._serve_compliance_artifact(
        scan, name, "ocsf.json", "application/json"
    )
return self._serve_file(content, filename, "text/csv")
@action(
detail=True,
@@ -3822,16 +3749,6 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
return queryset
return super().filter_queryset(queryset)
def _optimize_tags_loading(self, queryset):
    """Return ``queryset`` with resource tags prefetched for serialization.

    Tags are loaded through the ``resources__tags`` lookup, restricted to the
    tenant of the current request, and stored under the ``prefetched_tags``
    attribute. This avoids N+1 queries when findings are serialized.
    """
    tenant_tags = ResourceTag.objects.filter(tenant_id=self.request.tenant_id)
    tags_prefetch = Prefetch(
        "resources__tags",
        queryset=tenant_tags,
        to_attr="prefetched_tags",
    )
    return queryset.prefetch_related(tags_prefetch)
def list(self, request, *args, **kwargs):
filtered_queryset = self.filter_queryset(self.get_queryset())
return self.paginate_by_pk(
@@ -7452,15 +7369,6 @@ class FindingGroupViewSet(BaseRLSViewSet):
output_field=IntegerField(),
)
# `check_title` / `check_description` are intentionally NOT resolved
# here. They live in the large JSONB `check_metadata` blob (TOASTed),
# so reading them per finding row is very expensive, and pulling them
# in via a correlated subquery makes Django add the subquery to GROUP
# BY, which re-evaluates it once per input row. They are identical for
# every finding of a `check_id`, so `_post_process_aggregation` fills
# them from the summary table's plain columns in a single batched
# lookup scoped to the paginated page.
# `pass_count`, `fail_count` and `manual_count` only count non-muted
# findings. Muted findings are tracked separately via the
# `*_muted_count` fields.
@@ -7531,6 +7439,15 @@ class FindingGroupViewSet(BaseRLSViewSet):
agg_failing_since=Min(
"first_seen_at", filter=Q(status="FAIL", muted=False)
),
check_title=Coalesce(
Max(KeyTextTransform("checktitle", "check_metadata")),
Max(KeyTextTransform("CheckTitle", "check_metadata")),
Max(KeyTextTransform("Checktitle", "check_metadata")),
),
check_description=Coalesce(
Max(KeyTextTransform("description", "check_metadata")),
Max(KeyTextTransform("Description", "check_metadata")),
),
)
.annotate(
# Group is muted only if it has zero non-muted findings.
@@ -7567,17 +7484,14 @@ class FindingGroupViewSet(BaseRLSViewSet):
def _get_latest_findings_per_provider(self, filtered_queryset):
"""Keep only findings from each provider's most recent completed scan."""
# Materialize to a literal IN list. Left as a subquery, Postgres can't
# estimate the match count and picks a serial nested loop on
# resource_finding_mappings when one scan dominates findings
latest_scan_ids = list(
latest_scan_ids = (
Scan.objects.filter(
tenant_id=self.request.tenant_id,
state=StateChoices.COMPLETED,
)
.order_by("provider_id", "-completed_at", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
.values("id")
)
return filtered_queryset.filter(scan_id__in=latest_scan_ids)
@@ -7589,38 +7503,9 @@ class FindingGroupViewSet(BaseRLSViewSet):
- Computes aggregated status (FAIL > PASS > MANUAL); the orthogonal
``muted`` boolean is already on the row from the SQL aggregation
- Converts provider string to list
- Fills check_title / check_description for the findings path
"""
rows = list(aggregated_data)
# The findings-aggregation path omits check_title / check_description
# (they sit in TOASTed JSONB; see _aggregate_findings). Fill them from
# the summary table's plain columns in one query scoped to this page.
# The summary-aggregation path already carries them, so skip it there.
if rows and "check_title" not in rows[0]:
check_ids = [row["check_id"] for row in rows]
role = get_role(self.request.user, self.request.tenant_id)
summaries = FindingGroupDailySummary.objects.filter(
tenant_id=self.request.tenant_id,
check_id__in=check_ids,
)
# Scope to the user's providers, mirroring get_queryset(), so titles
# are read only from providers the user can see.
if not role.unlimited_visibility:
summaries = summaries.filter(provider__in=get_providers(role))
metadata_by_check = {
item["check_id"]: item
for item in summaries.order_by("check_id", "-inserted_at")
.distinct("check_id")
.values("check_id", "check_title", "check_description")
}
for row in rows:
metadata = metadata_by_check.get(row["check_id"], {})
row["check_title"] = metadata.get("check_title")
row["check_description"] = metadata.get("check_description")
results = []
for row in rows:
for row in aggregated_data:
# Convert severity order back to string
severity_order = row.get("severity_order", 1)
row["severity"] = SEVERITY_ORDER_REVERSE.get(
@@ -7666,6 +7551,7 @@ class FindingGroupViewSet(BaseRLSViewSet):
_FINDING_GROUP_SORT_MAP = {
"check_id": "check_id",
"check_title": "check_title",
"severity": "severity_order",
"status": "status_order",
"muted": "muted",
-55
View File
@@ -26,61 +26,6 @@ celery_app.conf.result_backend_transport_options = {
}
celery_app.conf.visibility_timeout = BROKER_VISIBILITY_TIMEOUT
# Durable delivery: keep the message until the task finishes, so a worker killed
# mid-task (deploy/OOM/eviction) does not silently drop it. Reserve one task at a
# time so a crash exposes at most one extra reserved message.
celery_app.conf.task_acks_late = True
celery_app.conf.task_reject_on_worker_lost = True
celery_app.conf.worker_prefetch_multiplier = env.int(
"DJANGO_CELERY_WORKER_PREFETCH_MULTIPLIER", default=1
)
# On SIGTERM, give the worker time to finish or re-queue in-flight tasks before
# it is forcefully killed (Celery 5.5+ soft shutdown).
celery_app.conf.worker_soft_shutdown_timeout = env.int(
"DJANGO_CELERY_WORKER_SOFT_SHUTDOWN_TIMEOUT", default=60
)
# Bound execution so a blocked task cannot pin a worker forever. Connection
# checks get a tight limit; scans and provider/tenant deletions can legitimately
# run for more than a day on large tenants, so they get a much higher cap.
# The default for every other task is set as the global limit, not as a "*"
# annotation: Celery applies the "*" entry AFTER the per-task one, so a "*" in
# task_annotations would silently overwrite every specific limit defined below.
#
# All limits are in seconds. Defaults: 6 h hard limit for regular tasks and
# 48 h for long tasks. Each soft limit defaults to 600 s (10 min) below the
# already-resolved hard limit, so overriding only a hard limit through its
# environment variable shifts the matching soft limit as well.
_TASK_HARD_LIMIT = env.int("DJANGO_CELERY_TASK_TIME_LIMIT", default=6 * 60 * 60)
_TASK_SOFT_LIMIT = env.int(
"DJANGO_CELERY_TASK_SOFT_TIME_LIMIT", default=_TASK_HARD_LIMIT - 600
)
_LONG_TASK_HARD_LIMIT = env.int(
"DJANGO_CELERY_LONG_TASK_TIME_LIMIT", default=48 * 60 * 60
)
_LONG_TASK_SOFT_LIMIT = env.int(
"DJANGO_CELERY_LONG_TASK_SOFT_TIME_LIMIT", default=_LONG_TASK_HARD_LIMIT - 600
)
# Global defaults applied to every task without a specific annotation below.
celery_app.conf.task_time_limit = _TASK_HARD_LIMIT
celery_app.conf.task_soft_time_limit = _TASK_SOFT_LIMIT
# Per-task overrides, keyed by task name: connection checks are capped at
# 60 s soft / 120 s hard; scans and deletions use the long-task limits.
celery_app.conf.task_annotations = {
**{
name: {"soft_time_limit": 60, "time_limit": 120}
for name in (
"provider-connection-check",
"integration-connection-check",
"lighthouse-connection-check",
"lighthouse-provider-connection-check",
)
},
**{
name: {
"soft_time_limit": _LONG_TASK_SOFT_LIMIT,
"time_limit": _LONG_TASK_HARD_LIMIT,
}
for name in (
"scan-perform",
"scan-perform-scheduled",
"provider-deletion",
"tenant-deletion",
)
},
}
celery_app.autodiscover_tasks(["api"])
-7
View File
@@ -571,12 +571,6 @@ def providers_fixture(tenants_fixture):
alias="vercel_testing",
tenant_id=tenant.id,
)
provider14 = Provider.objects.create(
provider="okta",
uid="acme.okta.com",
alias="okta_testing",
tenant_id=tenant.id,
)
return (
provider1,
@@ -592,7 +586,6 @@ def providers_fixture(tenants_fixture):
provider11,
provider12,
provider13,
provider14,
)
@@ -1,14 +1,12 @@
from datetime import datetime, timedelta, timezone
from celery import states
from celery import current_app, states
from celery.utils.log import get_task_logger
from config.django.base import ATTACK_PATHS_SCAN_STALE_THRESHOLD_MINUTES
from tasks.jobs.attack_paths.db_utils import (
_mark_scan_finished,
recover_graph_data_ready,
)
from tasks.jobs.orphan_recovery import is_worker_alive as _is_worker_alive
from tasks.jobs.orphan_recovery import revoke_task as _revoke_task
from api.attack_paths import database as graph_database
from api.db_router import MainRouter
@@ -152,6 +150,32 @@ def _cleanup_stale_scheduled_scans(cutoff: datetime) -> list[str]:
return cleaned_up
def _is_worker_alive(worker: str) -> bool:
    """Ping one Celery worker and report whether it answered.

    Fails open: if the ping itself raises, the error is logged and the worker
    is reported as alive (`True`).
    """
    try:
        inspector = current_app.control.inspect(destination=[worker], timeout=1.0)
        replies = inspector.ping()
        if replies is None:
            # No reply at all within the timeout.
            return False
        return worker in replies
    except Exception:
        logger.exception(f"Failed to ping worker {worker}, treating as alive")
        return True
def _revoke_task(task_result, terminate: bool = True) -> None:
    """Best-effort revoke of a Celery task; failures are logged, never raised.

    With `terminate=True` the revoke also asks for a SIGTERM of the task if it
    is mid-execution; use it for EXECUTING cleanup. With `terminate=False` the
    task id is only marked as revoked across workers, so a worker that later
    pulls the queued message discards it; use it for SCHEDULED cleanup where
    the task has not run yet.
    """
    try:
        if terminate:
            revoke_options = {"terminate": True, "signal": "SIGTERM"}
        else:
            revoke_options = {}
        current_app.control.revoke(task_result.task_id, **revoke_options)
        logger.info(f"Revoked task {task_result.task_id}")
    except Exception:
        logger.exception(f"Failed to revoke task {task_result.task_id}")
def _cleanup_scan(scan, task_result, reason: str) -> bool:
"""
Clean up a single stale `AttackPathsScan`:
-9
View File
@@ -11,7 +11,6 @@ from api.db_utils import batch_delete, rls_transaction
from api.models import (
AttackPathsScan,
Finding,
JiraIssueDispatch,
Provider,
ProviderComplianceScore,
Resource,
@@ -81,14 +80,6 @@ def delete_provider(tenant_id: str, pk: str):
deletion_steps = [
("Scan Summaries", ScanSummary.all_objects.filter(scan__provider=instance)),
(
"Jira Issue Dispatches",
JiraIssueDispatch.objects.filter(
finding_id__in=Finding.all_objects.filter(
scan__provider=instance
).values_list("id", flat=True)
),
),
("Findings", Finding.all_objects.filter(scan__provider=instance)),
("Resources", Resource.all_objects.filter(provider=instance)),
("Scans", Scan.all_objects.filter(provider=instance)),
+10
View File
@@ -39,6 +39,11 @@ from prowler.lib.outputs.compliance.cis.cis_oraclecloud import OracleCloudCIS
from prowler.lib.outputs.compliance.cisa_scuba.cisa_scuba_googleworkspace import (
GoogleWorkspaceCISASCuBA,
)
from prowler.lib.outputs.compliance.csa.csa_alibabacloud import AlibabaCloudCSA
from prowler.lib.outputs.compliance.csa.csa_aws import AWSCSA
from prowler.lib.outputs.compliance.csa.csa_azure import AzureCSA
from prowler.lib.outputs.compliance.csa.csa_gcp import GCPCSA
from prowler.lib.outputs.compliance.csa.csa_oraclecloud import OracleCloudCSA
from prowler.lib.outputs.compliance.ens.ens_aws import AWSENS
from prowler.lib.outputs.compliance.ens.ens_azure import AzureENS
from prowler.lib.outputs.compliance.ens.ens_gcp import GCPENS
@@ -97,6 +102,7 @@ COMPLIANCE_CLASS_MAP = {
(lambda name: name == "prowler_threatscore_aws", ProwlerThreatScoreAWS),
(lambda name: name.startswith("ccc_"), CCC_AWS),
(lambda name: name.startswith("c5_"), AWSC5),
(lambda name: name.startswith("csa_"), AWSCSA),
(lambda name: name == "asd_essential_eight_aws", ASDEssentialEightAWS),
],
"azure": [
@@ -107,6 +113,7 @@ COMPLIANCE_CLASS_MAP = {
(lambda name: name.startswith("ccc_"), CCC_Azure),
(lambda name: name == "prowler_threatscore_azure", ProwlerThreatScoreAzure),
(lambda name: name == "c5_azure", AzureC5),
(lambda name: name.startswith("csa_"), AzureCSA),
],
"gcp": [
(lambda name: name.startswith("cis_"), GCPCIS),
@@ -116,6 +123,7 @@ COMPLIANCE_CLASS_MAP = {
(lambda name: name == "prowler_threatscore_gcp", ProwlerThreatScoreGCP),
(lambda name: name.startswith("ccc_"), CCC_GCP),
(lambda name: name == "c5_gcp", GCPC5),
(lambda name: name.startswith("csa_"), GCPCSA),
],
"kubernetes": [
(lambda name: name.startswith("cis_"), KubernetesCIS),
@@ -144,9 +152,11 @@ COMPLIANCE_CLASS_MAP = {
"image": [],
"oraclecloud": [
(lambda name: name.startswith("cis_"), OracleCloudCIS),
(lambda name: name.startswith("csa_"), OracleCloudCSA),
],
"alibabacloud": [
(lambda name: name.startswith("cis_"), AlibabaCloudCIS),
(lambda name: name.startswith("csa_"), AlibabaCloudCSA),
(
lambda name: name == "prowler_threatscore_alibabacloud",
ProwlerThreatScoreAlibaba,
+51 -100
View File
@@ -9,7 +9,7 @@ from tasks.utils import batched
from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import REPLICA_MAX_ATTEMPTS, REPLICA_RETRY_BASE_DELAY, rls_transaction
from api.models import Finding, Integration, JiraIssueDispatch, Provider
from api.models import Finding, Integration, Provider
from api.utils import initialize_prowler_integration, initialize_prowler_provider
from prowler.lib.outputs.asff.asff import ASFF
from prowler.lib.outputs.compliance.generic.generic import GenericCompliance
@@ -482,115 +482,66 @@ def send_findings_to_jira(
with rls_transaction(tenant_id):
integration = Integration.objects.get(id=integration_id)
jira_integration = initialize_prowler_integration(integration)
# Idempotency: findings already ticketed for this integration must not be
# sent again on a re-run (e.g. orphan recovery), to avoid duplicate issues
already_sent = {
str(fid)
for fid in JiraIssueDispatch.objects.filter(
integration_id=integration_id, finding_id__in=finding_ids
).values_list("finding_id", flat=True)
}
num_tickets_created = 0
skipped_count = 0
for finding_id in finding_ids:
if str(finding_id) in already_sent:
skipped_count += 1
continue
# Reserve the finding BEFORE the external call. The unique constraint on
# (tenant, integration, finding) makes the dispatch row the single source of
# truth, so a concurrent run or a retry that raced past the bulk pre-check
# cannot create a duplicate issue: created=False means another run already
# claimed it. The reservation is released below if the send does not succeed.
with rls_transaction(tenant_id):
_, created = JiraIssueDispatch.objects.get_or_create(
tenant_id=tenant_id,
integration_id=integration_id,
finding_id=finding_id,
finding_instance = (
Finding.all_objects.select_related("scan__provider")
.prefetch_related("resources")
.get(id=finding_id)
)
if not created:
skipped_count += 1
continue
sent = False
try:
with rls_transaction(tenant_id):
finding_instance = (
Finding.all_objects.select_related("scan__provider")
.prefetch_related("resources")
.get(id=finding_id)
)
# Extract resource information
resource = (
finding_instance.resources.first()
if finding_instance.resources.exists()
else None
)
resource_uid = resource.uid if resource else ""
resource_name = resource.name if resource else ""
resource_tags = {}
if resource and hasattr(resource, "tags"):
resource_tags = resource.get_tags(tenant_id)
# Extract resource information
resource = (
finding_instance.resources.first()
if finding_instance.resources.exists()
else None
)
resource_uid = resource.uid if resource else ""
resource_name = resource.name if resource else ""
resource_tags = {}
if resource and hasattr(resource, "tags"):
resource_tags = resource.get_tags(tenant_id)
# Get region
region = resource.region if resource and resource.region else ""
# Get region
region = resource.region if resource and resource.region else ""
# Extract remediation information from check_metadata
check_metadata = finding_instance.check_metadata
remediation = check_metadata.get("remediation", {})
recommendation = remediation.get("recommendation", {})
remediation_code = remediation.get("code", {})
# Extract remediation information from check_metadata
check_metadata = finding_instance.check_metadata
remediation = check_metadata.get("remediation", {})
recommendation = remediation.get("recommendation", {})
remediation_code = remediation.get("code", {})
# Send the individual finding to Jira
sent = bool(
jira_integration.send_finding(
check_id=finding_instance.check_id,
check_title=check_metadata.get("checktitle", ""),
severity=finding_instance.severity,
status=finding_instance.status,
status_extended=finding_instance.status_extended or "",
provider=finding_instance.scan.provider.provider,
region=region,
resource_uid=resource_uid,
resource_name=resource_name,
risk=check_metadata.get("risk", ""),
recommendation_text=recommendation.get("text", ""),
recommendation_url=recommendation.get("url", ""),
remediation_code_native_iac=remediation_code.get(
"nativeiac", ""
),
remediation_code_terraform=remediation_code.get(
"terraform", ""
),
remediation_code_cli=remediation_code.get("cli", ""),
remediation_code_other=remediation_code.get("other", ""),
resource_tags=resource_tags,
compliance=finding_instance.compliance or {},
project_key=project_key,
issue_type=issue_type,
)
)
finally:
if not sent:
# Release the reservation so a later run can retry this finding: it
# was not ticketed (send failed or raised), so the row must not block
# a future legitimate send.
with rls_transaction(tenant_id):
JiraIssueDispatch.objects.filter(
tenant_id=tenant_id,
integration_id=integration_id,
finding_id=finding_id,
).delete()
if sent:
num_tickets_created += 1
else:
logger.error(f"Failed to send finding {finding_id} to Jira")
# Send the individual finding to Jira
result = jira_integration.send_finding(
check_id=finding_instance.check_id,
check_title=check_metadata.get("checktitle", ""),
severity=finding_instance.severity,
status=finding_instance.status,
status_extended=finding_instance.status_extended or "",
provider=finding_instance.scan.provider.provider,
region=region,
resource_uid=resource_uid,
resource_name=resource_name,
risk=check_metadata.get("risk", ""),
recommendation_text=recommendation.get("text", ""),
recommendation_url=recommendation.get("url", ""),
remediation_code_native_iac=remediation_code.get("nativeiac", ""),
remediation_code_terraform=remediation_code.get("terraform", ""),
remediation_code_cli=remediation_code.get("cli", ""),
remediation_code_other=remediation_code.get("other", ""),
resource_tags=resource_tags,
compliance=finding_instance.compliance or {},
project_key=project_key,
issue_type=issue_type,
)
if result:
num_tickets_created += 1
else:
logger.error(f"Failed to send finding {finding_id} to Jira")
return {
"created_count": num_tickets_created,
"failed_count": len(finding_ids) - num_tickets_created - skipped_count,
"skipped_count": skipped_count,
"failed_count": len(finding_ids) - num_tickets_created,
}
@@ -1,397 +0,0 @@
"""Detect and recover orphaned Celery tasks.
A task is "orphaned" when its result row is non-terminal (STARTED/RECEIVED) but the
worker that was running it is gone (deploy, OOM, eviction). We tell a real orphan
from a still-running task by pinging the worker recorded on its `TaskResult`:
- worker responds -> the task is in flight, leave it alone (never double-run);
- worker is gone -> real orphan: mark the stale result terminal (so pending/started
alerts clear), then re-enqueue the task from its stored name + kwargs.
This recovers only allowlisted tasks with local, proven idempotency. Celery's
`result_extended=True` gives us the stored `task_name`/`task_kwargs`/`worker` once
the task starts, but external side-effect tasks are failed instead of blindly
re-run. A small recovery cap stops a task that repeatedly kills its worker from
looping forever.
This is the shared engine behind both the periodic Beat watchdog and the
`reconcile_orphan_tasks` management command.
"""
import ast
import json
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from uuid import uuid4
from celery import current_app, states
from celery.utils.log import get_task_logger
from django.db import connections
# Module-level Celery task logger used by every helper in this module.
logger = get_task_logger(__name__)
# Arbitrary constant key for pg_try_advisory_lock so only one reconciliation
# runs at a time across replicas / the watchdog / the command.
# The value is the ASCII bytes of "prow" packed into one integer.
ORPHAN_RECOVERY_LOCK_KEY = 0x70726F77  # "prow"
# Non-terminal states that mean "a worker had this and may have died with it".
IN_FLIGHT_STATES = (states.STARTED, states.RECEIVED)
# Scan tasks are recovered by re-running scan-perform on the EXISTING scan row,
# not by re-enqueuing the original task: re-enqueuing scan-perform-scheduled would
# hit its "a scan is already executing" guard and no-op, leaving the scan stuck.
_SCAN_TASKS = ("scan-perform", "scan-perform-scheduled")
# Tasks with proven idempotency are auto re-enqueued. Scans/summaries clear and
# rewrite their own rows. integration-jira is safe too: each finding is reserved in
# JiraIssueDispatch before the external call, so a re-run skips already-ticketed
# findings (worst case one finding missed on a mid-send crash, never a duplicate).
# Other external side effects stay terminal: integration-s3 rebuilds its upload from
# worker-local files that do not survive a crash, and report/Security Hub recovery is
# out of scope.
# Any task name NOT in this set is marked failed instead of being re-run.
REENQUEUEABLE_TASKS = {
    *_SCAN_TASKS,
    "provider-deletion",
    "tenant-deletion",
    "scan-summary",
    "scan-compliance-overviews",
    "scan-provider-compliance-scores",
    "scan-daily-severity",
    "scan-finding-group-summaries",
    "scan-reset-ephemeral-resources",
    "integration-jira",
}
# Tasks excluded from generic recovery: attack-paths scans are handled by their own
# stale-cleanup (which also drops the temp Neo4j db), and the maintenance tasks must
# not self-recover (they run again on their own schedule).
# Rows with these task names are filtered out of the candidate query entirely.
_SKIP_RECOVERY = {
    "attack-paths-scan-perform",
    "attack-paths-cleanup-stale-scans",
    "reconcile-orphan-tasks",
}
@contextmanager
def advisory_lock(key: int = ORPHAN_RECOVERY_LOCK_KEY, using: str = "default"):
    """Attempt a non-blocking Postgres advisory lock and yield whether it was won.

    Args:
        key: Integer key passed to ``pg_try_advisory_lock``.
        using: Django database alias whose connection holds the lock.

    Yields:
        True when this session acquired the lock, False otherwise. Callers that
        receive False are expected to no-op.

    The lock is released with ``pg_advisory_unlock`` on exit, but only when it
    was actually acquired.
    """
    with connections[using].cursor() as db_cursor:
        db_cursor.execute("SELECT pg_try_advisory_lock(%s)", [key])
        row = db_cursor.fetchone()
        holds_lock = bool(row[0])

        if not holds_lock:
            # Nothing to release: just hand the negative outcome to the caller.
            yield holds_lock
            return

        try:
            yield holds_lock
        finally:
            # Always give the lock back, even if the managed body raised.
            db_cursor.execute("SELECT pg_advisory_unlock(%s)", [key])
def is_worker_alive(worker: str, timeout: float = 1.0) -> bool:
    """Ping one Celery worker and report whether it answered.

    Args:
        worker: Worker name to target with the ping.
        timeout: Seconds to wait for the ping reply.

    Returns:
        True if the worker appears in the ping reply, False if there was no
        reply or the worker is absent from it. Any exception while pinging is
        logged and treated as "alive", so an unreachable control bus never
        causes a still-running task to be re-enqueued.
    """
    try:
        inspector = current_app.control.inspect(
            destination=[worker], timeout=timeout
        )
        replies = inspector.ping()
        if replies is None:
            return False
        return worker in replies
    except Exception:
        logger.exception(f"Failed to ping worker {worker}, treating as alive")
        return True
def revoke_task(task_result, terminate: bool = True) -> None:
    """Revoke the Celery task behind a TaskResult; failures are logged, not raised.

    Args:
        task_result: Object exposing the ``task_id`` to revoke.
        terminate: When truthy, the revoke is sent with ``terminate=True`` and
            ``signal="SIGTERM"``. When falsy, a plain revoke is sent, which only
            marks the id revoked so a worker pulling the queued message discards
            it (use before re-enqueuing, so a later broker redelivery of the
            stale message is dropped).
    """
    try:
        revoke_options = {}
        if terminate:
            revoke_options = {"terminate": True, "signal": "SIGTERM"}
        current_app.control.revoke(task_result.task_id, **revoke_options)
        logger.info(f"Revoked task {task_result.task_id}")
    except Exception:
        # Best effort: a failed revoke must not abort the caller.
        logger.exception(f"Failed to revoke task {task_result.task_id}")
def _decode_celery_field(value, default):
"""Decode django-celery-results' stored task_args/task_kwargs to a Python object.
The backend stores them as a (sometimes double-encoded) repr/JSON string. An
empty or missing field returns ``default``; a non-empty value that cannot be
decoded raises ``ValueError`` so the caller can avoid re-enqueuing a task with
the wrong arguments.
"""
obj = value
for _ in range(2): # values can be double-encoded (a string holding a repr)
if not isinstance(obj, str):
break
text = obj.strip()
if not text:
return default
parsed = None
for parser in (ast.literal_eval, json.loads):
try:
parsed = parser(text)
break
except (ValueError, SyntaxError, TypeError):
continue
if parsed is None:
raise ValueError(f"undecodable celery field: {text[:120]!r}")
obj = parsed
return default if obj is None else obj
def reconcile_orphans(
    grace_minutes: int = 2,
    max_attempts: int = 3,
    window_hours: int = 6,
    dry_run: bool = False,
) -> dict:
    """Perform one orphan sweep while holding the single-flight advisory lock.

    Args:
        grace_minutes: Forwarded to the task-result reconciliation step.
        max_attempts: Forwarded to the task-result reconciliation step.
        window_hours: Forwarded to the task-result reconciliation step.
        dry_run: When True, the attack-paths stale cleanup is not invoked and
            the flag is forwarded so the reconciliation only reports.

    Returns:
        ``{"acquired": False}`` when another run holds the lock; otherwise the
        reconciliation summary merged with ``"acquired": True`` (plus an
        ``"attack_paths"`` entry when not a dry run).
    """
    with advisory_lock() as holds_lock:
        if not holds_lock:
            logger.info("Orphan reconcile skipped: another run holds the lock")
            return {"acquired": False}

        # Importing the tasks module populates the task registry, so any task
        # can be re-enqueued by name.
        import tasks.tasks  # noqa: F401

        summary = _reconcile_task_results(
            grace_minutes=grace_minutes,
            max_attempts=max_attempts,
            window_hours=window_hours,
            dry_run=dry_run,
        )
        if dry_run:
            return {"acquired": True, **summary}

        from tasks.jobs.attack_paths.cleanup import cleanup_stale_attack_paths_scans

        summary["attack_paths"] = cleanup_stale_attack_paths_scans()
        return {"acquired": True, **summary}
def _reconcile_task_results(
    grace_minutes: int, max_attempts: int, window_hours: int, dry_run: bool
) -> dict:
    """Classify stale in-flight TaskResult rows and recover the orphaned ones.

    Candidates are rows in an in-flight state, created more than
    ``grace_minutes`` ago, with a recorded worker, and whose task name is not in
    the skip list. A candidate whose worker answers a ping is skipped; the rest
    are recovered (or only listed as recoverable when ``dry_run`` is set).

    Returns:
        Dict with ``recovered``, ``failed`` and ``skipped`` lists of task ids.
    """
    from django_celery_results.models import TaskResult

    created_before = datetime.now(tz=timezone.utc) - timedelta(minutes=grace_minutes)
    stale_rows = TaskResult.objects.filter(
        status__in=IN_FLIGHT_STATES, date_created__lt=created_before
    )
    stale_rows = stale_rows.exclude(worker__isnull=True)
    stale_rows = stale_rows.exclude(worker="")
    stale_rows = stale_rows.exclude(task_name__in=_SKIP_RECOVERY)
    candidates = list(stale_rows)

    # Each distinct worker is pinged at most once.
    liveness = {}
    for worker_name in {row.worker for row in candidates}:
        liveness[worker_name] = is_worker_alive(worker_name)

    recovered = []
    failed = []
    skipped = []
    for orphan in candidates:
        if liveness.get(orphan.worker, True):
            # Worker is up: the task is in flight, so it must not be double-run.
            skipped.append(orphan.task_id)
        elif dry_run:
            recovered.append(orphan.task_id)
        elif _recover_task(orphan, max_attempts, window_hours) == "recovered":
            recovered.append(orphan.task_id)
        else:
            failed.append(orphan.task_id)

    logger.info(
        "Orphan reconcile: recovered=%d failed=%d skipped(in-flight)=%d",
        len(recovered),
        len(failed),
        len(skipped),
    )
    return {"recovered": recovered, "failed": failed, "skipped": skipped}
def _recovery_attempt_count(name: str, kwargs_repr, window_hours: int) -> int:
    """Bump and return the recovery counter for one (task name, kwargs) pair.

    The counter lives in the store reachable through the ``redis`` client at
    ``settings.CELERY_BROKER_URL`` and is keyed by a SHA-1 digest of
    ``"{name}|{kwargs_repr}"``. The expiry of ``max(1, window_hours)`` hours is
    set only when the counter is first created.

    Returns:
        The incremented count, or 1 if anything fails while talking to the
        store (fail-open: recovery is allowed and the error is logged).
    """
    import hashlib

    from django.conf import settings

    try:
        import redis

        client = redis.from_url(settings.CELERY_BROKER_URL)
        fingerprint = hashlib.sha1(
            f"{name}|{kwargs_repr}".encode(), usedforsecurity=False
        ).hexdigest()
        counter_key = "orphan-recovery:" + fingerprint
        attempts = client.incr(counter_key)
        if attempts == 1:
            # First hit inside the window: start the expiry clock.
            ttl_seconds = max(1, window_hours) * 3600
            client.expire(counter_key, ttl_seconds)
        return int(attempts)
    except Exception:
        logger.exception("Recovery-attempt counter unavailable; allowing recovery")
        return 1
def _recover_task(task_result, max_attempts: int, window_hours: int) -> str:
    """Recover one orphaned task. Returns 'recovered' or 'failed'.

    Steps, in order:
      1. Revoke the stale task id without terminating anything.
      2. Mark the TaskResult row REVOKED with ``date_done`` set to now.
      3. Bump the recovery-attempt counter for this (name, kwargs) pair.
      4. Fail the task if it is not allowlisted or the cap is exceeded.
      5. For scan tasks with a linked Scan row, re-run that scan.
      6. Otherwise re-enqueue the registered task under a fresh task id using
         the decoded stored args/kwargs.

    Args:
        task_result: Row exposing ``task_id``, ``task_name``, ``task_args``,
            ``task_kwargs``, ``status``, ``date_done`` and ``save``.
        max_attempts: Recovery is refused once the counter exceeds this value.
        window_hours: Passed through to the recovery-attempt counter.

    Returns:
        "recovered" when a replacement was enqueued, otherwise "failed".
    """
    # Capture name/args/kwargs now: revoking can let a worker blank the row.
    name = task_result.task_name
    args_repr = task_result.task_args
    kwargs_repr = task_result.task_kwargs
    now = datetime.now(tz=timezone.utc)
    # Drop any future broker redelivery of the stale message.
    revoke_task(task_result, terminate=False)
    # Mark the stale result terminal so "pending/started forever" alerts clear.
    task_result.status = states.REVOKED
    task_result.date_done = now
    task_result.save(update_fields=["status", "date_done"])
    # The counter is bumped for every orphan reaching this point, including ones
    # that turn out not to be allowlisted below.
    attempt = _recovery_attempt_count(name, kwargs_repr, window_hours)
    if name not in REENQUEUEABLE_TASKS or attempt > max_attempts:
        # The allowlist check takes precedence when choosing the logged reason.
        reason = (
            f"{name} is not allowlisted for auto recovery"
            if name not in REENQUEUEABLE_TASKS
            else f"recovery cap reached ({attempt}/{max_attempts})"
        )
        _fail_domain_row(task_result.task_id, name, now)
        logger.warning(
            "Orphan %s (%s) not re-enqueued: %s", task_result.task_id, name, reason
        )
        return "failed"
    # Scan tasks: re-run the EXISTING scan row directly via scan-perform, so the
    # scheduled-scan "already executing" guard cannot turn recovery into a no-op.
    # Falls through to the generic path only if no scan is linked yet (e.g. a
    # scheduled task that died before creating one), where re-running it creates one.
    if name in _SCAN_TASKS:
        scan = _scan_for_task(task_result.task_id)
        if scan is not None:
            # A False result means the scan was not re-enqueued by the helper.
            if not _reenqueue_scan(task_result.task_id, scan):
                return "failed"
            logger.info(
                "Re-enqueued orphaned scan %s (was task %s)",
                scan.id,
                task_result.task_id,
            )
            return "recovered"
    # Generic path: look the task up by name in the Celery registry.
    task_obj = current_app.tasks.get(name)
    if task_obj is None:
        logger.error(
            "Orphan %s: task %s not registered, cannot re-enqueue",
            task_result.task_id,
            name,
        )
        return "failed"
    try:
        args = _decode_celery_field(args_repr, [])
        kwargs = _decode_celery_field(kwargs_repr, {})
    except ValueError:
        # Never re-run a task with arguments that could not be decoded.
        logger.error(
            "Orphan %s (%s): could not decode stored args/kwargs, not re-enqueuing",
            task_result.task_id,
            name,
        )
        _fail_domain_row(task_result.task_id, name, now)
        return "failed"
    new_task_id = str(uuid4())
    # Decoded values of an unexpected type are replaced by empty args/kwargs.
    # NOTE(review): an exception raised by apply_async is not caught here, so it
    # propagates to the caller after the row has already been marked REVOKED.
    task_obj.apply_async(
        args=list(args) if isinstance(args, (list, tuple)) else [],
        kwargs=kwargs if isinstance(kwargs, dict) else {},
        task_id=new_task_id,
    )
    logger.info(
        "Re-enqueued orphan %s (%s) as %s", task_result.task_id, name, new_task_id
    )
    return "recovered"
def _scan_for_task(task_id: str):
    """Look up the Scan whose ``task_id`` matches the given Celery task id.

    The lookup goes through the ``all_objects`` manager on the admin database
    alias (read across tenants).

    Returns:
        The first matching Scan, or None when no scan is linked to the task.
    """
    from api.db_router import MainRouter
    from api.models import Scan

    admin_scans = Scan.all_objects.using(MainRouter.admin_db)
    matching = admin_scans.filter(task_id=task_id)
    return matching.first()
def _reenqueue_scan(old_task_id: str, scan) -> bool:
    """Re-run an orphaned scan via scan-perform on the existing row.

    Pre-provisions the new task linkage (TaskResult + api.Task) and relinks the
    Scan before enqueuing, so the FK is valid and a worker can never outrun the DB.
    The relink is conditional on the scan still pointing at the old task, so a stale
    orphan can never clobber a newer linkage.

    Args:
        old_task_id: Celery task id the scan is expected to still reference.
        scan: Scan row providing ``id``, ``tenant_id`` and ``provider_id``.

    Returns:
        True when the scan was relinked and scan-perform was enqueued; False when
        the scan row is gone or already points at a different task.
    """
    from django_celery_results.models import TaskResult
    from api.db_utils import rls_transaction
    from api.models import Scan
    from api.models import Task as APITask
    from tasks.tasks import perform_scan_task

    tenant_id = str(scan.tenant_id)
    new_task_id = str(uuid4())
    with rls_transaction(tenant_id):
        # Lock the scan row so the check-and-relink below cannot race.
        locked_scan = Scan.all_objects.select_for_update().filter(id=scan.id).first()
        if locked_scan is None or str(locked_scan.task_id) != old_task_id:
            logger.info(
                "Scan %s no longer points at task %s; skipping recovery re-enqueue",
                scan.id,
                old_task_id,
            )
            return False
        # Create the result row for the new id up front, in PENDING state.
        task_result_new, _ = TaskResult.objects.get_or_create(
            task_id=new_task_id,
            defaults={"status": states.PENDING, "task_name": "scan-perform"},
        )
        APITask.objects.update_or_create(
            id=new_task_id,
            tenant_id=tenant_id,
            defaults={"task_runner_task": task_result_new},
        )
        locked_scan.task_id = new_task_id
        # A missing/None recovery_count is treated as zero before incrementing.
        locked_scan.recovery_count = (locked_scan.recovery_count or 0) + 1
        locked_scan.save(update_fields=["task_id", "recovery_count", "updated_at"])
    # Enqueue only after the block above has exited, reusing the pre-provisioned id.
    perform_scan_task.apply_async(
        kwargs={
            "tenant_id": tenant_id,
            "scan_id": str(scan.id),
            "provider_id": str(scan.provider_id),
        },
        task_id=new_task_id,
    )
    return True
def _fail_domain_row(old_task_id: str, name: str, now: datetime) -> None:
    """Flag the scan behind a non-recovered scan task as FAILED.

    Only scan task names are handled; any other name is a no-op. The update is
    restricted to a scan that still references ``old_task_id``.

    Args:
        old_task_id: Celery task id of the orphan that will not be re-run.
        name: Task name of the orphan.
        now: Timestamp written to the scan's ``completed_at``.
    """
    from api.db_utils import rls_transaction
    from api.models import Scan, StateChoices

    if name not in _SCAN_TASKS:
        return

    scan = _scan_for_task(old_task_id)
    if scan is None:
        return

    with rls_transaction(str(scan.tenant_id)):
        still_linked = Scan.all_objects.filter(id=scan.id, task_id=old_task_id)
        still_linked.update(state=StateChoices.FAILED, completed_at=now)
+6 -11
View File
@@ -29,10 +29,7 @@ from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import rls_transaction
from api.models import Provider, Scan, ScanSummary, StateChoices, ThreatScoreSnapshot
from api.utils import initialize_prowler_provider
from prowler.lib.check.compliance_models import (
Compliance,
get_bulk_compliance_frameworks_universal,
)
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
@@ -574,7 +571,7 @@ def generate_csa_report(
Args:
tenant_id: The tenant ID for Row-Level Security context.
scan_id: ID of the scan executed by Prowler.
compliance_id: ID of the compliance framework (e.g., "csa_ccm_4.0").
compliance_id: ID of the compliance framework (e.g., "csa_ccm_4.0_aws").
output_path: Output PDF file path.
provider_id: Provider ID for the scan.
only_failed: If True, only include failed requirements in detailed section.
@@ -886,11 +883,9 @@ def generate_compliance_reports(
frameworks_bulk.get(f"nis2_{provider_type}")
)
if generate_csa:
# csa_ccm_4.0 lives at the top level, not under compliance/{provider}/.
csa_framework = frameworks_bulk.get(
"csa_ccm_4.0"
) or get_bulk_compliance_frameworks_universal(provider_type).get("csa_ccm_4.0")
pending_checks_by_framework["csa"] = _get_compliance_check_ids(csa_framework)
pending_checks_by_framework["csa"] = _get_compliance_check_ids(
frameworks_bulk.get(f"csa_ccm_4.0_{provider_type}")
)
if generate_cis and latest_cis:
pending_checks_by_framework["cis"] = _get_compliance_check_ids(
frameworks_bulk.get(latest_cis)
@@ -1188,7 +1183,7 @@ def generate_compliance_reports(
if generate_csa:
generated_report_keys.append("csa")
csa_path = output_paths["csa"]
compliance_id_csa = "csa_ccm_4.0"
compliance_id_csa = f"csa_ccm_4.0_{provider_type}"
pdf_path_csa = f"{csa_path}_csa_report.pdf"
logger.info("Generating CSA CCM report with compliance %s", compliance_id_csa)
+4 -57
View File
@@ -5,7 +5,6 @@ import time
from abc import ABC, abstractmethod
from contextlib import contextmanager
from dataclasses import dataclass, field
from types import SimpleNamespace
from typing import Any
from celery.utils.log import get_task_logger
@@ -27,10 +26,7 @@ from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Provider, StatusChoices
from api.utils import initialize_prowler_provider
from prowler.lib.check.compliance_models import (
Compliance,
get_bulk_compliance_frameworks_universal,
)
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.outputs.finding import Finding as FindingOutput
from .components import (
@@ -226,46 +222,6 @@ def get_requirement_metadata(
return None
def _universal_attributes_to_list(attributes) -> list:
"""Flatten a universal requirement's ``attributes`` into a list of objects
with attribute access. MITRE wraps its list under ``_raw_attributes``."""
if isinstance(attributes, dict) and "_raw_attributes" in attributes:
entries = attributes.get("_raw_attributes") or []
return [
SimpleNamespace(**entry) for entry in entries if isinstance(entry, dict)
]
if isinstance(attributes, dict):
return [SimpleNamespace(**attributes)] if attributes else []
return list(attributes or [])
def _adapt_universal_to_legacy(framework, provider_type: str) -> SimpleNamespace:
    """Wrap a universal framework in an object using the legacy attribute names.

    Args:
        framework: Object exposing ``framework``, ``name``, ``version``,
            ``description``, ``provider`` and ``requirements``.
        provider_type: Provider name; lower-cased to pick each requirement's
            checks, and used as ``Provider`` when the framework has none.

    Returns:
        A namespace with ``Framework``, ``Name``, ``Version``, ``Description``,
        ``Provider`` and ``Requirements`` (each requirement exposing ``Id``,
        ``Description``, ``Checks`` and ``Attributes``).
    """
    provider_key = (provider_type or "").lower()

    def _legacy_requirement(requirement) -> SimpleNamespace:
        # Checks are only usable when stored as a provider -> list mapping.
        per_provider = requirement.checks
        if not isinstance(per_provider, dict):
            per_provider = {}
        return SimpleNamespace(
            Id=requirement.id,
            Description=requirement.description or "",
            Checks=list(per_provider.get(provider_key, [])),
            Attributes=_universal_attributes_to_list(requirement.attributes),
        )

    legacy_requirements = [
        _legacy_requirement(requirement) for requirement in framework.requirements
    ]
    return SimpleNamespace(
        Framework=framework.framework,
        Name=framework.name,
        Version=framework.version or "",
        Description=framework.description or "",
        Provider=framework.provider or provider_type,
        Requirements=legacy_requirements,
    )
# =============================================================================
# PDF Styles Cache
# =============================================================================
@@ -913,18 +869,9 @@ class BaseComplianceReportGenerator(ABC):
prowler_provider = initialize_prowler_provider(provider_obj)
provider_type = provider_obj.provider
# Load compliance framework — fall back to the universal loader
# for top-level JSONs (e.g. csa_ccm_4.0) that Compliance.get_bulk
# does not scan.
compliance_obj = Compliance.get_bulk(provider_type).get(compliance_id)
if not compliance_obj:
universal_framework = get_bulk_compliance_frameworks_universal(
provider_type
).get(compliance_id)
if universal_framework:
compliance_obj = _adapt_universal_to_legacy(
universal_framework, provider_type
)
# Load compliance framework
frameworks_bulk = Compliance.get_bulk(provider_type)
compliance_obj = frameworks_bulk.get(compliance_id)
if not compliance_obj:
raise ValueError(f"Compliance framework not found: {compliance_id}")
+290 -495
View File
@@ -42,6 +42,7 @@ from api.db_utils import (
SET_CONFIG_QUERY,
psycopg_connection,
rls_transaction,
update_objects_in_batches,
)
from api.exceptions import ProviderConnectionError
from api.models import (
@@ -58,7 +59,6 @@ from api.models import (
ResourceFindingMapping,
ResourceScanSummary,
ResourceTag,
ResourceTagMapping,
Scan,
ScanCategorySummary,
ScanGroupSummary,
@@ -97,16 +97,8 @@ COMPLIANCE_REQUIREMENT_COPY_COLUMNS = (
)
# Controls how many findings we process per micro-batch before flushing to DB writes
FINDINGS_MICRO_BATCH_SIZE = env.int("DJANGO_FINDINGS_MICRO_BATCH_SIZE", default=3000)
# Controls how many rows each ORM bulk_create/bulk_update call sends to Postgres.
SCAN_DB_BATCH_SIZE = env.int("DJANGO_SCAN_DB_BATCH_SIZE", default=1000)
# Throttle scan progress persistence: minimum progress delta (fraction 0-1)
# between two persisted progress updates.
PROGRESS_THROTTLE_DELTA = env.float("DJANGO_SCAN_PROGRESS_THROTTLE_DELTA", default=0.01)
# Throttle scan progress persistence: maximum seconds without persisting progress
# regardless of delta (so slow checks still show progress in the UI).
PROGRESS_THROTTLE_SECONDS = env.float(
"DJANGO_SCAN_PROGRESS_THROTTLE_SECONDS", default=10.0
)
# Controls how many rows each ORM bulk_create/bulk_update call sends to Postgres
SCAN_DB_BATCH_SIZE = env.int("DJANGO_SCAN_DB_BATCH_SIZE", default=500)
ATTACK_SURFACE_PROVIDER_COMPATIBILITY = {
"internet-exposed": None, # Compatible with all providers
@@ -118,19 +110,6 @@ ATTACK_SURFACE_PROVIDER_COMPATIBILITY = {
_ATTACK_SURFACE_MAPPING_CACHE: dict[str, dict] = {}
def _clear_scan_rerun_state(tenant_id: str, scan_id: str) -> None:
    """Delete every row derived from an earlier execution of this scan.

    All deletions run inside a single ``rls_transaction`` for the tenant:
    findings first (through ``all_objects``), then the per-scan summary and
    overview tables (through ``objects``).
    """
    # Deleted in this order, all filtered by scan_id via the default manager.
    derived_models = (
        ResourceScanSummary,
        ScanCategorySummary,
        ScanGroupSummary,
        ScanSummary,
        AttackSurfaceOverview,
        ComplianceRequirementOverview,
        ComplianceOverviewSummary,
    )
    with rls_transaction(tenant_id):
        Finding.all_objects.filter(scan_id=scan_id).delete()
        for derived_model in derived_models:
            derived_model.objects.filter(scan_id=scan_id).delete()
def aggregate_category_counts(
categories: list[str],
severity: str,
@@ -489,13 +468,9 @@ def _create_compliance_summaries(
)
)
# Idempotent re-run: clear this scan's prior summaries before re-inserting, so
# a recovered scan's summary always reflects its own (re-derived) requirement
# rows rather than keeping a stale row (bulk_create ignore_conflicts alone would
# keep the old one).
with rls_transaction(tenant_id):
ComplianceOverviewSummary.objects.filter(scan_id=scan_id).delete()
if summary_objects:
# Bulk insert summaries
if summary_objects:
with rls_transaction(tenant_id):
ComplianceOverviewSummary.objects.bulk_create(
summary_objects, batch_size=500, ignore_conflicts=True
)
@@ -553,26 +528,16 @@ def _process_finding_micro_batch(
"""
# Accumulate objects for bulk operations
findings_to_create = []
mappings_to_create = []
dirty_resources = {}
resources_with_new_tag_mappings: set[str] = set()
resource_denormalized_data = [] # (finding_instance, resource_instance) pairs
tag_mappings_to_create: list[ResourceTagMapping] = []
skipped_findings_count = 0 # Track findings skipped due to UID length
# Separate findings into those persistable (uid <= 300) and over-limit.
# Resources/tags ARE still resolved for over-limit findings to preserve the
# original behavior (resources are persisted even when their finding is dropped).
non_null_findings = [f for f in findings_batch if f is not None]
persistable_findings = [f for f in non_null_findings if len(f.uid) <= 300]
skipped_findings_count = len(non_null_findings) - len(persistable_findings)
none_count = len(findings_batch) - len(non_null_findings)
if none_count:
logger.error(
f"{none_count} None finding(s) detected on scan {scan_instance.id}."
)
# Prefetch last statuses for all persistable findings in this batch (read replica)
finding_uids = [f.uid for f in persistable_findings]
# Prefetch last statuses for all findings in this batch
# TEMPORARY WORKAROUND: Filter out UIDs > 300 chars to avoid query errors
finding_uids = [
f.uid for f in findings_batch if f is not None and len(f.uid) <= 300
]
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
last_statuses = {
item["uid"]: (item["status"], item["first_seen_at"])
@@ -583,411 +548,281 @@ def _process_finding_micro_batch(
.order_by("uid", "-inserted_at")
.distinct("uid")
}
# Update cache
for uid, data in last_statuses.items():
if uid not in last_status_cache:
last_status_cache[uid] = data
# All DB writes for this micro-batch run inside ONE rls_transaction,
# with deadlock-retry at micro-batch granularity instead of per-finding.
for attempt in range(CELERY_DEADLOCK_ATTEMPTS):
try:
with rls_transaction(tenant_id):
# 1) Pre-resolve Resources in bulk
# Collect all uids referenced by this batch that are not in cache yet.
# NOTE: we intentionally include empty-string uids here. The SDK
# explicitly emits findings with `resource_uid=""` for some flows
# (IaC scans, some Azure/GCP/K8s checks). The original
# `get_or_create` behavior was to create/share a Resource with
# uid="" for these findings rather than dropping them. Preserve
# that behavior; do NOT filter by truthiness.
batch_resource_uids: set[str] = set()
for f in non_null_findings:
if f.resource_uid not in resource_cache:
batch_resource_uids.add(f.resource_uid)
# Process each finding in the batch
for finding in findings_batch:
if finding is None:
logger.error(f"None finding detected on scan {scan_instance.id}.")
continue
if batch_resource_uids:
existing_resources = {
r.uid: r
for r in Resource.objects.filter(
tenant_id=tenant_id,
provider_id=provider_instance.id,
uid__in=batch_resource_uids,
)
}
missing_uids = batch_resource_uids - existing_resources.keys()
if missing_uids:
# Build defaults from the first finding referencing each uid.
first_finding_per_uid: dict[str, ProwlerFinding] = {}
for f in non_null_findings:
if f.resource_uid in missing_uids:
first_finding_per_uid.setdefault(f.resource_uid, f)
resources_to_create = []
for uid in missing_uids:
f = first_finding_per_uid[uid]
check_metadata = f.get_metadata()
group = check_metadata.get("resourcegroup") or None
resources_to_create.append(
Resource(
tenant_id=tenant_id,
provider=provider_instance,
uid=uid,
region=f.region,
service=f.service_name,
type=f.resource_type,
name=f.resource_name,
groups=[group] if group else None,
)
)
Resource.objects.bulk_create(
resources_to_create,
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "provider_id", "uid"],
)
# Re-fetch to obtain instances we just created AND any
# created concurrently by another scan against the same provider.
existing_resources.update(
{
r.uid: r
for r in Resource.objects.filter(
tenant_id=tenant_id,
provider_id=provider_instance.id,
uid__in=missing_uids,
)
}
)
for uid, r in existing_resources.items():
resource_cache[uid] = r
resource_failed_findings_cache.setdefault(uid, 0)
# 2) Pre-resolve ResourceTags in bulk
batch_tag_kv: set[tuple[str, str]] = set()
for f in non_null_findings:
for k, v in f.resource_tags.items():
if (k, v) not in tag_cache:
batch_tag_kv.add((k, v))
if batch_tag_kv:
keys_to_query = {k for k, _ in batch_tag_kv}
existing_tags = {
(t.key, t.value): t
for t in ResourceTag.objects.filter(
tenant_id=tenant_id, key__in=keys_to_query
)
if (t.key, t.value) in batch_tag_kv
}
missing_kv = batch_tag_kv - existing_tags.keys()
if missing_kv:
ResourceTag.objects.bulk_create(
[
ResourceTag(tenant_id=tenant_id, key=k, value=v)
for k, v in missing_kv
],
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "key", "value"],
)
existing_tags.update(
{
(t.key, t.value): t
for t in ResourceTag.objects.filter(
tenant_id=tenant_id,
key__in={k for k, _ in missing_kv},
)
if (t.key, t.value) in missing_kv
}
)
tag_cache.update(existing_tags)
# 3) Per-finding in-memory processing
for finding in non_null_findings:
# Process resource with deadlock retry
for attempt in range(CELERY_DEADLOCK_ATTEMPTS):
try:
with rls_transaction(tenant_id):
resource_uid = finding.resource_uid
resource_instance = resource_cache.get(resource_uid)
if resource_instance is None:
# Should be unreachable after the pre-resolve step. Defensive log.
logger.error(
f"Resource {resource_uid} missing from cache after pre-resolve "
f"on scan {scan_instance.id}; skipping finding."
)
continue
# Detect resource field changes (defer save until end-of-batch bulk_update).
check_metadata = finding.get_metadata()
group = check_metadata.get("resourcegroup") or None
updated = False
if finding.region and resource_instance.region != finding.region:
resource_instance.region = finding.region
updated = True
if resource_instance.service != finding.service_name:
resource_instance.service = finding.service_name
updated = True
if resource_instance.type != finding.resource_type:
resource_instance.type = finding.resource_type
updated = True
if resource_instance.metadata != finding.resource_metadata:
resource_instance.metadata = json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
updated = True
if resource_instance.details != finding.resource_details:
resource_instance.details = finding.resource_details
updated = True
if resource_instance.partition != finding.partition:
resource_instance.partition = finding.partition
updated = True
if group and (
not resource_instance.groups
or group not in resource_instance.groups
):
resource_instance.groups = (resource_instance.groups or []) + [
group
]
updated = True
if updated:
dirty_resources[resource_uid] = resource_instance
# Accumulate ResourceTagMapping rows; bulk_create at end of block.
for k, v in finding.resource_tags.items():
tag_instance = tag_cache.get((k, v))
if tag_instance is None:
# Should not happen after pre-resolve; skip defensively.
continue
tag_mappings_to_create.append(
ResourceTagMapping(
tenant_id=tenant_id,
resource=resource_instance,
tag=tag_instance,
)
)
unique_resources.add(
(resource_instance.uid, resource_instance.region)
)
# TEMPORARY WORKAROUND: Skip findings with UID > 300 chars
# TODO: Remove this after implementing text field migration for finding.uid
if len(finding.uid) > 300:
logger.warning(
f"Skipping finding with UID exceeding 300 characters. "
f"Length: {len(finding.uid)}, "
f"Check: {finding.check_id}, "
f"Resource: {finding.resource_name}, "
f"UID: {finding.uid}"
)
continue
finding_uid = finding.uid
last_status, last_first_seen_at = last_status_cache.get(
finding_uid, (None, None)
)
status = FindingStatus[finding.status]
delta = _create_finding_delta(last_status, status)
if not last_first_seen_at:
last_first_seen_at = datetime.now(tz=timezone.utc)
# Determine if finding should be muted and why
# Priority: mutelist processor (highest) > manual mute rules
is_muted = False
muted_reason = None
if finding.muted:
is_muted = True
muted_reason = "Muted by mutelist"
elif finding_uid in mute_rules_cache:
is_muted = True
muted_reason = mute_rules_cache[finding_uid]
if status == FindingStatus.FAIL and not is_muted:
resource_failed_findings_cache[resource_uid] += 1
check_metadata["compliance"] = finding.compliance
finding_instance = Finding(
tenant_id=tenant_id,
uid=finding_uid,
delta=delta,
check_metadata=check_metadata,
status=status,
status_extended=finding.status_extended,
severity=finding.severity,
impact=finding.severity,
raw_result=finding.raw,
check_id=finding.check_id,
scan=scan_instance,
first_seen_at=last_first_seen_at,
muted=is_muted,
muted_at=datetime.now(tz=timezone.utc) if is_muted else None,
muted_reason=muted_reason,
compliance=finding.compliance,
categories=check_metadata.get("categories", []) or [],
resource_groups=check_metadata.get("resourcegroup") or None,
# Denormalized resource arrays populated directly on insert
# (was previously a separate bulk_update; saves a CASE WHEN
# over thousands of rows per micro-batch).
resource_regions=[resource_instance.region]
if resource_instance.region
else [],
resource_services=[resource_instance.service]
if resource_instance.service
else [],
resource_types=[resource_instance.type]
if resource_instance.type
else [],
)
findings_to_create.append(finding_instance)
resource_denormalized_data.append(
(finding_instance, resource_instance)
)
scan_resource_cache.add(
(
str(resource_instance.id),
resource_instance.service,
resource_instance.region,
resource_instance.type,
)
)
aggregate_category_counts(
categories=check_metadata.get("categories", []) or [],
severity=finding.severity.value,
status=status.value,
delta=delta.value if delta else None,
muted=is_muted,
cache=scan_categories_cache,
)
aggregate_resource_group_counts(
resource_group=check_metadata.get("resourcegroup") or None,
severity=finding.severity.value,
status=status.value,
delta=delta.value if delta else None,
muted=is_muted,
resource_uid=resource_instance.uid if resource_instance else "",
cache=scan_resource_groups_cache,
group_resources_cache=group_resources_cache,
)
# 4) Bulk create ResourceTagMappings
# Replaces the original per-resource `upsert_or_delete_tags`
# (which did one `update_or_create` + SELECT FOR UPDATE per mapping).
if tag_mappings_to_create:
# Pre-SELECT existing pairs: `bulk_create(ignore_conflicts=True)`
# does not populate `pk`, so we cannot tell new vs existing from
# the result; we need that to bump `updated_at` only on resources
# that actually gain a mapping.
candidate_resource_ids = {
m.resource_id for m in tag_mappings_to_create
}
candidate_tag_ids = {m.tag_id for m in tag_mappings_to_create}
existing_pairs = set(
ResourceTagMapping.objects.filter(
if resource_uid not in resource_cache:
check_metadata = finding.get_metadata()
group = check_metadata.get("resourcegroup") or None
resource_instance, _ = Resource.objects.get_or_create(
tenant_id=tenant_id,
resource_id__in=candidate_resource_ids,
tag_id__in=candidate_tag_ids,
).values_list("resource_id", "tag_id")
)
resource_uid_by_id = {
str(r.id): uid for uid, r in resource_cache.items()
}
for m in tag_mappings_to_create:
if (m.resource_id, m.tag_id) not in existing_pairs:
uid = resource_uid_by_id.get(str(m.resource_id))
if uid is not None:
resources_with_new_tag_mappings.add(uid)
ResourceTagMapping.objects.bulk_create(
tag_mappings_to_create,
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "resource_id", "tag_id"],
)
# 5) Bulk create Findings
if findings_to_create:
Finding.objects.bulk_create(
findings_to_create, batch_size=SCAN_DB_BATCH_SIZE
)
# 6) Bulk create ResourceFindingMapping rows
mappings_to_create = [
ResourceFindingMapping(
tenant_id=tenant_id,
resource=resource_instance,
finding=finding_instance,
)
for finding_instance, resource_instance in resource_denormalized_data
]
if mappings_to_create:
created_mappings = ResourceFindingMapping.objects.bulk_create(
mappings_to_create,
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "resource_id", "finding_id"],
)
inserted = sum(1 for m in created_mappings if m.pk)
if inserted != len(mappings_to_create):
logger.error(
f"scan {scan_instance.id}: expected "
f"{len(mappings_to_create)} ResourceFindingMapping rows, "
f"inserted {inserted}. Rolling back micro-batch."
provider=provider_instance,
uid=resource_uid,
defaults={
"region": finding.region,
"service": finding.service_name,
"type": finding.resource_type,
"name": finding.resource_name,
"groups": [group] if group else None,
},
)
resource_cache[resource_uid] = resource_instance
resource_failed_findings_cache[resource_uid] = 0
else:
resource_instance = resource_cache[resource_uid]
break
except (OperationalError, IntegrityError) as db_err:
if attempt < CELERY_DEADLOCK_ATTEMPTS - 1:
logger.warning(
f"{'Deadlock error' if isinstance(db_err, OperationalError) else 'Integrity error'} "
f"detected when processing resource {resource_uid} on scan {scan_instance.id}. Retrying..."
)
time.sleep(0.1 * (2**attempt))
continue
else:
raise db_err
# 7) Bulk update Resources
# Union of:
# - resources whose fields changed (dirty_resources)
# - resources that got new tag mappings (need updated_at bump,
# preserving the original `self.save(update_fields=["updated_at"])`
# behavior of `upsert_or_delete_tags`)
all_resource_uids_to_touch = (
set(dirty_resources.keys()) | resources_with_new_tag_mappings
# Track resource field changes (defer save)
updated = False
check_metadata = finding.get_metadata()
group = check_metadata.get("resourcegroup") or None
if finding.region and resource_instance.region != finding.region:
resource_instance.region = finding.region
updated = True
if resource_instance.service != finding.service_name:
resource_instance.service = finding.service_name
updated = True
if resource_instance.type != finding.resource_type:
resource_instance.type = finding.resource_type
updated = True
if resource_instance.metadata != finding.resource_metadata:
resource_instance.metadata = json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
updated = True
if resource_instance.details != finding.resource_details:
resource_instance.details = finding.resource_details
updated = True
if resource_instance.partition != finding.partition:
resource_instance.partition = finding.partition
updated = True
if group and (
not resource_instance.groups or group not in resource_instance.groups
):
resource_instance.groups = (resource_instance.groups or []) + [group]
updated = True
if updated:
dirty_resources[resource_uid] = resource_instance
# Process tags
tags = []
with rls_transaction(tenant_id):
for key, value in finding.resource_tags.items():
tag_key = (key, value)
if tag_key not in tag_cache:
tag_instance, _ = ResourceTag.objects.get_or_create(
tenant_id=tenant_id, key=key, value=value
)
tag_cache[tag_key] = tag_instance
else:
tag_instance = tag_cache[tag_key]
tags.append(tag_instance)
resource_instance.upsert_or_delete_tags(tags=tags)
unique_resources.add((resource_instance.uid, resource_instance.region))
# Prepare finding data
finding_uid = finding.uid
# TEMPORARY WORKAROUND: Skip findings with UID > 300 chars
# TODO: Remove this after implementing text field migration for finding.uid
if len(finding_uid) > 300:
skipped_findings_count += 1
logger.warning(
f"Skipping finding with UID exceeding 300 characters. "
f"Length: {len(finding_uid)}, "
f"Check: {finding.check_id}, "
f"Resource: {finding.resource_name}, "
f"UID: {finding_uid}"
)
continue
last_status, last_first_seen_at = last_status_cache.get(
finding_uid, (None, None)
)
status = FindingStatus[finding.status]
delta = _create_finding_delta(last_status, status)
if not last_first_seen_at:
last_first_seen_at = datetime.now(tz=timezone.utc)
# Determine if finding should be muted and why
# Priority: mutelist processor (highest) > manual mute rules
is_muted = False
muted_reason = None
# Check mutelist processor first (highest priority)
if finding.muted:
is_muted = True
muted_reason = "Muted by mutelist"
# If not muted by mutelist, check manual mute rules
elif finding_uid in mute_rules_cache:
is_muted = True
muted_reason = mute_rules_cache[finding_uid]
# Increment failed_findings_count cache if needed
if status == FindingStatus.FAIL and not is_muted:
resource_failed_findings_cache[resource_uid] += 1
# Create finding object (don't save yet)
check_metadata = finding.get_metadata()
check_metadata["compliance"] = finding.compliance
finding_instance = Finding(
tenant_id=tenant_id,
uid=finding_uid,
delta=delta,
check_metadata=check_metadata,
status=status,
status_extended=finding.status_extended,
severity=finding.severity,
impact=finding.severity,
raw_result=finding.raw,
check_id=finding.check_id,
scan=scan_instance,
first_seen_at=last_first_seen_at,
muted=is_muted,
muted_at=datetime.now(tz=timezone.utc) if is_muted else None,
muted_reason=muted_reason,
compliance=finding.compliance,
categories=check_metadata.get("categories", []) or [],
resource_groups=check_metadata.get("resourcegroup") or None,
)
findings_to_create.append(finding_instance)
resource_denormalized_data.append((finding_instance, resource_instance))
# Track for scan summary
scan_resource_cache.add(
(
str(resource_instance.id),
resource_instance.service,
resource_instance.region,
resource_instance.type,
)
)
# Track categories with counts for ScanCategorySummary by (category, severity)
aggregate_category_counts(
categories=check_metadata.get("categories", []) or [],
severity=finding.severity.value,
status=status.value,
delta=delta.value if delta else None,
muted=is_muted,
cache=scan_categories_cache,
)
# Track resource groups with counts for ScanGroupSummary
aggregate_resource_group_counts(
resource_group=check_metadata.get("resourcegroup") or None,
severity=finding.severity.value,
status=status.value,
delta=delta.value if delta else None,
muted=is_muted,
resource_uid=resource_instance.uid if resource_instance else "",
cache=scan_resource_groups_cache,
group_resources_cache=group_resources_cache,
)
# Bulk operations within single transaction
with rls_transaction(tenant_id):
# Bulk create findings
if findings_to_create:
Finding.objects.bulk_create(
findings_to_create, batch_size=SCAN_DB_BATCH_SIZE
)
# Bulk create resource-finding mappings
for finding_instance, resource_instance in resource_denormalized_data:
mappings_to_create.append(
ResourceFindingMapping(
tenant_id=tenant_id,
resource=resource_instance,
finding=finding_instance,
)
if all_resource_uids_to_touch:
now_utc = datetime.now(tz=timezone.utc)
resources_to_bulk_update = []
for uid in all_resource_uids_to_touch:
# Use the instance from dirty_resources if present (has mutated
# fields), otherwise the cached one (for updated_at bump only).
r = dirty_resources.get(uid) or resource_cache.get(uid)
if r is None:
continue
# Manually bump updated_at since bulk_update bypasses auto_now.
r.updated_at = now_utc
resources_to_bulk_update.append(r)
if resources_to_bulk_update:
Resource.objects.bulk_update(
resources_to_bulk_update,
[
"metadata",
"details",
"partition",
"region",
"service",
"type",
"groups",
"updated_at",
],
batch_size=1000,
)
# Successful execution: leave deadlock retry loop.
break
except (OperationalError, IntegrityError) as db_err:
if attempt < CELERY_DEADLOCK_ATTEMPTS - 1:
logger.warning(
f"{'Deadlock error' if isinstance(db_err, OperationalError) else 'Integrity error'} "
f"on micro-batch for scan {scan_instance.id}. Retrying (attempt {attempt + 1})..."
)
if mappings_to_create:
created_mappings = ResourceFindingMapping.objects.bulk_create(
mappings_to_create,
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "resource_id", "finding_id"],
)
inserted = sum(1 for m in created_mappings if m.pk)
if inserted != len(mappings_to_create):
logger.error(
f"scan {scan_instance.id}: expected "
f"{len(mappings_to_create)} ResourceFindingMapping rows, "
f"inserted {inserted}. Rolling back micro-batch."
)
time.sleep(0.1 * (2**attempt))
# Clear accumulators that we appended to inside the failed transaction
# so the retry produces consistent results.
findings_to_create.clear()
resource_denormalized_data.clear()
tag_mappings_to_create.clear()
dirty_resources.clear()
resources_with_new_tag_mappings.clear()
continue
raise
# Update finding denormalized arrays
findings_to_update = []
for finding_instance, resource_instance in resource_denormalized_data:
if not finding_instance.resource_regions:
finding_instance.resource_regions = []
if not finding_instance.resource_services:
finding_instance.resource_services = []
if not finding_instance.resource_types:
finding_instance.resource_types = []
if resource_instance.region not in finding_instance.resource_regions:
finding_instance.resource_regions.append(resource_instance.region)
if resource_instance.service not in finding_instance.resource_services:
finding_instance.resource_services.append(resource_instance.service)
if resource_instance.type not in finding_instance.resource_types:
finding_instance.resource_types.append(resource_instance.type)
findings_to_update.append(finding_instance)
if findings_to_update:
Finding.objects.bulk_update(
findings_to_update,
["resource_regions", "resource_services", "resource_types"],
batch_size=SCAN_DB_BATCH_SIZE,
)
# Bulk update dirty resources
if dirty_resources:
update_objects_in_batches(
tenant_id=tenant_id,
model=Resource,
objects=list(dirty_resources.values()),
fields=[
"metadata",
"details",
"partition",
"region",
"service",
"type",
"groups",
],
batch_size=1000,
)
# Log skipped findings summary
if skipped_findings_count > 0:
@@ -1038,8 +873,7 @@ def perform_prowler_scan(
scan_instance = Scan.objects.get(pk=scan_id)
scan_instance.state = StateChoices.EXECUTING
scan_instance.started_at = datetime.now(tz=timezone.utc)
scan_instance.save(update_fields=["state", "started_at", "updated_at"])
_clear_scan_rerun_state(tenant_id, scan_id)
scan_instance.save()
# Find the mutelist processor if it exists
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
@@ -1084,13 +918,7 @@ def perform_prowler_scan(
provider_instance.connection_last_checked_at = datetime.now(
tz=timezone.utc
)
provider_instance.save(
update_fields=[
"connected",
"connection_last_checked_at",
"updated_at",
]
)
provider_instance.save()
# If the provider is not connected, raise an exception outside the transaction.
# If raised within the transaction, the transaction will be rolled back and the provider will not be marked
@@ -1105,13 +933,6 @@ def perform_prowler_scan(
last_status_cache = {}
resource_failed_findings_cache = defaultdict(int)
# Throttle scan_instance progress writes to avoid hammering the writer:
# only persist when progress moves by at least `PROGRESS_THROTTLE_DELTA`
# OR `PROGRESS_THROTTLE_SECONDS` have elapsed. The final progress (1.0)
# always persists in the `finally` block below.
last_persisted_progress = -1.0
last_persisted_progress_at = 0.0
for progress, findings in prowler_scan.scan():
# Process findings in micro-batches
findings_list = list(findings)
@@ -1138,20 +959,10 @@ def perform_prowler_scan(
group_resources_cache=group_resources_cache,
)
# Throttled progress save (the final save in the `finally` block
# below always runs regardless of throttle).
now = time.time()
progress_delta = progress - last_persisted_progress
elapsed = now - last_persisted_progress_at
if (
progress_delta >= PROGRESS_THROTTLE_DELTA
or elapsed >= PROGRESS_THROTTLE_SECONDS
):
with rls_transaction(tenant_id):
scan_instance.progress = progress
scan_instance.save(update_fields=["progress", "updated_at"])
last_persisted_progress = progress
last_persisted_progress_at = now
# Update scan progress
with rls_transaction(tenant_id):
scan_instance.progress = progress
scan_instance.save()
scan_instance.state = StateChoices.COMPLETED
@@ -1165,16 +976,13 @@ def perform_prowler_scan(
resources_to_update.append(resource_instance)
if resources_to_update:
# Single rls_transaction wrapping the bulk_update (previously
# `update_objects_in_batches` opened one rls_transaction per
# chunk; for tenants with many resources this collapsed N
# BEGINs/COMMITs into 1).
with rls_transaction(tenant_id):
Resource.objects.bulk_update(
resources_to_update,
["failed_findings_count"],
batch_size=SCAN_DB_BATCH_SIZE,
)
update_objects_in_batches(
tenant_id=tenant_id,
model=Resource,
objects=resources_to_update,
fields=["failed_findings_count"],
batch_size=1000,
)
except Exception as e:
logger.error(f"Error performing scan {scan_id}: {e}")
@@ -1186,16 +994,7 @@ def perform_prowler_scan(
scan_instance.duration = time.time() - start_time
scan_instance.completed_at = datetime.now(tz=timezone.utc)
scan_instance.unique_resource_count = len(unique_resources)
scan_instance.save(
update_fields=[
"state",
"duration",
"completed_at",
"unique_resource_count",
"progress",
"updated_at",
]
)
scan_instance.save()
if exception is not None:
raise exception
@@ -1669,10 +1468,6 @@ def create_compliance_requirements(tenant_id: str, scan_id: str):
elif requirement_status == "PASS":
requirement_statuses[key]["pass_count"] += 1
# Idempotent re-run: COPY can't ON CONFLICT, so clear this scan's rows first.
with rls_transaction(tenant_id):
ComplianceRequirementOverview.objects.filter(scan_id=scan_id).delete()
# Bulk create requirement records using PostgreSQL COPY
_persist_compliance_requirement_rows(tenant_id, compliance_requirement_rows)
+22 -27
View File
@@ -359,40 +359,35 @@ def _load_findings_for_requirement_checks(
def _get_compliance_check_ids(compliance_obj) -> set[str]:
"""Return the union of all check_ids referenced by a compliance framework.
Used by the master report orchestrator to evict entries from
``findings_cache`` once no pending framework needs them (PROWLER-1733).
Used by the master report orchestrator to know which checks each
framework consumes from the shared ``findings_cache``, so that once a
framework finishes the entries no other pending framework needs can be
evicted from the cache (PROWLER-1733).
Accepts the legacy ``Compliance`` shape (``Requirements`` / ``Checks``
lists) and the universal ``ComplianceFramework`` shape (``requirements``
/ ``checks`` dict keyed by provider). ``None`` returns an empty set so
callers can pass ``frameworks_bulk.get(...)`` directly.
Args:
compliance_obj: A loaded Compliance framework object exposing a
``Requirements`` iterable, each requirement carrying ``Checks``.
``None`` is treated as "no checks" rather than raising, so the
caller can pass ``frameworks_bulk.get(...)`` directly without
an extra existence check.
Returns:
Set of check_id strings (empty if ``compliance_obj`` is ``None``).
"""
if compliance_obj is None:
return set()
requirements = getattr(compliance_obj, "Requirements", None) or getattr(
compliance_obj, "requirements", None
)
if not requirements:
return set()
check_ids: set[str] = set()
checks: set[str] = set()
requirements = getattr(compliance_obj, "Requirements", None) or []
try:
# Mock objects in unit tests return another Mock for any attribute
# access — truthy but not iterable. Treat that as "no checks".
for requirement in requirements:
requirement_checks = getattr(requirement, "Checks", None)
if requirement_checks is None:
checks_by_provider = getattr(requirement, "checks", None) or {}
requirement_checks = [
check_id
for check_ids_list in checks_by_provider.values()
for check_id in check_ids_list
]
# Defensive: Mock objects (used in unit tests) return another Mock
# for any attribute access, which is truthy but not iterable. Treat
# any non-iterable Requirements value as "no checks".
for req in requirements:
req_checks = getattr(req, "Checks", None) or []
try:
check_ids.update(requirement_checks)
checks.update(req_checks)
except TypeError:
continue
except TypeError:
return set()
return check_ids
return checks
+4 -78
View File
@@ -46,7 +46,6 @@ from tasks.jobs.lighthouse_providers import (
refresh_lighthouse_provider_models,
)
from tasks.jobs.muting import mute_historical_findings
from tasks.jobs.orphan_recovery import reconcile_orphans
from tasks.jobs.report import (
STALE_TMP_OUTPUT_MAX_AGE_HOURS,
_cleanup_stale_tmp_output_directories,
@@ -68,10 +67,7 @@ from tasks.utils import (
get_next_execution_datetime,
)
from api.compliance import (
get_compliance_frameworks,
get_prowler_provider_compliance,
)
from api.compliance import get_compliance_frameworks
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import delete_related_daily_task, rls_transaction
from api.decorators import handle_provider_deletion, set_tenant
@@ -79,9 +75,6 @@ from api.models import Finding, Integration, Provider, Scan, ScanSummary, StateC
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.outputs.compliance.compliance import (
process_universal_compliance_frameworks,
)
from prowler.lib.outputs.compliance.generic.generic import GenericCompliance
from prowler.lib.outputs.finding import Finding as FindingOutput
@@ -469,42 +462,13 @@ def cleanup_stale_attack_paths_scans_task():
return cleanup_stale_attack_paths_scans()
@shared_task(name="reconcile-orphan-tasks", queue="celery")
def reconcile_orphan_tasks_task():
    """Celery entry point for the periodic orphan-task watchdog.

    The watchdog recovers tasks whose worker is gone (deploys, crashes).
    All of the work is delegated to ``reconcile_orphans``; its return value
    is passed through unchanged as the task result.
    """
    outcome = reconcile_orphans()
    return outcome
@shared_task(name="tenant-deletion", queue="deletion", autoretry_for=(Exception,))
def delete_tenant_task(tenant_id: str):
    """Celery task that deletes a tenant by primary key.

    Registered on the ``deletion`` queue and automatically retried on any
    ``Exception``. The work is delegated to ``delete_tenant``.

    Args:
        tenant_id: Primary key of the tenant to delete.

    Returns:
        Whatever ``delete_tenant`` returns, unchanged.
    """
    deletion_result = delete_tenant(pk=tenant_id)
    return deletion_result
def _scan_tmp_output_directory(tenant_id: str, scan_id: str) -> Path:
    """Build the root tmp output directory of a scan.

    The layout is ``{DJANGO_TMP_OUTPUT_DIRECTORY}/{tenant_id}/{scan_id}``.
    Both identifiers are coerced with ``str`` before being joined, so
    non-string ids (e.g. UUID objects) are accepted as well.
    """
    tmp_root = Path(DJANGO_TMP_OUTPUT_DIRECTORY)
    return tmp_root.joinpath(str(tenant_id), str(scan_id))
class ScanReportRLSTask(RLSTask):
    """RLS task base that cleans up a scan's tmp output directory on failure.

    The cleanup runs from Celery's failure hook, so it covers errors raised
    inside the task body as well as outside it (e.g. ENOSPC mid-write, or
    setup errors). This keeps partial artifacts from piling up on the
    worker disk.
    """

    def on_failure(self, exc, task_id, args, kwargs, _einfo):  # noqa: ARG002
        """Log the failure and remove ``{tmp}/{tenant_id}/{scan_id}``.

        Only ``kwargs`` is consulted for ``tenant_id`` / ``scan_id``; when
        either one is missing or falsy nothing is logged or removed.
        """
        del args  # Required by Celery's Task.on_failure signature; not used.
        tenant_id, scan_id = kwargs.get("tenant_id"), kwargs.get("scan_id")
        if not (tenant_id and scan_id):
            return
        logger.error(f"Scan report task {task_id} failed: {exc}")
        # Best-effort removal: ignore_errors=True makes rmtree swallow
        # filesystem errors, including an already-missing directory.
        rmtree(_scan_tmp_output_directory(tenant_id, scan_id), ignore_errors=True)
@shared_task(
base=ScanReportRLSTask,
base=RLSTask,
name="scan-report",
queue="scan-reports",
)
@@ -549,23 +513,11 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
provider_uid = provider_obj.uid
provider_type = provider_obj.provider
# Per-framework exporters in `COMPLIANCE_CLASS_MAP` consume the legacy bulk.
frameworks_bulk = Compliance.get_bulk(provider_type)
# Universal-only frameworks (top-level JSONs like `dora.json`) are emitted
# via `process_universal_compliance_frameworks` below.
universal_bulk = get_prowler_provider_compliance(provider_type)
universal_only_names = {
name
for name in universal_bulk
if name not in frameworks_bulk and universal_bulk[name].outputs
}
frameworks_avail = get_compliance_frameworks(provider_type)
out_dir, comp_dir = _generate_output_directory(
DJANGO_TMP_OUTPUT_DIRECTORY, provider_uid, tenant_id, scan_id
)
# Removed on success here and on failure by ScanReportRLSTask.on_failure,
# so partial artifacts do not accumulate and fill the disk (ENOSPC).
scan_tmp_dir = _scan_tmp_output_directory(tenant_id, scan_id)
def get_writer(writer_map, name, factory, is_last):
"""
@@ -583,10 +535,6 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
output_writers = {}
compliance_writers = {}
# Shared across batches so universal writers are created once and reused.
universal_compliance_state: dict[str, list] = {"compliance": []}
universal_base_dir = os.path.dirname(out_dir)
universal_output_filename = os.path.basename(out_dir)
scan_summary = FindingOutput._transform_findings_stats(
ScanSummary.objects.filter(scan_id=scan_id)
@@ -641,30 +589,8 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
writer.batch_write_data_to_file(**extra)
writer._data.clear()
# Universal-only frameworks (e.g. `dora.json`).
if universal_only_names:
process_universal_compliance_frameworks(
input_compliance_frameworks=universal_only_names,
universal_frameworks=universal_bulk,
finding_outputs=fos,
output_directory=universal_base_dir,
output_filename=universal_output_filename,
provider=provider_type,
generated_outputs=universal_compliance_state,
from_cli=False,
is_last=is_last,
)
# Compliance CSVs (per-framework exporters).
# Compliance CSVs
for name in frameworks_avail:
if name in universal_only_names:
continue
if name not in frameworks_bulk:
logger.warning(
"Compliance framework '%s' missing from bulk; skipping CSV export",
name,
)
continue
compliance_obj = frameworks_bulk[name]
klass = GenericCompliance
@@ -740,7 +666,7 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
# TODO: We need to create a new periodic task to delete the output files
# This task shouldn't be responsible for deleting the output files
try:
rmtree(scan_tmp_dir, ignore_errors=True)
rmtree(Path(compressed).parent, ignore_errors=True)
except Exception as e:
logger.error(f"Error deleting output files: {e}")
final_location, did_upload = upload_uri, True
+1 -39
View File
@@ -1,12 +1,11 @@
from unittest.mock import call, patch
from uuid import uuid4
import pytest
from django.core.exceptions import ObjectDoesNotExist
from tasks.jobs.deletion import delete_provider, delete_tenant
from api.attack_paths import database as graph_database
from api.models import JiraIssueDispatch, Provider, Tenant, TenantComplianceSummary
from api.models import Provider, Tenant, TenantComplianceSummary
@pytest.mark.django_db
@@ -35,43 +34,6 @@ class TestDeleteProvider:
str(instance.id),
)
def test_delete_provider_removes_jira_dispatches(
    self,
    providers_fixture,
    findings_fixture,
    integrations_fixture,
):
    """Deleting a provider removes JiraIssueDispatch rows for its findings only."""
    provider = providers_fixture[0]
    tenant_id = str(provider.tenant_id)
    provider_finding = findings_fixture[0]
    dispatch_common = {
        "tenant_id": tenant_id,
        "integration": integrations_fixture[0],
    }

    # This dispatch points at one of the provider's findings, so it is
    # expected to disappear together with the provider.
    JiraIssueDispatch.objects.create(
        **dispatch_common, finding_id=provider_finding.id
    )
    # This dispatch references a random, unrelated finding id and has to
    # survive the provider deletion.
    survivor = JiraIssueDispatch.objects.create(
        **dispatch_common, finding_id=uuid4()
    )

    # The graph database is stubbed out so only relational cleanup runs.
    with patch(
        "tasks.jobs.deletion.graph_database.get_database_name",
        return_value="tenant-db",
    ):
        with patch("tasks.jobs.deletion.graph_database.drop_subgraph"):
            delete_provider(tenant_id, provider.id)

    removed_rows = JiraIssueDispatch.objects.filter(finding_id=provider_finding.id)
    assert not removed_rows.exists()
    assert JiraIssueDispatch.objects.filter(pk=survivor.pk).exists()
def test_delete_provider_does_not_exist(self, tenants_fixture):
with (
patch(
@@ -1640,74 +1640,14 @@ class TestJiraIntegration:
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_skips_already_dispatched(
self,
mock_jira_dispatch,
mock_initialize_integration,
mock_integration_model,
mock_finding_model,
mock_rls_transaction,
):
"""A re-run skips findings already ticketed (no duplicate Jira issues)."""
mock_rls_transaction.return_value.__enter__ = MagicMock()
mock_rls_transaction.return_value.__exit__ = MagicMock()
mock_integration_model.objects.get.return_value = MagicMock()
# finding-1 was already dispatched in a prior run; finding-2 is new.
mock_jira_dispatch.objects.filter.return_value.values_list.return_value = [
"finding-1"
]
mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), True)
mock_jira_integration = MagicMock()
mock_jira_integration.send_finding.return_value = True
mock_initialize_integration.return_value = mock_jira_integration
finding2 = MagicMock()
finding2.id = "finding-2"
finding2.check_id = "check_002"
finding2.severity = "low"
finding2.status = "FAIL"
finding2.status_extended = ""
finding2.compliance = {}
finding2.resources.exists.return_value = False
finding2.resources.first.return_value = None
finding2.scan.provider.provider = "aws"
finding2.check_metadata = {
"checktitle": "C2",
"risk": "",
"remediation": {"recommendation": {}, "code": {}},
}
mock_finding_model.all_objects.select_related.return_value.prefetch_related.return_value.get.return_value = finding2
result = send_findings_to_jira(
"tenant-123", "integration-456", "PROJ", "Task", ["finding-1", "finding-2"]
)
# finding-1 skipped (already sent); only finding-2 sent -> no duplicate.
assert result == {"created_count": 1, "failed_count": 0, "skipped_count": 1}
mock_jira_integration.send_finding.assert_called_once()
assert (
mock_jira_integration.send_finding.call_args.kwargs["check_id"]
== "check_002"
)
@patch("tasks.jobs.integrations.rls_transaction")
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_success(
self,
mock_jira_dispatch,
mock_initialize_integration,
mock_integration_model,
mock_finding_model,
mock_rls_transaction,
):
"""Test successful sending of findings to Jira using send_finding method"""
mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []
mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), True)
tenant_id = "tenant-123"
integration_id = "integration-456"
project_key = "PROJ"
@@ -1799,7 +1739,7 @@ class TestJiraIntegration:
)
# Assertions
assert result == {"created_count": 2, "failed_count": 0, "skipped_count": 0}
assert result == {"created_count": 2, "failed_count": 0}
# Verify Jira integration was initialized
mock_initialize_integration.assert_called_once_with(integration)
@@ -1831,10 +1771,8 @@ class TestJiraIntegration:
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.logger")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_partial_failure(
self,
mock_jira_dispatch,
mock_logger,
mock_initialize_integration,
mock_integration_model,
@@ -1842,8 +1780,6 @@ class TestJiraIntegration:
mock_rls_transaction,
):
"""Test partial failure when sending findings to Jira"""
mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []
mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), True)
tenant_id = "tenant-123"
integration_id = "integration-456"
project_key = "PROJ"
@@ -1897,35 +1833,23 @@ class TestJiraIntegration:
)
# Assertions
assert result == {"created_count": 2, "failed_count": 1, "skipped_count": 0}
assert result == {"created_count": 2, "failed_count": 1}
# Verify error was logged for the failed finding
mock_logger.error.assert_called_with("Failed to send finding finding-2 to Jira")
# The failed finding's reservation is released so a later run can retry it.
mock_jira_dispatch.objects.filter.assert_any_call(
tenant_id=tenant_id,
integration_id=integration_id,
finding_id="finding-2",
)
mock_jira_dispatch.objects.filter.return_value.delete.assert_called_once()
@patch("tasks.jobs.integrations.rls_transaction")
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_no_resources(
self,
mock_jira_dispatch,
mock_initialize_integration,
mock_integration_model,
mock_finding_model,
mock_rls_transaction,
):
"""Test sending findings to Jira when finding has no resources"""
mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []
mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), True)
tenant_id = "tenant-123"
integration_id = "integration-456"
project_key = "PROJ"
@@ -1983,7 +1907,7 @@ class TestJiraIntegration:
)
# Assertions
assert result == {"created_count": 1, "failed_count": 0, "skipped_count": 0}
assert result == {"created_count": 1, "failed_count": 0}
# Verify send_finding was called with empty resource fields
call_kwargs = mock_jira_integration.send_finding.call_args.kwargs
@@ -1996,18 +1920,14 @@ class TestJiraIntegration:
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_with_empty_check_metadata(
self,
mock_jira_dispatch,
mock_initialize_integration,
mock_integration_model,
mock_finding_model,
mock_rls_transaction,
):
"""Test sending findings to Jira when check_metadata is empty or missing fields"""
mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []
mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), True)
tenant_id = "tenant-123"
integration_id = "integration-456"
project_key = "PROJ"
@@ -2050,7 +1970,7 @@ class TestJiraIntegration:
)
# Assertions
assert result == {"created_count": 1, "failed_count": 0, "skipped_count": 0}
assert result == {"created_count": 1, "failed_count": 0}
# Verify send_finding was called with default/empty values
call_kwargs = mock_jira_integration.send_finding.call_args.kwargs
@@ -2063,94 +1983,3 @@ class TestJiraIntegration:
assert call_kwargs["remediation_code_cli"] == ""
assert call_kwargs["remediation_code_other"] == ""
assert call_kwargs["compliance"] == {}
@patch("tasks.jobs.integrations.rls_transaction")
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_reserves_before_sending(
    self,
    mock_jira_dispatch,
    mock_initialize_integration,
    mock_integration_model,
    mock_finding_model,
    mock_rls_transaction,
):
    """The dispatch row is reserved before the external Jira call (reserve-then-act)."""
    # Make the patched rls_transaction(...) usable as a context manager.
    rls_context = mock_rls_transaction.return_value
    rls_context.__enter__ = MagicMock()
    rls_context.__exit__ = MagicMock()

    mock_integration_model.objects.get.return_value = MagicMock()
    # No finding has been dispatched by a previous run.
    mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []

    # Shared timeline: both side effects append to it, so the relative order
    # of the reservation and the external send can be asserted afterwards.
    timeline = []

    def record_reservation(**kw):
        timeline.append(("reserve", kw))
        return MagicMock(), True

    def record_send(**kw):
        timeline.append(("send", kw))
        return True

    mock_jira_dispatch.objects.get_or_create.side_effect = record_reservation
    jira_client = MagicMock()
    jira_client.send_finding.side_effect = record_send
    mock_initialize_integration.return_value = jira_client

    # Minimal finding: no resources and an almost empty check_metadata.
    finding = MagicMock()
    finding.id = "finding-1"
    finding.check_id = "check_001"
    finding.severity = "low"
    finding.status = "FAIL"
    finding.status_extended = ""
    finding.compliance = {}
    finding.resources.exists.return_value = False
    finding.resources.first.return_value = None
    finding.scan.provider.provider = "aws"
    finding.check_metadata = {
        "checktitle": "C1",
        "risk": "",
        "remediation": {"recommendation": {}, "code": {}},
    }
    finding_queryset = (
        mock_finding_model.all_objects.select_related.return_value
        .prefetch_related.return_value
    )
    finding_queryset.get.return_value = finding

    result = send_findings_to_jira(
        "tenant-123", "integration-456", "PROJ", "Task", ["finding-1"]
    )

    assert result == {"created_count": 1, "failed_count": 0, "skipped_count": 0}
    # Reservation must precede the external send.
    assert [entry[0] for entry in timeline] == ["reserve", "send"]
    # A successful send keeps the reservation (no rollback delete).
    mock_jira_dispatch.objects.filter.return_value.delete.assert_not_called()
@patch("tasks.jobs.integrations.rls_transaction")
@patch("tasks.jobs.integrations.Finding")
@patch("tasks.jobs.integrations.Integration")
@patch("tasks.jobs.integrations.initialize_prowler_integration")
@patch("tasks.jobs.integrations.JiraIssueDispatch")
def test_send_findings_to_jira_skips_when_already_reserved(
    self,
    mock_jira_dispatch,
    mock_initialize_integration,
    mock_integration_model,
    mock_finding_model,
    mock_rls_transaction,
):
    """A finding that races past the bulk pre-check but loses the reservation
    (created=False) is skipped without a second issue, leaving the row intact."""
    mock_rls_transaction.return_value.__enter__ = MagicMock()
    # __exit__ must return a falsy value: a bare MagicMock() returns a truthy
    # mock, which would make the patched context manager swallow any exception
    # raised inside ``with rls_transaction(...)`` and hide real failures.
    mock_rls_transaction.return_value.__exit__ = MagicMock(return_value=False)
    mock_integration_model.objects.get.return_value = MagicMock()
    # Bulk pre-check finds no existing dispatch rows.
    mock_jira_dispatch.objects.filter.return_value.values_list.return_value = []
    # Another concurrent run already created the dispatch row.
    mock_jira_dispatch.objects.get_or_create.return_value = (MagicMock(), False)
    mock_jira_integration = MagicMock()
    mock_initialize_integration.return_value = mock_jira_integration

    result = send_findings_to_jira(
        "tenant-123", "integration-456", "PROJ", "Task", ["finding-1"]
    )

    assert result == {"created_count": 0, "failed_count": 0, "skipped_count": 1}
    mock_jira_integration.send_finding.assert_not_called()
    # The reservation belongs to the run that won the race; do not delete it.
    mock_jira_dispatch.objects.filter.return_value.delete.assert_not_called()
@@ -1,372 +0,0 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from uuid import uuid4
import pytest
from celery import states
from django_celery_results.models import TaskResult
from api.models import Scan, StateChoices
from api.models import Task as APITask
from tasks.jobs.orphan_recovery import (
_decode_celery_field,
_reconcile_task_results,
_recovery_attempt_count,
_reenqueue_scan,
advisory_lock,
is_worker_alive,
)
def _orphan_result(*, name, kwargs, worker, created_minutes_ago, status=states.STARTED):
    """Build a TaskResult that looks like an in-flight task from the past.

    The row is created with the given task name, worker and ``repr``-encoded
    kwargs (args are an empty list), then its ``date_created`` is moved back
    by ``created_minutes_ago`` minutes and the refreshed instance is returned.
    """
    record = TaskResult.objects.create(
        task_id=str(uuid4()),
        status=status,
        task_name=name,
        worker=worker,
        task_kwargs=repr(kwargs),
        task_args=repr([]),
    )
    # Backdate through a queryset update rather than at creation time
    # (presumably because date_created is set automatically on insert - verify).
    backdated = datetime.now(tz=timezone.utc) - timedelta(minutes=created_minutes_ago)
    TaskResult.objects.filter(pk=record.pk).update(date_created=backdated)
    record.refresh_from_db()
    return record
@pytest.mark.django_db
class TestDecodeCeleryField:
    """Checks ``_decode_celery_field`` against the stored string forms below."""

    def test_decodes_single_encoded_repr(self):
        # A plain repr() of a dict is decoded back into the dict.
        decoded = _decode_celery_field("{'tenant_id': 'abc'}", {})
        assert decoded == {"tenant_id": "abc"}

    def test_decodes_double_encoded(self):
        import json

        # repr() wrapped in a JSON string: two layers of encoding.
        payload = {"tenant_id": "abc", "scan_id": "s1"}
        stored = json.dumps(repr(payload))
        assert _decode_celery_field(stored, {}) == {
            "tenant_id": "abc",
            "scan_id": "s1",
        }

    def test_empty_returns_default(self):
        # None and the empty string both fall back to the supplied default.
        none_result = _decode_celery_field(None, {})
        assert none_result == {}
        blank_result = _decode_celery_field("", [])
        assert blank_result == []

    def test_unparseable_raises(self):
        # Input that is not a Python literal is rejected with ValueError.
        with pytest.raises(ValueError):
            _decode_celery_field("<<not a literal>>", {})
@pytest.mark.django_db
class TestReconcileTaskResults:
    """Exercises ``_reconcile_task_results`` with worker liveness, task
    revocation and the Celery app registry replaced by test doubles."""

    def _patches(self, alive):
        """Patch worker liveness, revoke, and the task registry for re-enqueue."""
        registry_task = MagicMock()
        fake_app = MagicMock()
        fake_app.tasks.get.return_value = registry_task
        liveness_patch = patch(
            "tasks.jobs.orphan_recovery.is_worker_alive", return_value=alive
        )
        revoke_patch = patch("tasks.jobs.orphan_recovery.revoke_task")
        app_patch = patch("tasks.jobs.orphan_recovery.current_app", fake_app)
        return (liveness_patch, revoke_patch, app_patch, registry_task)

    def _run_reconcile(self, *, alive, attempt_count=None):
        """Run one reconciliation pass under the standard patches.

        When ``attempt_count`` is given, ``_recovery_attempt_count`` is also
        patched to return it; otherwise that helper is left untouched.
        Returns ``(result, mock_task)``.
        """
        p_alive, p_revoke, p_app, mock_task = self._patches(alive=alive)
        with p_alive, p_revoke, p_app:
            if attempt_count is None:
                outcome = _reconcile_task_results(
                    grace_minutes=2, max_attempts=3, window_hours=6, dry_run=False
                )
            else:
                with patch(
                    "tasks.jobs.orphan_recovery._recovery_attempt_count",
                    return_value=attempt_count,
                ):
                    outcome = _reconcile_task_results(
                        grace_minutes=2,
                        max_attempts=3,
                        window_hours=6,
                        dry_run=False,
                    )
        return outcome, mock_task

    def test_recovers_non_scan_task(self, tenants_fixture):
        """A NON-scan task (tenant-deletion) left orphaned is re-enqueued too."""
        tenant = tenants_fixture[0]
        expected_kwargs = {"tenant_id": str(tenant.id)}
        tr = _orphan_result(
            name="tenant-deletion",
            kwargs={"tenant_id": str(tenant.id)},
            worker="dead@gone",
            created_minutes_ago=60,
        )

        result, mock_task = self._run_reconcile(alive=False, attempt_count=1)

        assert tr.task_id in result["recovered"]
        tr.refresh_from_db()
        assert tr.status == states.REVOKED  # stale result cleared (no pending alert)
        mock_task.apply_async.assert_called_once()
        enqueue = mock_task.apply_async.call_args.kwargs
        assert enqueue["kwargs"] == expected_kwargs
        assert enqueue["task_id"] != tr.task_id  # fresh task id

    def test_external_integration_task_is_not_reenqueued_by_default(
        self, tenants_fixture
    ):
        """External side-effect tasks without proven idempotency stay terminal.

        integration-s3 rebuilds its upload from worker-local files that do not
        survive the crash, so re-enqueuing it would upload nothing.
        """
        s3_kwargs = {
            "tenant_id": str(tenants_fixture[0].id),
            "provider_id": str(uuid4()),
            "output_directory": "/tmp/gone",
        }
        tr = _orphan_result(
            name="integration-s3",
            kwargs=s3_kwargs,
            worker="dead@gone",
            created_minutes_ago=60,
        )

        result, mock_task = self._run_reconcile(alive=False, attempt_count=1)

        assert tr.task_id in result["failed"]
        mock_task.apply_async.assert_not_called()

    def test_jira_integration_task_is_reenqueued(self, tenants_fixture):
        """integration-jira is re-enqueued: its JiraIssueDispatch reservation makes a
        re-run skip already-ticketed findings, so recovery cannot duplicate issues."""
        tenant = tenants_fixture[0]
        kwargs = {
            "tenant_id": str(tenant.id),
            "integration_id": str(uuid4()),
            "project_key": "PROWLER",
            "issue_type": "Task",
            "finding_ids": [str(uuid4()), str(uuid4())],
        }
        tr = _orphan_result(
            name="integration-jira",
            kwargs=kwargs,
            worker="dead@gone",
            created_minutes_ago=60,
        )

        result, mock_task = self._run_reconcile(alive=False, attempt_count=1)

        assert tr.task_id in result["recovered"]
        tr.refresh_from_db()
        assert tr.status == states.REVOKED  # stale result cleared (no pending alert)
        mock_task.apply_async.assert_called_once()
        enqueue = mock_task.apply_async.call_args.kwargs
        assert enqueue["kwargs"] == kwargs
        assert enqueue["task_id"] != tr.task_id  # fresh task id

    def test_skips_live_worker(self, tenants_fixture):
        """A task whose worker still answers is reported as skipped."""
        tr = _orphan_result(
            name="tenant-deletion",
            kwargs={"tenant_id": str(tenants_fixture[0].id)},
            worker="alive@host",
            created_minutes_ago=60,
        )

        result, mock_task = self._run_reconcile(alive=True)

        assert tr.task_id in result["skipped"]
        mock_task.apply_async.assert_not_called()

    def test_skips_recently_created(self, tenants_fixture):
        """A task created just now is not recovered."""
        tr = _orphan_result(
            name="tenant-deletion",
            kwargs={"tenant_id": str(tenants_fixture[0].id)},
            worker="dead@gone",
            created_minutes_ago=0,
        )

        result, mock_task = self._run_reconcile(alive=False)

        # too recent: excluded by the grace window (not even a candidate)
        assert tr.task_id not in result["recovered"]
        mock_task.apply_async.assert_not_called()

    def test_denylisted_task_failed_not_reenqueued(self, tenants_fixture):
        """A non-allowlisted task is failed, never blind re-run."""
        tr = _orphan_result(
            name="some-non-idempotent-task",
            kwargs={"tenant_id": str(tenants_fixture[0].id)},
            worker="dead@gone",
            created_minutes_ago=60,
        )

        result, mock_task = self._run_reconcile(alive=False, attempt_count=1)

        assert tr.task_id in result["failed"]
        tr.refresh_from_db()
        assert tr.status == states.REVOKED
        mock_task.apply_async.assert_not_called()

    def test_recovery_cap_marks_failed(self, tenants_fixture):
        """When the recovery counter exceeds the cap, the task is failed not re-run."""
        tr = _orphan_result(
            name="tenant-deletion",
            kwargs={"tenant_id": str(tenants_fixture[0].id)},
            worker="dead@gone",
            created_minutes_ago=60,
        )

        # Counter value 4 is above max_attempts=3.
        result, mock_task = self._run_reconcile(alive=False, attempt_count=4)

        assert tr.task_id in result["failed"]
        mock_task.apply_async.assert_not_called()
@pytest.mark.django_db
class TestScanRecovery:
    """Scan orphans are recovered by re-running scan-perform against the
    EXISTING scan row, so a scheduled-scan orphan (whose own task would no-op
    on its guard) really gets executed again."""

    def _scan_orphan(self, tenant, provider, name):
        """Create a stale STARTED TaskResult plus the APITask and EXECUTING
        Scan that point at it; return ``(old_task_id, scan)``."""
        stale_task_id = str(uuid4())
        task_kwargs = {"tenant_id": str(tenant.id), "provider_id": str(provider.id)}
        task_result = TaskResult.objects.create(
            task_id=stale_task_id,
            status=states.STARTED,
            task_name=name,
            worker="dead@gone",
            task_kwargs=repr(task_kwargs),
            task_args=repr([]),
        )
        # Backdate the result by an hour.
        an_hour_ago = datetime.now(tz=timezone.utc) - timedelta(minutes=60)
        TaskResult.objects.filter(pk=task_result.pk).update(date_created=an_hour_ago)
        APITask.objects.create(
            id=stale_task_id, tenant_id=tenant.id, task_runner_task=task_result
        )
        orphaned_scan = Scan.objects.create(
            name="scan-orphan",
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.EXECUTING,
            tenant_id=tenant.id,
            task_id=stale_task_id,
            recovery_count=0,
        )
        return stale_task_id, orphaned_scan

    @pytest.mark.parametrize("name", ["scan-perform", "scan-perform-scheduled"])
    def test_scan_recovered_via_scan_perform(
        self, tenants_fixture, providers_fixture, name
    ):
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        old_id, scan = self._scan_orphan(tenant, provider, name)

        with (
            patch("tasks.jobs.orphan_recovery.is_worker_alive", return_value=False),
            patch("tasks.jobs.orphan_recovery.revoke_task"),
            patch("tasks.jobs.orphan_recovery._recovery_attempt_count", return_value=1),
            patch("tasks.tasks.perform_scan_task") as mock_scan_task,
        ):
            result = _reconcile_task_results(
                grace_minutes=2, max_attempts=3, window_hours=6, dry_run=False
            )

        assert old_id in result["recovered"]
        scan.refresh_from_db()
        assert str(scan.task_id) != old_id  # relinked to a fresh task
        assert scan.recovery_count == 1
        stale_result = TaskResult.objects.get(task_id=old_id)
        assert stale_result.status == states.REVOKED
        # Whatever the original task name, recovery goes through scan-perform on
        # the existing scan row, so the scheduled guard cannot turn it into a no-op.
        mock_scan_task.apply_async.assert_called_once()
        enqueue_kwargs = mock_scan_task.apply_async.call_args.kwargs["kwargs"]
        assert enqueue_kwargs["scan_id"] == str(scan.id)

    def test_reenqueue_skips_when_scan_already_repointed(
        self, tenants_fixture, providers_fixture
    ):
        # The scan is already linked to a newer task, so a stale orphan must
        # neither relink it nor start a second concurrent run on the same row.
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        newer_id = str(uuid4())
        newer_result = TaskResult.objects.create(
            task_id=newer_id, status=states.STARTED, task_name="scan-perform"
        )
        APITask.objects.create(
            id=newer_id, tenant_id=tenant.id, task_runner_task=newer_result
        )
        scan = Scan.objects.create(
            name="scan-orphan",
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.EXECUTING,
            tenant_id=tenant.id,
            task_id=newer_id,
            recovery_count=0,
        )

        with patch("tasks.tasks.perform_scan_task") as mock_scan_task:
            # An unrelated (stale) task id is offered for recovery.
            recovered = _reenqueue_scan(str(uuid4()), scan)

        assert recovered is False
        mock_scan_task.apply_async.assert_not_called()
        scan.refresh_from_db()
        assert scan.recovery_count == 0
@pytest.mark.django_db
class TestOrphanRecoveryHelpers:
    """Covers the small helpers: advisory lock, worker ping, attempt counter."""

    @staticmethod
    def _inspect_patch(ping_reply):
        """Patch ``current_app.control.inspect`` so ``ping()`` yields ``ping_reply``."""
        inspector = MagicMock()
        inspector.ping.return_value = ping_reply
        return patch(
            "tasks.jobs.orphan_recovery.current_app.control.inspect",
            return_value=inspector,
        )

    def test_advisory_lock_acquires_and_releases(self):
        with advisory_lock() as got_lock:
            assert got_lock is True

    def test_is_worker_alive_true_when_responds(self):
        # The worker answers the ping.
        with self._inspect_patch({"w@h": {"ok": "pong"}}):
            assert is_worker_alive("w@h") is True

    def test_is_worker_alive_false_when_silent(self):
        # No reply at all from the ping.
        with self._inspect_patch(None):
            assert is_worker_alive("w@h") is False

    def test_recovery_attempt_count_increments(self):
        # Random probe value gives this test its own kwargs signature.
        signature = repr({"probe": str(uuid4())})
        fake_redis = MagicMock()
        # The mocked client's incr() yields 1 on the first call and 2 on the second.
        fake_redis.incr.side_effect = [1, 2]
        with patch("redis.from_url", return_value=fake_redis):
            first = _recovery_attempt_count("probe-task", signature, 6)
            assert first == 1
            second = _recovery_attempt_count("probe-task", signature, 6)
            assert second == 2
@@ -80,7 +80,7 @@ def basic_csa_compliance_data():
tenant_id="tenant-123",
scan_id="scan-456",
provider_id="provider-789",
compliance_id="csa_ccm_4.0",
compliance_id="csa_ccm_4.0_aws",
framework="CSA-CCM",
name="CSA Cloud Controls Matrix v4.0",
version="4.0",

Some files were not shown because too many files have changed in this diff Show More