Compare commits

..

4 Commits

Author SHA1 Message Date
Prowler Bot
c172f75f1a fix(dep): address compatibility issues (#6557)
Co-authored-by: Pablo Lara <larabjj@gmail.com>
2025-01-16 14:35:06 +01:00
Prowler Bot
ec492fa13a feat(filters): add resource type filter for findings (#6525)
Co-authored-by: Pablo Lara <larabjj@gmail.com>
2025-01-15 08:43:49 +01:00
Prowler Bot
702659959c fix(Azure TDE): add filter for master DB (#6514)
Co-authored-by: johannes-engler-mw <132657752+johannes-engler-mw@users.noreply.github.com>
2025-01-14 15:25:27 -05:00
Pepe Fagoaga
fef332a591 chore(version): set next fixes 2025-01-14 18:05:04 +01:00
2854 changed files with 36348 additions and 214164 deletions

45
.env
View File

@@ -3,8 +3,8 @@
# For production, it is recommended to use a secure method to store these variables and change the default secret keys.
#### Prowler UI Configuration ####
PROWLER_UI_VERSION="stable"
AUTH_URL=http://localhost:3000
PROWLER_UI_VERSION="latest"
SITE_URL=http://localhost:3000
API_BASE_URL=http://prowler-api:8080/api/v1
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
AUTH_TRUST_HOST=true
@@ -30,30 +30,6 @@ VALKEY_HOST=valkey
VALKEY_PORT=6379
VALKEY_DB=0
# API scan settings
# The path to the directory where scan output should be stored
DJANGO_TMP_OUTPUT_DIRECTORY="/tmp/prowler_api_output"
# The maximum number of findings to process in a single batch
DJANGO_FINDINGS_BATCH_SIZE=1000
# The AWS access key to be used when uploading scan output to an S3 bucket
# If left empty, default AWS credentials resolution behavior will be used
DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID=""
# The AWS secret key to be used when uploading scan output to an S3 bucket
DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY=""
# An optional AWS session token
DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN=""
# The AWS region where your S3 bucket is located (e.g., "us-east-1")
DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION=""
# The name of the S3 bucket where scan output should be stored
DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET=""
# Django settings
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1,prowler-api
DJANGO_BIND_ADDRESS=0.0.0.0
@@ -116,20 +92,3 @@ jQIDAQAB
# openssl rand -base64 32
DJANGO_SECRETS_ENCRYPTION_KEY="oE/ltOhp/n1TdbHjVmzcjDPLcLA41CVI/4Rk+UB5ESc="
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
# Sentry settings
SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.6.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
SOCIAL_GOOGLE_OAUTH_CLIENT_ID=""
SOCIAL_GOOGLE_OAUTH_CLIENT_SECRET=""
SOCIAL_GITHUB_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/github"
SOCIAL_GITHUB_OAUTH_CLIENT_ID=""
SOCIAL_GITHUB_OAUTH_CLIENT_SECRET=""

156
.github/dependabot.yml vendored
View File

@@ -9,112 +9,108 @@ updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 25
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "pip"
# Dependabot Updates are temporary disabled - 2025/03/19
# - package-ecosystem: "pip"
# directory: "/api"
# schedule:
# interval: "daily"
# open-pull-requests-limit: 10
# target-branch: master
# labels:
# - "dependencies"
# - "pip"
# - "component/api"
- package-ecosystem: "pip"
directory: "/api"
schedule:
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "pip"
- "component/api"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 25
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "github_actions"
# Dependabot Updates are temporary disabled - 2025/03/19
# - package-ecosystem: "npm"
# directory: "/ui"
# schedule:
# interval: "daily"
# open-pull-requests-limit: 10
# target-branch: master
# labels:
# - "dependencies"
# - "npm"
# - "component/ui"
- package-ecosystem: "npm"
directory: "/ui"
schedule:
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "npm"
- "component/ui"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 25
interval: "weekly"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "docker"
# Dependabot Updates are temporary disabled - 2025/04/15
# v4.6
# - package-ecosystem: "pip"
# directory: "/"
# schedule:
# interval: "weekly"
# open-pull-requests-limit: 10
# target-branch: v4.6
# labels:
# - "dependencies"
# - "pip"
# - "v4"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 10
target-branch: v4.6
labels:
- "dependencies"
- "pip"
- "v4"
# - package-ecosystem: "github-actions"
# directory: "/"
# schedule:
# interval: "weekly"
# open-pull-requests-limit: 10
# target-branch: v4.6
# labels:
# - "dependencies"
# - "github_actions"
# - "v4"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 10
target-branch: v4.6
labels:
- "dependencies"
- "github_actions"
- "v4"
# - package-ecosystem: "docker"
# directory: "/"
# schedule:
# interval: "weekly"
# open-pull-requests-limit: 10
# target-branch: v4.6
# labels:
# - "dependencies"
# - "docker"
# - "v4"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 10
target-branch: v4.6
labels:
- "dependencies"
- "docker"
- "v4"
# Dependabot Updates are temporary disabled - 2025/03/19
# v3
# - package-ecosystem: "pip"
# directory: "/"
# schedule:
# interval: "monthly"
# open-pull-requests-limit: 10
# target-branch: v3
# labels:
# - "dependencies"
# - "pip"
# - "v3"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 10
target-branch: v3
labels:
- "dependencies"
- "pip"
- "v3"
# - package-ecosystem: "github-actions"
# directory: "/"
# schedule:
# interval: "monthly"
# open-pull-requests-limit: 10
# target-branch: v3
# labels:
# - "dependencies"
# - "github_actions"
# - "v3"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 10
target-branch: v3
labels:
- "dependencies"
- "github_actions"
- "v3"

10
.github/labeler.yml vendored
View File

@@ -92,13 +92,3 @@ component/api:
component/ui:
- changed-files:
- any-glob-to-any-file: "ui/**"
compliance:
- changed-files:
- any-glob-to-any-file: "prowler/compliance/**"
- any-glob-to-any-file: "prowler/lib/outputs/compliance/**"
- any-glob-to-any-file: "tests/lib/outputs/compliance/**"
review-django-migrations:
- changed-files:
- any-glob-to-any-file: "api/src/backend/api/migrations/**"

View File

@@ -15,14 +15,7 @@ Please include a summary of the change and which issue is fixed. List any depend
- [ ] Review if the code is being covered by tests.
- [ ] Review if code is being documented following this specification https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings
- [ ] Review if backport is needed.
- [ ] Review if is needed to change the [Readme.md](https://github.com/prowler-cloud/prowler/blob/master/README.md)
- [ ] Ensure new entries are added to [CHANGELOG.md](https://github.com/prowler-cloud/prowler/blob/master/prowler/CHANGELOG.md), if applicable.
#### API
- [ ] Verify if API specs need to be regenerated.
- [ ] Check if version updates are required (e.g., specs, Poetry, etc.).
- [ ] Ensure new entries are added to [CHANGELOG.md](https://github.com/prowler-cloud/prowler/blob/master/api/CHANGELOG.md), if applicable.
### License
By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.

View File

@@ -61,40 +61,33 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set short git commit SHA
id: vars
run: |
shortSha=$(git rev-parse --short ${{ github.sha }})
echo "SHORT_SHA=${shortSha}" >> $GITHUB_ENV
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
# Set push: false for testing
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.LATEST_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.SHORT_SHA }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
push: true
@@ -103,12 +96,3 @@ jobs:
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.STABLE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Trigger deployment
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: prowler-api-deploy
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ env.SHORT_SHA }}"}'

View File

@@ -44,16 +44,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/api-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -6,7 +6,6 @@ on:
- "master"
- "v5.*"
paths:
- ".github/workflows/api-pull-request.yml"
- "api/**"
pull_request:
branches:
@@ -15,6 +14,7 @@ on:
paths:
- "api/**"
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: 5432
@@ -26,8 +26,7 @@ env:
VALKEY_HOST: localhost
VALKEY_PORT: 6379
VALKEY_DB: 0
API_WORKING_DIR: ./api
IMAGE_NAME: prowler-api
jobs:
test:
@@ -71,11 +70,11 @@ jobs:
--health-retries 5
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Test if changes are in not ignored paths
id: are-non-ignored-files-changed
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
uses: tj-actions/changed-files@v45
with:
files: api/**
files_ignore: |
@@ -85,30 +84,16 @@ jobs:
api/README.md
api/mkdocs.yml
- name: Replace @master with current branch in pyproject.toml
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
BRANCH_NAME="${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}"
echo "Using branch: $BRANCH_NAME"
sed -i "s|@master|@$BRANCH_NAME|g" pyproject.toml
- name: Install poetry
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
python -m pip install --upgrade pip
pipx install poetry==2.1.1
- name: Update poetry.lock after the branch name change
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry lock
pipx install poetry==1.8.5
- name: Set up Python ${{ matrix.python-version }}
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
@@ -117,7 +102,7 @@ jobs:
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry install --no-root
poetry install
poetry run pip list
VERSION=$(curl --silent "https://api.github.com/repos/hadolint/hadolint/releases/latest" | \
grep '"tag_name":' | \
@@ -159,7 +144,7 @@ jobs:
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry run safety check --ignore 70612,66963,74429
poetry run safety check --ignore 70612,66963
- name: Vulture
working-directory: ./api
@@ -181,23 +166,8 @@ jobs:
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d # v5.4.2
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: api
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.API_WORKING_DIR }}
push: false
tags: ${{ env.IMAGE_NAME }}:latest
outputs: type=docker
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -23,7 +23,7 @@ jobs:
steps:
- name: Check labels
id: preview_label_check
uses: agilepathway/label-checker@c3d16ad512e7cea5961df85ff2486bb774caf3c5 # v1.6.65
uses: docker://agilepathway/pull-request-label-checker:v1.6.55
with:
allow_failure: true
prefix_mode: true
@@ -33,7 +33,7 @@ jobs:
- name: Backport Action
if: steps.preview_label_check.outputs.label_check == 'success'
uses: sorenlouv/backport-github-action@ad888e978060bc1b2798690dd9d03c4036560947 # v9.5.1
uses: sorenlouv/backport-github-action@v9.5.1
with:
github_token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
auto_backport_label_prefix: ${{ env.BACKPORT_LABEL_PREFIX }}

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Leave PR comment with the Prowler Documentation URI
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
uses: peter-evans/create-or-update-comment@v4
with:
issue-number: ${{ env.PR_NUMBER }}
body: |

View File

@@ -1,23 +0,0 @@
name: Prowler - Conventional Commit
on:
pull_request:
types:
- "opened"
- "edited"
- "synchronize"
branches:
- "master"
- "v3"
- "v4.*"
- "v5.*"
jobs:
conventional-commit-check:
runs-on: ubuntu-latest
steps:
- name: conventional-commit-check
id: conventional-commit-check
uses: agenthunt/conventional-commit-checker-action@9e552d650d0e205553ec7792d447929fc78e012b # v2.0.0
with:
pr-title-regex: '^([^\s(]+)(?:\(([^)]+)\))?: (.+)'

View File

@@ -7,11 +7,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@b06f6d72a3791308bb7ba59c2b8cb7a083bd17e4 # v3.88.26
uses: trufflesecurity/trufflehog@v3.88.2
with:
path: ./
base: ${{ github.event.repository.default_branch }}

View File

@@ -14,4 +14,4 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
- uses: actions/labeler@v5

View File

@@ -1,34 +0,0 @@
name: Prowler - Merged Pull Request
on:
pull_request_target:
branches: ['master']
types: ['closed']
jobs:
trigger-cloud-pull-request:
name: Trigger Cloud Pull Request
if: github.event.pull_request.merged == true && github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set short git commit SHA
id: vars
run: |
shortSha=$(git rev-parse --short ${{ github.sha }})
echo "SHORT_SHA=${shortSha}" >> $GITHUB_ENV
- name: Trigger pull request
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: prowler-pull-request-merged
client-payload: '{
"PROWLER_COMMIT_SHA": "${{ github.sha }}",
"PROWLER_COMMIT_SHORT_SHA": "${{ env.SHORT_SHA }}",
"PROWLER_PR_TITLE": "${{ github.event.pull_request.title }}",
"PROWLER_PR_LABELS": ${{ toJson(github.event.pull_request.labels.*.name) }},
"PROWLER_PR_BODY": ${{ toJson(github.event.pull_request.body) }}
}'

View File

@@ -59,16 +59,16 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Poetry
run: |
pipx install poetry==2.*
pipx install poetry==1.8.5
pipx inject poetry poetry-bumpversion
- name: Get Prowler version
@@ -108,13 +108,13 @@ jobs:
esac
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to Public ECR
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
registry: public.ecr.aws
username: ${{ secrets.PUBLIC_ECR_AWS_ACCESS_KEY_ID }}
@@ -123,11 +123,11 @@ jobs:
AWS_REGION: ${{ env.AWS_REGION }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
if: github.event_name == 'push'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
push: true
tags: |
@@ -140,7 +140,7 @@ jobs:
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
# Use local context to get changes
# https://github.com/docker/build-push-action#path-context

View File

@@ -1,94 +0,0 @@
name: SDK - Bump Version
on:
release:
types: [published]
env:
PROWLER_VERSION: ${{ github.event.release.tag_name }}
BASE_BRANCH: master
jobs:
bump-version:
name: Bump Version
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Get Prowler version
shell: bash
run: |
if [[ $PROWLER_VERSION =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
MAJOR_VERSION=${BASH_REMATCH[1]}
MINOR_VERSION=${BASH_REMATCH[2]}
FIX_VERSION=${BASH_REMATCH[3]}
if (( MAJOR_VERSION == 5 )); then
if (( FIX_VERSION == 0 )); then
echo "Minor Release: $PROWLER_VERSION"
BUMP_VERSION_TO=${MAJOR_VERSION}.$((MINOR_VERSION + 1)).${FIX_VERSION}
echo "BUMP_VERSION_TO=${BUMP_VERSION_TO}" >> "${GITHUB_ENV}"
TARGET_BRANCH=${BASE_BRANCH}
echo "TARGET_BRANCH=${TARGET_BRANCH}" >> "${GITHUB_ENV}"
echo "Bumping to next minor version: ${BUMP_VERSION_TO} in branch ${TARGET_BRANCH}"
else
echo "Patch Release: $PROWLER_VERSION"
BUMP_VERSION_TO=${MAJOR_VERSION}.${MINOR_VERSION}.$((FIX_VERSION + 1))
echo "BUMP_VERSION_TO=${BUMP_VERSION_TO}" >> "${GITHUB_ENV}"
TARGET_BRANCH=v${MAJOR_VERSION}.${MINOR_VERSION}
echo "TARGET_BRANCH=${TARGET_BRANCH}" >> "${GITHUB_ENV}"
echo "Bumping to next patch version: ${BUMP_VERSION_TO} in branch ${TARGET_BRANCH}"
fi
else
echo "Releasing another Prowler major version, aborting..."
exit 1
fi
else
echo "Invalid version syntax: '$PROWLER_VERSION' (must be N.N.N)" >&2
exit 1
fi
- name: Bump versions in files
run: |
echo "Using PROWLER_VERSION=$PROWLER_VERSION"
echo "Using BUMP_VERSION_TO=$BUMP_VERSION_TO"
set -e
echo "Bumping version in pyproject.toml ..."
sed -i "s|version = \"${PROWLER_VERSION}\"|version = \"${BUMP_VERSION_TO}\"|" pyproject.toml
echo "Bumping version in prowler/config/config.py ..."
sed -i "s|prowler_version = \"${PROWLER_VERSION}\"|prowler_version = \"${BUMP_VERSION_TO}\"|" prowler/config/config.py
echo "Bumping version in .env ..."
sed -i "s|NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${PROWLER_VERSION}|NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${BUMP_VERSION_TO}|" .env
git --no-pager diff
- name: Create Pull Request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
base: ${{ env.TARGET_BRANCH }}
commit-message: "chore(release): Bump version to v${{ env.BUMP_VERSION_TO }}"
branch: "version-bump-to-v${{ env.BUMP_VERSION_TO }}"
title: "chore(release): Bump version to v${{ env.BUMP_VERSION_TO }}"
body: |
### Description
Bump Prowler version to v${{ env.BUMP_VERSION_TO }}
### License
By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.

View File

@@ -21,7 +21,6 @@ on:
paths-ignore:
- 'ui/**'
- 'api/**'
- '.github/**'
pull_request:
branches:
- "master"
@@ -31,7 +30,6 @@ on:
paths-ignore:
- 'ui/**'
- 'api/**'
- '.github/**'
schedule:
- cron: '00 12 * * *'
@@ -52,16 +50,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/sdk-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -21,11 +21,11 @@ jobs:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Test if changes are in not ignored paths
id: are-non-ignored-files-changed
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
uses: tj-actions/changed-files@v45
with:
files: ./**
files_ignore: |
@@ -34,24 +34,21 @@ jobs:
permissions/**
api/**
ui/**
prowler/CHANGELOG.md
README.md
mkdocs.yml
.backportrc.json
.env
docker-compose*
examples/**
.gitignore
- name: Install poetry
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
python -m pip install --upgrade pip
pipx install poetry==2.1.1
pipx install poetry==1.8.5
- name: Set up Python ${{ matrix.python-version }}
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
@@ -59,7 +56,7 @@ jobs:
- name: Install dependencies
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry install --no-root
poetry install
poetry run pip list
VERSION=$(curl --silent "https://api.github.com/repos/hadolint/hadolint/releases/latest" | \
grep '"tag_name":' | \
@@ -107,113 +104,15 @@ jobs:
run: |
/tmp/hadolint Dockerfile --ignore=DL3013
# Test AWS
- name: AWS - Check if any file has changed
id: aws-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/aws/**
./tests/providers/aws/**
.poetry.lock
- name: AWS - Test
if: steps.aws-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/aws --cov-report=xml:aws_coverage.xml tests/providers/aws
# Test Azure
- name: Azure - Check if any file has changed
id: azure-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/azure/**
./tests/providers/azure/**
.poetry.lock
- name: Azure - Test
if: steps.azure-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/azure --cov-report=xml:azure_coverage.xml tests/providers/azure
# Test GCP
- name: GCP - Check if any file has changed
id: gcp-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/gcp/**
./tests/providers/gcp/**
.poetry.lock
- name: GCP - Test
if: steps.gcp-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/gcp --cov-report=xml:gcp_coverage.xml tests/providers/gcp
# Test Kubernetes
- name: Kubernetes - Check if any file has changed
id: kubernetes-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/kubernetes/**
./tests/providers/kubernetes/**
.poetry.lock
- name: Kubernetes - Test
if: steps.kubernetes-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/kubernetes --cov-report=xml:kubernetes_coverage.xml tests/providers/kubernetes
# Test NHN
- name: NHN - Check if any file has changed
id: nhn-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/nhn/**
./tests/providers/nhn/**
.poetry.lock
- name: NHN - Test
if: steps.nhn-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/nhn --cov-report=xml:nhn_coverage.xml tests/providers/nhn
# Test M365
- name: M365 - Check if any file has changed
id: m365-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/m365/**
./tests/providers/m365/**
.poetry.lock
- name: M365 - Test
if: steps.m365-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/m365 --cov-report=xml:m365_coverage.xml tests/providers/m365
# Common Tests
- name: Lib - Test
- name: Test with pytest
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/lib --cov-report=xml:lib_coverage.xml tests/lib
poetry run pytest -n auto --cov=./prowler --cov-report=xml tests
- name: Config - Test
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/config --cov-report=xml:config_coverage.xml tests/config
# Codecov
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d # v5.4.2
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: prowler
files: ./aws_coverage.xml,./azure_coverage.xml,./gcp_coverage.xml,./kubernetes_coverage.xml,./nhn_coverage.xml,./m365_coverage.xml,./lib_coverage.xml,./config_coverage.xml

View File

@@ -7,7 +7,7 @@ on:
env:
RELEASE_TAG: ${{ github.event.release.tag_name }}
PYTHON_VERSION: 3.11
# CACHE: "poetry"
CACHE: "poetry"
jobs:
repository-check:
@@ -64,17 +64,17 @@ jobs:
;;
esac
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Install dependencies
run: |
pipx install poetry==2.1.1
pipx install poetry==1.8.5
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
# cache: ${{ env.CACHE }}
cache: ${{ env.CACHE }}
- name: Build Prowler package
run: |

View File

@@ -4,7 +4,7 @@ name: SDK - Refresh AWS services' regions
on:
schedule:
- cron: "0 9 * * 1" # runs at 09:00 UTC every Monday
- cron: "0 9 * * *" #runs at 09:00 UTC everyday
env:
GITHUB_BRANCH: "master"
@@ -23,12 +23,12 @@ jobs:
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
with:
ref: ${{ env.GITHUB_BRANCH }}
- name: setup python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
uses: actions/setup-python@v5
with:
python-version: 3.9 #install the python needed
@@ -38,7 +38,7 @@ jobs:
pip install boto3
- name: Configure AWS Credentials -- DEV
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: ${{ env.AWS_REGION_DEV }}
role-to-assume: ${{ secrets.DEV_IAM_ROLE_ARN }}
@@ -50,13 +50,12 @@ jobs:
# Create pull request
- name: Create Pull Request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
uses: peter-evans/create-pull-request@v7
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
commit-message: "feat(regions_update): Update regions for AWS services"
branch: "aws-services-regions-updated-${{ github.sha }}"
labels: "status/waiting-for-revision, severity/low, provider/aws"
labels: "status/waiting-for-revision, severity/low, provider/aws, backport-to-v3"
title: "chore(regions_update): Changes in regions for AWS services"
body: |
### Description

View File

@@ -61,58 +61,38 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set short git commit SHA
id: vars
run: |
shortSha=$(git rev-parse --short ${{ github.sha }})
echo "SHORT_SHA=${shortSha}" >> $GITHUB_ENV
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ env.SHORT_SHA }}
# Set push: false for testing
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.LATEST_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.SHORT_SHA }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${{ env.RELEASE_TAG }}
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.RELEASE_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.STABLE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Trigger deployment
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: prowler-ui-deploy
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ env.SHORT_SHA }}"}'

View File

@@ -44,16 +44,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/ui-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -1,80 +0,0 @@
name: UI - E2E Tests
on:
pull_request:
branches:
- master
- "v5.*"
paths:
- 'ui/**'
env:
SERVICES_TO_START: "api-dev postgres valkey worker-beat worker-dev"
DOCKER_COMPOSE_FILE: "docker-compose-dev.yml"
jobs:
e2e:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: 20
cache: 'npm'
cache-dependency-path: './ui/package-lock.json'
# - name: Cache Playwright Browsers
# uses: actions/cache@v4
# with:
# path: ~/.cache/ms-playwright
# key: playwright-${{ runner.os }}-${{ hashFiles('**/package-lock.json') }}
# restore-keys: |
# playwright-${{ runner.os }}-
- name: Install dependencies
run: npm ci
working-directory: ./ui
- name: Install Playwright Browsers
run: npx playwright install --with-deps
working-directory: ./ui
- name: Set up Docker Compose
uses: docker/setup-compose-action@364cc21a5de5b1ee4a7f5f9d3fa374ce0ccde746 #v1.2.0
- name: Start Docker Compose
run: docker compose -f ${DOCKER_COMPOSE_FILE} up -d ${SERVICES_TO_START}
- name: Wait for API to be ready
run: |
for i in {1..30}; do
if curl -s http://localhost:8000/api/v1; then
echo "API is up!"
break
fi
echo "Waiting for API..."
sleep 5
done
- name: Run Playwright tests
run: npx playwright test
working-directory: ./ui
- name: Upload Playwright report
uses: actions/upload-artifact@v4
with:
name: playwright-report
path: ./ui/playwright-report
- name: Upload Playwright videos
uses: actions/upload-artifact@v4
with:
name: test-videos
path: ./ui/test-results/**/*.webm
- name: Docker Compose Down
if: always()
run: docker compose -f ${DOCKER_COMPOSE_FILE} down

View File

@@ -6,7 +6,6 @@ on:
- "master"
- "v5.*"
paths:
- ".github/workflows/ui-pull-request.yml"
- "ui/**"
pull_request:
branches:
@@ -14,9 +13,6 @@ on:
- "v5.*"
paths:
- 'ui/**'
env:
UI_WORKING_DIR: ./ui
IMAGE_NAME: prowler-ui
jobs:
test-and-coverage:
@@ -27,11 +23,11 @@ jobs:
node-version: [20.x]
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Node.js ${{ matrix.node-version }}
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: Install dependencies
@@ -43,20 +39,3 @@ jobs:
- name: Build the application
working-directory: ./ui
run: npm run build
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.UI_WORKING_DIR }}
# Always build using `prod` target
target: prod
push: false
tags: ${{ env.IMAGE_NAME }}:latest
outputs: type=docker
build-args: |
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=pk_test_51LwpXXXX

15
.gitignore vendored
View File

@@ -31,7 +31,7 @@ tags
*.DS_Store
# Prowler output
/output
output/
# Prowler found secrets
secrets-*/
@@ -42,19 +42,13 @@ junit-reports/
# VSCode files
.vscode/
# Cursor files
.cursorignore
.cursor/
# Terraform
.terraform*
*.tfstate
*.tfstate.*
# .env
ui/.env*
api/.env*
.env.local
# Coverage
.coverage*
@@ -66,10 +60,3 @@ node_modules
# Persistent data
_data/
# Playwright
node_modules/
/test-results/
/playwright-report/
/blob-report/
/playwright/.cache/

View File

@@ -27,7 +27,6 @@ repos:
hooks:
- id: shellcheck
exclude: contrib
## PYTHON
- repo: https://github.com/myint/autoflake
rev: v2.3.1
@@ -59,28 +58,11 @@ repos:
args: ["--ignore=E266,W503,E203,E501,W605"]
- repo: https://github.com/python-poetry/poetry
rev: 2.1.1
rev: 1.8.0
hooks:
- id: poetry-check
name: API - poetry-check
args: ["--directory=./api"]
pass_filenames: false
- id: poetry-lock
name: API - poetry-lock
args: ["--directory=./api"]
pass_filenames: false
- id: poetry-check
name: SDK - poetry-check
args: ["--directory=./"]
pass_filenames: false
- id: poetry-lock
name: SDK - poetry-lock
args: ["--directory=./"]
pass_filenames: false
args: ["--no-update"]
- repo: https://github.com/hadolint/hadolint
rev: v2.13.0-beta
@@ -115,7 +97,7 @@ repos:
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'safety check --ignore 70612,66963,74429'
entry: bash -c 'safety check --ignore 70612,66963'
language: system
- id: vulture

View File

@@ -1,64 +1,38 @@
FROM python:3.12.10-slim-bookworm AS build
FROM python:3.12.8-alpine3.20
LABEL maintainer="https://github.com/prowler-cloud/prowler"
LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
ARG POWERSHELL_VERSION=7.5.0
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y --no-install-recommends wget libicu72 \
&& rm -rf /var/lib/apt/lists/*
# Install PowerShell
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "x86_64" ]; then \
wget --progress=dot:giga https://github.com/PowerShell/PowerShell/releases/download/v${POWERSHELL_VERSION}/powershell-${POWERSHELL_VERSION}-linux-x64.tar.gz -O /tmp/powershell.tar.gz ; \
elif [ "$ARCH" = "aarch64" ]; then \
wget --progress=dot:giga https://github.com/PowerShell/PowerShell/releases/download/v${POWERSHELL_VERSION}/powershell-${POWERSHELL_VERSION}-linux-arm64.tar.gz -O /tmp/powershell.tar.gz ; \
else \
echo "Unsupported architecture: $ARCH" && exit 1 ; \
fi && \
mkdir -p /opt/microsoft/powershell/7 && \
tar zxf /tmp/powershell.tar.gz -C /opt/microsoft/powershell/7 && \
chmod +x /opt/microsoft/powershell/7/pwsh && \
ln -s /opt/microsoft/powershell/7/pwsh /usr/bin/pwsh && \
rm /tmp/powershell.tar.gz
# Add prowler user
RUN addgroup --gid 1000 prowler && \
adduser --uid 1000 --gid 1000 --disabled-password --gecos "" prowler
# Update system dependencies and install essential tools
#hadolint ignore=DL3018
RUN apk --no-cache upgrade && apk --no-cache add curl git
# Create non-root user
RUN mkdir -p /home/prowler && \
echo 'prowler:x:1000:1000:prowler:/home/prowler:' > /etc/passwd && \
echo 'prowler:x:1000:' > /etc/group && \
chown -R prowler:prowler /home/prowler
USER prowler
WORKDIR /home/prowler
# Copy necessary files
WORKDIR /home/prowler
COPY prowler/ /home/prowler/prowler/
COPY dashboard/ /home/prowler/dashboard/
COPY pyproject.toml /home/prowler
COPY README.md /home/prowler/
COPY prowler/providers/m365/lib/powershell/m365_powershell.py /home/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py
COPY README.md /home/prowler
# Install Python dependencies
ENV HOME='/home/prowler'
ENV PATH="${HOME}/.local/bin:${PATH}"
#hadolint ignore=DL3013
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
# By default poetry does not compile Python source files to bytecode during installation.
# This speeds up the installation process, but the first execution may take a little more
# time because Python then compiles source files to bytecode automatically. If you want to
# compile source files to bytecode during installation, you can use the --compile option
RUN poetry install --compile && \
rm -rf ~/.cache/pip
# Install PowerShell modules
RUN poetry run python prowler/providers/m365/lib/powershell/m365_powershell.py
ENV PATH="$HOME/.local/bin:$PATH"
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir .
# Remove deprecated dash dependencies
RUN pip uninstall dash-html-components -y && \
pip uninstall dash-core-components -y
# Remove Prowler directory and build files
USER 0
RUN rm -rf /home/prowler/prowler /home/prowler/pyproject.toml /home/prowler/README.md /home/prowler/build /home/prowler/prowler.egg-info
USER prowler
ENTRYPOINT ["poetry", "run", "prowler"]
ENTRYPOINT ["prowler"]

View File

@@ -71,14 +71,10 @@ It contains hundreds of controls covering CIS, NIST 800, NIST CSF, CISA, RBI, Fe
| Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) |
|---|---|---|---|---|
| AWS | 564 | 82 | 33 | 10 |
| GCP | 79 | 13 | 7 | 3 |
| Azure | 140 | 18 | 8 | 3 |
| Kubernetes | 83 | 7 | 4 | 7 |
| M365 | 44 | 2 | 1 | 0 |
| NHN (Unofficial) | 6 | 2 | 1 | 0 |
> You can list the checks, services, compliance frameworks and categories with `prowler <provider> --list-checks`, `prowler <provider> --list-services`, `prowler <provider> --list-compliance` and `prowler <provider> --list-categories`.
| AWS | 561 | 81 -> `prowler aws --list-services` | 30 -> `prowler aws --list-compliance` | 9 -> `prowler aws --list-categories` |
| GCP | 77 | 13 -> `prowler gcp --list-services` | 4 -> `prowler gcp --list-compliance` | 2 -> `prowler gcp --list-categories`|
| Azure | 139 | 18 -> `prowler azure --list-services` | 4 -> `prowler azure --list-compliance` | 2 -> `prowler azure --list-categories` |
| Kubernetes | 83 | 7 -> `prowler kubernetes --list-services` | 1 -> `prowler kubernetes --list-compliance` | 7 -> `prowler kubernetes --list-categories` |
# 💻 Installation
@@ -110,7 +106,7 @@ docker compose up -d
**Requirements**
* `git` installed.
* `poetry` v2 installed: [poetry installation](https://python-poetry.org/docs/#installation).
* `poetry` installed: [poetry installation](https://python-poetry.org/docs/#installation).
* `npm` installed: [npm installation](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm).
* `Docker Compose` installed: https://docs.docker.com/compose/install/.
@@ -120,7 +116,7 @@ docker compose up -d
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
docker compose up postgres valkey -d
@@ -128,11 +124,6 @@ cd src/backend
python manage.py migrate --database admin
gunicorn -c config/guniconf.py config.wsgi:application
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
> Now, you can access the API documentation at http://localhost:8080/api/v1/docs.
@@ -142,7 +133,7 @@ gunicorn -c config/guniconf.py config.wsgi:application
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
cd src/backend
@@ -155,7 +146,7 @@ python -m celery -A config.celery worker -l info -E
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
cd src/backend
@@ -176,7 +167,7 @@ npm start
## Prowler CLI
### Pip package
Prowler CLI is available as a project in [PyPI](https://pypi.org/project/prowler-cloud/), thus can be installed using pip with Python > 3.9.1, < 3.13:
Prowler CLI is available as a project in [PyPI](https://pypi.org/project/prowler-cloud/), thus can be installed using pip with Python >= 3.9, < 3.13:
```console
pip install prowler
@@ -206,21 +197,15 @@ The container images are available here:
### From GitHub
Python > 3.9.1, < 3.13 is required with pip and poetry:
Python >= 3.9, < 3.13 is required with pip and poetry:
``` console
git clone https://github.com/prowler-cloud/prowler
cd prowler
eval $(poetry env activate)
poetry shell
poetry install
python prowler-cli.py -v
python prowler.py -v
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
> If you want to clone Prowler from Windows, use `git config core.longpaths true` to allow long file paths.
# 📐✏️ High level architecture

View File

@@ -23,7 +23,6 @@ DJANGO_SECRETS_ENCRYPTION_KEY=""
DJANGO_MANAGE_DB_PARTITIONS=[True|False]
DJANGO_CELERY_DEADLOCK_ATTEMPTS=5
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
# PostgreSQL settings
# If running django and celery on host, use 'localhost', else use 'postgres-db'
@@ -40,19 +39,3 @@ POSTGRES_DB=prowler_db
VALKEY_HOST=[localhost|valkey]
VALKEY_PORT=6379
VALKEY_DB=0
# Sentry settings
SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
# Social login credentials
DJANGO_GOOGLE_OAUTH_CLIENT_ID=""
DJANGO_GOOGLE_OAUTH_CLIENT_SECRET=""
DJANGO_GOOGLE_OAUTH_CALLBACK_URL=""
DJANGO_GITHUB_OAUTH_CLIENT_ID=""
DJANGO_GITHUB_OAUTH_CLIENT_SECRET=""
DJANGO_GITHUB_OAUTH_CALLBACK_URL=""
# Deletion Task Batch Size
DJANGO_DELETION_BATCH_SIZE=5000

View File

@@ -80,7 +80,7 @@ repos:
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'poetry run safety check --ignore 70612,66963,74429'
entry: bash -c 'poetry run safety check --ignore 70612,66963'
language: system
- id: vulture

View File

@@ -1,80 +0,0 @@
# Prowler API Changelog
All notable changes to the **Prowler API** are documented in this file.
## [v1.7.0] (Prowler v5.6.0)
### Added
- Added M365 as a new provider [(#7563)](https://github.com/prowler-cloud/prowler/pull/7563).
- Added a `compliance/` folder and ZIPexport functionality for all compliance reports.[(#7653)](https://github.com/prowler-cloud/prowler/pull/7653).
- Added a new API endpoint to fetch and download any specific compliance file by name [(#7653)](https://github.com/prowler-cloud/prowler/pull/7653).
---
## [v1.6.0] (Prowler v5.5.0)
### Added
- Support for developing new integrations [(#7167)](https://github.com/prowler-cloud/prowler/pull/7167).
- HTTP Security Headers [(#7289)](https://github.com/prowler-cloud/prowler/pull/7289).
- New endpoint to get the compliance overviews metadata [(#7333)](https://github.com/prowler-cloud/prowler/pull/7333).
- Support for muted findings [(#7378)](https://github.com/prowler-cloud/prowler/pull/7378).
- Added missing fields to API findings and resources [(#7318)](https://github.com/prowler-cloud/prowler/pull/7318).
---
## [v1.5.4] (Prowler v5.4.4)
### Fixed
- Fixed a bug with periodic tasks when trying to delete a provider ([#7466])(https://github.com/prowler-cloud/prowler/pull/7466).
---
## [v1.5.3] (Prowler v5.4.3)
### Fixed
- Added duplicated scheduled scans handling ([#7401])(https://github.com/prowler-cloud/prowler/pull/7401).
- Added environment variable to configure the deletion task batch size ([#7423])(https://github.com/prowler-cloud/prowler/pull/7423).
---
## [v1.5.2] (Prowler v5.4.2)
### Changed
- Refactored deletion logic and implemented retry mechanism for deletion tasks [(#7349)](https://github.com/prowler-cloud/prowler/pull/7349).
---
## [v1.5.1] (Prowler v5.4.1)
### Fixed
- Added a handled response in case local files are missing [(#7183)](https://github.com/prowler-cloud/prowler/pull/7183).
- Fixed a race condition when deleting export files after the S3 upload [(#7172)](https://github.com/prowler-cloud/prowler/pull/7172).
- Handled exception when a provider has no secret in test connection [(#7283)](https://github.com/prowler-cloud/prowler/pull/7283).
---
## [v1.5.0] (Prowler v5.4.0)
### Added
- Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)
- Add API scan report system, now all scans launched from the API will generate a compressed file with the report in OCSF, CSV and HTML formats [(#6878)](https://github.com/prowler-cloud/prowler/pull/6878).
- Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874)
### Changed
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).
---
## [v1.4.0] (Prowler v5.3.0)
### Changed
- Daily scheduled scan instances are now created beforehand with `SCHEDULED` state [(#6700)](https://github.com/prowler-cloud/prowler/pull/6700).
- Findings endpoints now require at least one date filter [(#6800)](https://github.com/prowler-cloud/prowler/pull/6800).
- Findings metadata endpoint received a performance improvement [(#6863)](https://github.com/prowler-cloud/prowler/pull/6863).
- Increased the allowed length of the provider UID for Kubernetes providers [(#6869)](https://github.com/prowler-cloud/prowler/pull/6869).
---

View File

@@ -1,33 +1,13 @@
FROM python:3.12.10-slim-bookworm AS build
FROM python:3.12.8-alpine3.20 AS build
LABEL maintainer="https://github.com/prowler-cloud/api"
ARG POWERSHELL_VERSION=7.5.0
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y --no-install-recommends wget libicu72 \
&& rm -rf /var/lib/apt/lists/*
# Install PowerShell
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "x86_64" ]; then \
wget --progress=dot:giga https://github.com/PowerShell/PowerShell/releases/download/v${POWERSHELL_VERSION}/powershell-${POWERSHELL_VERSION}-linux-x64.tar.gz -O /tmp/powershell.tar.gz ; \
elif [ "$ARCH" = "aarch64" ]; then \
wget --progress=dot:giga https://github.com/PowerShell/PowerShell/releases/download/v${POWERSHELL_VERSION}/powershell-${POWERSHELL_VERSION}-linux-arm64.tar.gz -O /tmp/powershell.tar.gz ; \
else \
echo "Unsupported architecture: $ARCH" && exit 1 ; \
fi && \
mkdir -p /opt/microsoft/powershell/7 && \
tar zxf /tmp/powershell.tar.gz -C /opt/microsoft/powershell/7 && \
chmod +x /opt/microsoft/powershell/7/pwsh && \
ln -s /opt/microsoft/powershell/7/pwsh /usr/bin/pwsh && \
rm /tmp/powershell.tar.gz
# Add prowler user
RUN addgroup --gid 1000 prowler && \
adduser --uid 1000 --gid 1000 --disabled-password --gecos "" prowler
# hadolint ignore=DL3018
RUN apk --no-cache add gcc python3-dev musl-dev linux-headers curl-dev
RUN apk --no-cache upgrade && \
addgroup -g 1000 prowler && \
adduser -D -u 1000 -G prowler prowler
USER prowler
WORKDIR /home/prowler
@@ -37,23 +17,27 @@ COPY pyproject.toml ./
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
COPY src/backend/ ./backend/
COPY src/backend/ ./backend/
ENV PATH="/home/prowler/.local/bin:$PATH"
# Add `--no-root` to avoid installing the current project as a package
RUN poetry install --no-root && \
RUN poetry install && \
rm -rf ~/.cache/pip
COPY docker-entrypoint.sh ./docker-entrypoint.sh
RUN poetry run python "$(poetry env info --path)/src/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py"
WORKDIR /home/prowler/backend
# Development image
# hadolint ignore=DL3006
FROM build AS dev
USER 0
# hadolint ignore=DL3018
RUN apk --no-cache add curl vim
USER prowler
ENTRYPOINT ["../docker-entrypoint.sh", "dev"]
# Production image

View File

@@ -235,7 +235,6 @@ To view the logs for any component (e.g., Django, Celery worker), you can use th
```console
docker logs -f $(docker ps --format "{{.Names}}" | grep 'api-')
```
## Applying migrations
@@ -270,66 +269,3 @@ poetry shell
cd src/backend
pytest
```
# Custom commands
Django provides a way to create custom commands that can be run from the command line.
> These commands can be found in: ```prowler/api/src/backend/api/management/commands```
To run a custom command, you need to be in the `prowler/api/src/backend` directory and run:
```console
poetry shell
python manage.py <command_name>
```
## Generate dummy data
```console
python manage.py findings --tenant
<TENANT_ID> --findings <NUM_FINDINGS> --re
sources <NUM_RESOURCES> --batch <TRANSACTION_BATCH_SIZE> --alias <ALIAS>
```
This command creates, for a given tenant, a provider, scan and a set of findings and resources related altogether.
> Scan progress and state are updated in real time.
> - 0-33%: Create resources.
> - 33-66%: Create findings.
> - 66%: Create resource-finding mapping.
>
> The last step is required to access the findings details, since the UI needs that to print all the information.
### Example
```console
~/backend $ poetry run python manage.py findings --tenant
fffb1893-3fc7-4623-a5d9-fae47da1c528 --findings 25000 --re
sources 1000 --batch 5000 --alias test-script
Starting data population
Tenant: fffb1893-3fc7-4623-a5d9-fae47da1c528
Alias: test-script
Resources: 1000
Findings: 25000
Batch size: 5000
Creating resources...
100%|███████████████████████| 1/1 [00:00<00:00, 7.72it/s]
Resources created successfully.
Creating findings...
100%|███████████████████████| 5/5 [00:05<00:00, 1.09s/it]
Findings created successfully.
Creating resource-finding mappings...
100%|███████████████████████| 5/5 [00:02<00:00, 1.81it/s]
Resource-finding mappings created successfully.
Successfully populated test data.
```

View File

@@ -28,7 +28,7 @@ start_prod_server() {
start_worker() {
echo "Starting the worker..."
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion -E --max-tasks-per-child 1
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans -E
}
start_worker_beat() {

3297
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -2,51 +2,43 @@
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
[project]
authors = [{name = "Prowler Engineering", email = "engineering@prowler.com"}]
dependencies = [
"celery[pytest] (>=5.4.0,<6.0.0)",
"dj-rest-auth[with_social,jwt] (==7.0.1)",
"django==5.1.8",
"django-allauth==65.4.1",
"django-celery-beat (>=2.7.0,<3.0.0)",
"django-celery-results (>=2.5.1,<3.0.0)",
"django-cors-headers==4.4.0",
"django-environ==0.11.2",
"django-filter==24.3",
"django-guid==3.5.0",
"django-postgres-extra (>=2.0.8,<3.0.0)",
"djangorestframework==3.15.2",
"djangorestframework-jsonapi==7.0.2",
"djangorestframework-simplejwt (>=5.3.1,<6.0.0)",
"drf-nested-routers (>=0.94.1,<1.0.0)",
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
"uuid6==2024.7.10"
]
[tool.poetry]
authors = ["Prowler Team"]
description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.7.0"
version = "1.1.0"
[project.scripts]
celery = "src.backend.config.settings.celery"
[tool.poetry.dependencies]
celery = {extras = ["pytest"], version = "^5.4.0"}
django = "5.1.4"
django-celery-beat = "^2.7.0"
django-celery-results = "^2.5.1"
django-cors-headers = "4.4.0"
django-environ = "0.11.2"
django-filter = "24.3"
django-guid = "3.5.0"
django-postgres-extra = "^2.0.8"
djangorestframework = "3.15.2"
djangorestframework-jsonapi = "7.0.2"
djangorestframework-simplejwt = "^5.3.1"
drf-nested-routers = "^0.94.1"
drf-spectacular = "0.27.2"
drf-spectacular-jsonapi = "0.5.1"
gunicorn = "23.0.0"
prowler = "^5.0"
psycopg2-binary = "2.9.9"
pytest-celery = {extras = ["redis"], version = "^1.0.1"}
# Needed for prowler compatibility
python = ">=3.11,<3.13"
uuid6 = "2024.7.10"
[tool.poetry.group.dev.dependencies]
bandit = "1.7.9"
coverage = "7.5.4"
django-silk = "5.3.2"
docker = "7.1.0"
freezegun = "1.5.1"
marshmallow = ">=3.15.0,<4.0.0"
mypy = "1.10.1"
pylint = "3.2.5"
pytest = "8.2.2"
@@ -57,5 +49,7 @@ pytest-randomly = "3.15.0"
pytest-xdist = "3.6.1"
ruff = "0.5.0"
safety = "3.2.9"
tqdm = "4.67.1"
vulture = "2.14"
[tool.poetry.scripts]
celery = "src.backend.config.settings.celery"

View File

@@ -1,61 +0,0 @@
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import rls_transaction
from api.models import Membership, Role, Tenant, User, UserRoleRelationship
class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
@staticmethod
def get_user_by_email(email: str):
try:
return User.objects.get(email=email)
except User.DoesNotExist:
return None
def pre_social_login(self, request, sociallogin):
# Link existing accounts with the same email address
email = sociallogin.account.extra_data.get("email")
if email:
existing_user = self.get_user_by_email(email)
if existing_user:
sociallogin.connect(request, existing_user)
def save_user(self, request, sociallogin, form=None):
"""
Called after the user data is fully populated from the provider
and is about to be saved to the DB for the first time.
"""
with transaction.atomic(using=MainRouter.admin_db):
user = super().save_user(request, sociallogin, form)
user.save(using=MainRouter.admin_db)
social_account_name = sociallogin.account.extra_data.get("name")
if social_account_name:
user.name = social_account_name
user.save(using=MainRouter.admin_db)
tenant = Tenant.objects.using(MainRouter.admin_db).create(
name=f"{user.email.split('@')[0]} default tenant"
)
with rls_transaction(str(tenant.id)):
Membership.objects.using(MainRouter.admin_db).create(
user=user, tenant=tenant, role=Membership.RoleChoices.OWNER
)
role = Role.objects.using(MainRouter.admin_db).create(
name="admin",
tenant_id=tenant.id,
manage_users=True,
manage_account=True,
manage_billing=True,
manage_providers=True,
manage_integrations=True,
manage_scans=True,
unlimited_visibility=True,
)
UserRoleRelationship.objects.using(MainRouter.admin_db).create(
user=user,
role=role,
tenant_id=tenant.id,
)
return user

View File

@@ -1,38 +1,12 @@
from types import MappingProxyType
from api.models import Provider
from prowler.config.config import get_available_compliance_frameworks
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.check.models import CheckMetadata
from api.models import Provider
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE = {}
PROWLER_CHECKS = {}
AVAILABLE_COMPLIANCE_FRAMEWORKS = {}
def get_compliance_frameworks(provider_type: Provider.ProviderChoices) -> list[str]:
"""
Retrieve and cache the list of available compliance frameworks for a specific cloud provider.
This function lazily loads and caches the available compliance frameworks (e.g., CIS, MITRE, ISO)
for each provider type (AWS, Azure, GCP, etc.) on first access. Subsequent calls for the same
provider will return the cached result.
Args:
provider_type (Provider.ProviderChoices): The cloud provider type for which to retrieve
available compliance frameworks (e.g., "aws", "azure", "gcp", "m365").
Returns:
list[str]: A list of framework identifiers (e.g., "cis_1.4_aws", "mitre_attack_azure") available
for the given provider.
"""
global AVAILABLE_COMPLIANCE_FRAMEWORKS
if provider_type not in AVAILABLE_COMPLIANCE_FRAMEWORKS:
AVAILABLE_COMPLIANCE_FRAMEWORKS[provider_type] = (
get_available_compliance_frameworks(provider_type)
)
return AVAILABLE_COMPLIANCE_FRAMEWORKS[provider_type]
def get_prowler_provider_checks(provider_type: Provider.ProviderChoices):

View File

@@ -1,29 +1,18 @@
ALLOWED_APPS = ("django", "socialaccount", "account", "authtoken", "silk")
class MainRouter:
default_db = "default"
admin_db = "admin"
def db_for_read(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if model_table_name.startswith("django_") or any(
model_table_name.startswith(f"{app}_") for app in ALLOWED_APPS
):
if model_table_name.startswith("django_"):
return self.admin_db
return None
def db_for_write(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if any(model_table_name.startswith(f"{app}_") for app in ALLOWED_APPS):
if model_table_name.startswith("django_"):
return self.admin_db
return None
def allow_migrate(self, db, app_label, model_name=None, **hints): # noqa: F841
return db == self.admin_db
def allow_relation(self, obj1, obj2, **hints): # noqa: F841
# Allow relations if both objects are in either "default" or "admin" db connectors
if {obj1._state.db, obj2._state.db} <= {self.default_db, self.admin_db}:
return True
return None

View File

@@ -6,7 +6,6 @@ from datetime import datetime, timedelta, timezone
from django.conf import settings
from django.contrib.auth.models import BaseUserManager
from django.db import connection, models, transaction
from django_celery_beat.models import PeriodicTask
from psycopg2 import connect as psycopg2_connect
from psycopg2.extensions import AsIs, new_type, register_adapter, register_type
from rest_framework_json_api.serializers import ValidationError
@@ -106,12 +105,11 @@ def generate_random_token(length: int = 14, symbols: str | None = None) -> str:
return "".join(secrets.choice(symbols or _symbols) for _ in range(length))
def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_SIZE):
def batch_delete(queryset, batch_size=5000):
"""
Deletes objects in batches and returns the total number of deletions and a summary.
Args:
tenant_id (str): Tenant ID the queryset belongs to.
queryset (QuerySet): The queryset of objects to delete.
batch_size (int): The number of objects to delete in each batch.
@@ -122,16 +120,15 @@ def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_
deletion_summary = {}
while True:
with rls_transaction(tenant_id, POSTGRES_TENANT_VAR):
# Get a batch of IDs to delete
batch_ids = set(
queryset.values_list("id", flat=True).order_by("id")[:batch_size]
)
if not batch_ids:
# No more objects to delete
break
# Get a batch of IDs to delete
batch_ids = set(
queryset.values_list("id", flat=True).order_by("id")[:batch_size]
)
if not batch_ids:
# No more objects to delete
break
deleted_count, deleted_info = queryset.filter(id__in=batch_ids).delete()
deleted_count, deleted_info = queryset.filter(id__in=batch_ids).delete()
total_deleted += deleted_count
for model_label, count in deleted_info.items():
@@ -140,18 +137,6 @@ def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_
return total_deleted, deletion_summary
def delete_related_daily_task(provider_id: str):
"""
Deletes the periodic task associated with a specific provider.
Args:
provider_id (str): The unique identifier for the provider
whose related periodic task should be deleted.
"""
task_name = f"scan-perform-scheduled-{provider_id}"
PeriodicTask.objects.filter(name=task_name).delete()
# Postgres Enums
@@ -333,15 +318,3 @@ class InvitationStateEnum(EnumType):
class InvitationStateEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("invitation_state", *args, **kwargs)
# Postgres enum definition for Integration type
class IntegrationTypeEnum(EnumType):
enum_type_name = "integration_type"
class IntegrationTypeEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("integration_type", *args, **kwargs)

View File

@@ -7,7 +7,7 @@ from rest_framework_json_api.serializers import ValidationError
from api.db_utils import POSTGRES_TENANT_VAR, SET_CONFIG_QUERY
def set_tenant(func=None, *, keep_tenant=False):
def set_tenant(func):
"""
Decorator to set the tenant context for a Celery task based on the provided tenant_id.
@@ -40,29 +40,20 @@ def set_tenant(func=None, *, keep_tenant=False):
# The tenant context will be set before the task logic executes.
"""
def decorator(func):
@wraps(func)
@transaction.atomic
def wrapper(*args, **kwargs):
try:
if not keep_tenant:
tenant_id = kwargs.pop("tenant_id")
else:
tenant_id = kwargs["tenant_id"]
except KeyError:
raise KeyError("This task requires the tenant_id")
try:
uuid.UUID(tenant_id)
except ValueError:
raise ValidationError("Tenant ID must be a valid UUID")
with connection.cursor() as cursor:
cursor.execute(SET_CONFIG_QUERY, [POSTGRES_TENANT_VAR, tenant_id])
@wraps(func)
@transaction.atomic
def wrapper(*args, **kwargs):
try:
tenant_id = kwargs.pop("tenant_id")
except KeyError:
raise KeyError("This task requires the tenant_id")
try:
uuid.UUID(tenant_id)
except ValueError:
raise ValidationError("Tenant ID must be a valid UUID")
with connection.cursor() as cursor:
cursor.execute(SET_CONFIG_QUERY, [POSTGRES_TENANT_VAR, tenant_id])
return func(*args, **kwargs)
return func(*args, **kwargs)
return wrapper
if func is None:
return decorator
else:
return decorator(func)
return wrapper

View File

@@ -1,4 +1,4 @@
from datetime import date, datetime, timedelta, timezone
from datetime import date, datetime, timezone
from django.conf import settings
from django.db.models import Q
@@ -24,7 +24,6 @@ from api.db_utils import (
from api.models import (
ComplianceOverview,
Finding,
Integration,
Invitation,
Membership,
PermissionChoices,
@@ -287,9 +286,6 @@ class FindingFilter(FilterSet):
status = ChoiceFilter(choices=StatusChoices.choices)
severity = ChoiceFilter(choices=SeverityChoices)
impact = ChoiceFilter(choices=SeverityChoices)
muted = BooleanFilter(
help_text="If this filter is not provided, muted and non-muted findings will be returned."
)
resources = UUIDInFilter(field_name="resource__id", lookup_expr="in")
@@ -323,41 +319,13 @@ class FindingFilter(FilterSet):
field_name="resources__type", lookup_expr="icontains"
)
# Temporarily disabled until we implement tag filtering in the UI
# resource_tag_key = CharFilter(field_name="resources__tags__key")
# resource_tag_key__in = CharInFilter(
# field_name="resources__tags__key", lookup_expr="in"
# )
# resource_tag_key__icontains = CharFilter(
# field_name="resources__tags__key", lookup_expr="icontains"
# )
# resource_tag_value = CharFilter(field_name="resources__tags__value")
# resource_tag_value__in = CharInFilter(
# field_name="resources__tags__value", lookup_expr="in"
# )
# resource_tag_value__icontains = CharFilter(
# field_name="resources__tags__value", lookup_expr="icontains"
# )
# resource_tags = CharInFilter(
# method="filter_resource_tag",
# lookup_expr="in",
# help_text="Filter by resource tags `key:value` pairs.\nMultiple values may be "
# "separated by commas.",
# )
scan = UUIDFilter(method="filter_scan_id")
scan__in = UUIDInFilter(method="filter_scan_id_in")
inserted_at = DateFilter(method="filter_inserted_at", lookup_expr="date")
inserted_at__date = DateFilter(method="filter_inserted_at", lookup_expr="date")
inserted_at__gte = DateFilter(
method="filter_inserted_at_gte",
help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
)
inserted_at__lte = DateFilter(
method="filter_inserted_at_lte",
help_text=f"Maximum date range is {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
)
inserted_at__gte = DateFilter(method="filter_inserted_at_gte")
inserted_at__lte = DateFilter(method="filter_inserted_at_lte")
class Meta:
model = Finding
@@ -385,52 +353,6 @@ class FindingFilter(FilterSet):
},
}
def filter_queryset(self, queryset):
if not (self.data.get("scan") or self.data.get("scan__in")) and not (
self.data.get("inserted_at")
or self.data.get("inserted_at__date")
or self.data.get("inserted_at__gte")
or self.data.get("inserted_at__lte")
):
raise ValidationError(
[
{
"detail": "At least one date filter is required: filter[inserted_at], filter[inserted_at.gte], "
"or filter[inserted_at.lte].",
"status": 400,
"source": {"pointer": "/data/attributes/inserted_at"},
"code": "required",
}
]
)
gte_date = (
datetime.strptime(self.data.get("inserted_at__gte"), "%Y-%m-%d").date()
if self.data.get("inserted_at__gte")
else datetime.now(timezone.utc).date()
)
lte_date = (
datetime.strptime(self.data.get("inserted_at__lte"), "%Y-%m-%d").date()
if self.data.get("inserted_at__lte")
else datetime.now(timezone.utc).date()
)
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
):
raise ValidationError(
[
{
"detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
"status": 400,
"source": {"pointer": "/data/attributes/inserted_at"},
"code": "invalid",
}
]
)
return super().filter_queryset(queryset)
# Convert filter values to UUIDv7 values for use with partitioning
def filter_scan_id(self, queryset, name, value):
try:
@@ -451,7 +373,9 @@ class FindingFilter(FilterSet):
)
return (
queryset.filter(id__gte=start).filter(id__lt=end).filter(scan_id=value_uuid)
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan__id=value_uuid)
)
def filter_scan_id_in(self, queryset, name, value):
@@ -476,42 +400,31 @@ class FindingFilter(FilterSet):
]
)
if start == end:
return queryset.filter(id__gte=start).filter(scan_id__in=uuid_list)
return queryset.filter(id__gte=start).filter(scan__id__in=uuid_list)
else:
return (
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan_id__in=uuid_list)
.filter(scan__id__in=uuid_list)
)
def filter_inserted_at(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__gte=start, id__lt=end)
return queryset.filter(id__gte=start).filter(inserted_at__date=value)
def filter_inserted_at_gte(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__gte=start)
return queryset.filter(id__gte=start).filter(inserted_at__gte=value)
def filter_inserted_at_lte(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__lt=end)
def filter_resource_tag(self, queryset, name, value):
overall_query = Q()
for key_value_pair in value:
tag_key, tag_value = key_value_pair.split(":", 1)
overall_query |= Q(
resources__tags__key__icontains=tag_key,
resources__tags__value__icontains=tag_value,
)
return queryset.filter(overall_query).distinct()
return queryset.filter(id__lte=end).filter(inserted_at__lte=value)
@staticmethod
def maybe_date_to_datetime(value):
@@ -617,6 +530,12 @@ class ScanSummaryFilter(FilterSet):
field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
)
region = CharFilter(field_name="region")
muted_findings = BooleanFilter(method="filter_muted_findings")
def filter_muted_findings(self, queryset, name, value):
if not value:
return queryset.exclude(muted__gt=0)
return queryset
class Meta:
model = ScanSummary
@@ -627,6 +546,8 @@ class ScanSummaryFilter(FilterSet):
class ServiceOverviewFilter(ScanSummaryFilter):
muted_findings = None
def is_valid(self):
# Check if at least one of the inserted_at filters is present
inserted_at_filters = [
@@ -644,19 +565,3 @@ class ServiceOverviewFilter(ScanSummaryFilter):
}
)
return super().is_valid()
class IntegrationFilter(FilterSet):
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
integration_type = ChoiceFilter(choices=Integration.IntegrationChoices.choices)
integration_type__in = ChoiceInFilter(
choices=Integration.IntegrationChoices.choices,
field_name="integration_type",
lookup_expr="in",
)
class Meta:
model = Integration
fields = {
"inserted_at": ["date", "gte", "lte"],
}

View File

@@ -122,22 +122,6 @@
"scanner_args": {}
}
},
{
"model": "api.provider",
"pk": "7791914f-d646-4fe2-b2ed-73f2c6499a36",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:45:26.352Z",
"updated_at": "2024-10-18T11:16:23.533Z",
"provider": "kubernetes",
"uid": "gke_lucky-coast-419309_us-central1_autopilot-cluster-2",
"alias": "k8s_testing_2",
"connected": true,
"connection_last_checked_at": "2024-10-18T11:16:23.503Z",
"metadata": {},
"scanner_args": {}
}
},
{
"model": "api.providersecret",
"pk": "11491b47-75ae-4f71-ad8d-3e630a72182e",

View File

@@ -11,7 +11,9 @@
"unique_resource_count": 1,
"duration": 5,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-01T17:25:27.050Z",
"started_at": "2024-09-01T17:25:27.050Z",
@@ -31,7 +33,9 @@
"unique_resource_count": 1,
"duration": 20,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-02T17:24:27.050Z",
"started_at": "2024-09-02T17:24:27.050Z",
@@ -51,7 +55,9 @@
"unique_resource_count": 10,
"duration": 10,
"scanner_args": {
"checks_to_execute": ["cloudsql_instance_automated_backups"]
"checks_to_execute": [
"cloudsql_instance_automated_backups"
]
},
"inserted_at": "2024-09-02T19:26:27.050Z",
"started_at": "2024-09-02T19:26:27.050Z",
@@ -71,7 +77,9 @@
"unique_resource_count": 1,
"duration": 35,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-02T19:27:27.050Z",
"started_at": "2024-09-02T19:27:27.050Z",
@@ -89,7 +97,9 @@
"name": "test scheduled aws scan",
"state": "available",
"scanner_args": {
"checks_to_execute": ["cloudformation_stack_outputs_find_secrets"]
"checks_to_execute": [
"cloudformation_stack_outputs_find_secrets"
]
},
"scheduled_at": "2030-09-02T19:20:27.050Z",
"inserted_at": "2024-09-02T19:24:27.050Z",
@@ -168,7 +178,9 @@
"unique_resource_count": 19,
"progress": 100,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"duration": 7,
"scheduled_at": null,
@@ -178,56 +190,6 @@
"completed_at": "2024-10-18T10:46:05.127Z"
}
},
{
"model": "api.scan",
"pk": "6dd8925f-a52d-48de-a546-d2d90db30ab1",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"name": "real scan azure",
"provider": "1b59e032-3eb6-4694-93a5-df84cd9b3ce2",
"trigger": "manual",
"state": "completed",
"unique_resource_count": 20,
"progress": 100,
"scanner_args": {
"checks_to_execute": [
"accessanalyzer_enabled",
"account_security_contact_information_is_registered"
]
},
"duration": 4,
"scheduled_at": null,
"inserted_at": "2024-10-18T11:16:21.358Z",
"updated_at": "2024-10-18T11:16:26.060Z",
"started_at": "2024-10-18T11:16:21.593Z",
"completed_at": "2024-10-18T11:16:26.060Z"
}
},
{
"model": "api.scan",
"pk": "4ca7ce89-3236-41a8-a369-8937bc152af5",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"name": "real scan k8s",
"provider": "7791914f-d646-4fe2-b2ed-73f2c6499a36",
"trigger": "manual",
"state": "completed",
"unique_resource_count": 20,
"progress": 100,
"scanner_args": {
"checks_to_execute": [
"accessanalyzer_enabled",
"account_security_contact_information_is_registered"
]
},
"duration": 4,
"scheduled_at": null,
"inserted_at": "2024-10-18T11:16:21.358Z",
"updated_at": "2024-10-18T11:16:26.060Z",
"started_at": "2024-10-18T11:16:21.593Z",
"completed_at": "2024-10-18T11:16:26.060Z"
}
},
{
"model": "api.scan",
"pk": "01929f57-c0ee-7553-be0b-cbde006fb6f7",

View File

@@ -6,7 +6,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.823Z",
"updated_at": "2024-10-18T10:46:04.841Z",
"first_seen_at": "2024-10-18T10:46:04.823Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-south-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -62,7 +61,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.855Z",
"updated_at": "2024-10-18T10:46:04.858Z",
"first_seen_at": "2024-10-18T10:46:04.855Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-3-112233445566",
"delta": "new",
"status": "FAIL",
@@ -118,7 +116,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.869Z",
"updated_at": "2024-10-18T10:46:04.876Z",
"first_seen_at": "2024-10-18T10:46:04.869Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-central-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -174,7 +171,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.888Z",
"updated_at": "2024-10-18T10:46:04.892Z",
"first_seen_at": "2024-10-18T10:46:04.888Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -230,7 +226,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.901Z",
"updated_at": "2024-10-18T10:46:04.905Z",
"first_seen_at": "2024-10-18T10:46:04.901Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-east-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -286,7 +281,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.915Z",
"updated_at": "2024-10-18T10:46:04.919Z",
"first_seen_at": "2024-10-18T10:46:04.915Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-south-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -342,7 +336,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.929Z",
"updated_at": "2024-10-18T10:46:04.934Z",
"first_seen_at": "2024-10-18T10:46:04.929Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-west-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -398,7 +391,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.944Z",
"updated_at": "2024-10-18T10:46:04.947Z",
"first_seen_at": "2024-10-18T10:46:04.944Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ca-central-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -454,7 +446,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.957Z",
"updated_at": "2024-10-18T10:46:04.962Z",
"first_seen_at": "2024-10-18T10:46:04.957Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-east-1-ConsoleAnalyzer-83b66ad7-d024-454e-b851-52d11cc1cf7c",
"delta": "new",
"status": "PASS",
@@ -510,7 +501,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.971Z",
"updated_at": "2024-10-18T10:46:04.975Z",
"first_seen_at": "2024-10-18T10:46:04.971Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -566,7 +556,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.984Z",
"updated_at": "2024-10-18T10:46:04.989Z",
"first_seen_at": "2024-10-18T10:46:04.984Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-sa-east-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -622,7 +611,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:04.999Z",
"updated_at": "2024-10-18T10:46:05.003Z",
"first_seen_at": "2024-10-18T10:46:04.999Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-north-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -678,7 +666,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.013Z",
"updated_at": "2024-10-18T10:46:05.018Z",
"first_seen_at": "2024-10-18T10:46:05.013Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-west-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -734,7 +721,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.029Z",
"updated_at": "2024-10-18T10:46:05.033Z",
"first_seen_at": "2024-10-18T10:46:05.029Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-southeast-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -790,7 +776,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.045Z",
"updated_at": "2024-10-18T10:46:05.050Z",
"first_seen_at": "2024-10-18T10:46:05.045Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-central-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -846,7 +831,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.061Z",
"updated_at": "2024-10-18T10:46:05.065Z",
"first_seen_at": "2024-10-18T10:46:05.061Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-1-112233445566",
"delta": "new",
"status": "FAIL",
@@ -902,7 +886,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.080Z",
"updated_at": "2024-10-18T10:46:05.085Z",
"first_seen_at": "2024-10-18T10:46:05.080Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-southeast-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -958,7 +941,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.099Z",
"updated_at": "2024-10-18T10:46:05.104Z",
"first_seen_at": "2024-10-18T10:46:05.099Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-2-112233445566",
"delta": "new",
"status": "FAIL",
@@ -1014,7 +996,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:46:05.115Z",
"updated_at": "2024-10-18T10:46:05.121Z",
"first_seen_at": "2024-10-18T10:46:05.115Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-3-112233445566",
"delta": "new",
"status": "FAIL",
@@ -1070,7 +1051,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.489Z",
"updated_at": "2024-10-18T11:16:24.506Z",
"first_seen_at": "2024-10-18T10:46:04.823Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-south-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -1126,7 +1106,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.518Z",
"updated_at": "2024-10-18T11:16:24.521Z",
"first_seen_at": "2024-10-18T10:46:04.855Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-3-112233445566",
"delta": null,
"status": "FAIL",
@@ -1182,7 +1161,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.526Z",
"updated_at": "2024-10-18T11:16:24.529Z",
"first_seen_at": "2024-10-18T10:46:04.869Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-central-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -1238,7 +1216,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.535Z",
"updated_at": "2024-10-18T11:16:24.538Z",
"first_seen_at": "2024-10-18T10:46:04.888Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1294,7 +1271,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.544Z",
"updated_at": "2024-10-18T11:16:24.546Z",
"first_seen_at": "2024-10-18T10:46:04.901Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-east-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -1350,7 +1326,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.551Z",
"updated_at": "2024-10-18T11:16:24.554Z",
"first_seen_at": "2024-10-18T10:46:04.915Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-south-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1406,7 +1381,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.560Z",
"updated_at": "2024-10-18T11:16:24.562Z",
"first_seen_at": "2024-10-18T10:46:04.929Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-west-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1462,7 +1436,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.567Z",
"updated_at": "2024-10-18T11:16:24.569Z",
"first_seen_at": "2024-10-18T10:46:04.944Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ca-central-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1518,7 +1491,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.573Z",
"updated_at": "2024-10-18T11:16:24.575Z",
"first_seen_at": "2024-10-18T10:46:04.957Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-east-1-ConsoleAnalyzer-83b66ad7-d024-454e-b851-52d11cc1cf7c",
"delta": null,
"status": "PASS",
@@ -1574,7 +1546,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.580Z",
"updated_at": "2024-10-18T11:16:24.582Z",
"first_seen_at": "2024-10-18T10:46:04.971Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-west-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -1630,7 +1601,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.587Z",
"updated_at": "2024-10-18T11:16:24.589Z",
"first_seen_at": "2024-10-18T10:46:04.984Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-sa-east-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1686,7 +1656,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.595Z",
"updated_at": "2024-10-18T11:16:24.597Z",
"first_seen_at": "2024-10-18T10:46:04.999Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-north-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1742,7 +1711,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.602Z",
"updated_at": "2024-10-18T11:16:24.604Z",
"first_seen_at": "2024-10-18T10:46:05.013Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-us-west-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -1798,7 +1766,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.610Z",
"updated_at": "2024-10-18T11:16:24.612Z",
"first_seen_at": "2024-10-18T10:46:05.029Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-southeast-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1854,7 +1821,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.617Z",
"updated_at": "2024-10-18T11:16:24.620Z",
"first_seen_at": "2024-10-18T10:46:05.045Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-eu-central-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1910,7 +1876,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.625Z",
"updated_at": "2024-10-18T11:16:24.627Z",
"first_seen_at": "2024-10-18T10:46:05.061Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-1-112233445566",
"delta": null,
"status": "FAIL",
@@ -1966,7 +1931,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.632Z",
"updated_at": "2024-10-18T11:16:24.634Z",
"first_seen_at": "2024-10-18T10:46:05.080Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-southeast-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -2022,7 +1986,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.639Z",
"updated_at": "2024-10-18T11:16:24.642Z",
"first_seen_at": "2024-10-18T10:46:05.099Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-2-112233445566",
"delta": null,
"status": "FAIL",
@@ -2078,7 +2041,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:24.646Z",
"updated_at": "2024-10-18T11:16:24.648Z",
"first_seen_at": "2024-10-18T10:46:05.115Z",
"uid": "prowler-aws-accessanalyzer_enabled-112233445566-ap-northeast-3-112233445566",
"delta": null,
"status": "FAIL",
@@ -2134,7 +2096,6 @@
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T11:16:26.033Z",
"updated_at": "2024-10-18T11:16:26.045Z",
"first_seen_at": "2024-10-18T11:16:26.033Z",
"uid": "prowler-aws-account_security_contact_information_is_registered-112233445566-us-east-1-112233445566",
"delta": "new",
"status": "MANUAL",

View File

@@ -1,237 +0,0 @@
import random
from datetime import datetime, timezone
from math import ceil
from uuid import uuid4
from django.core.management.base import BaseCommand
from tqdm import tqdm
from api.db_utils import rls_transaction
from api.models import (
Finding,
Provider,
Resource,
ResourceFindingMapping,
Scan,
StatusChoices,
)
from prowler.lib.check.models import CheckMetadata
class Command(BaseCommand):
help = "Populates the database with test data for performance testing."
def add_arguments(self, parser):
parser.add_argument(
"--tenant",
type=str,
required=True,
help="Tenant id for which the data will be populated.",
)
parser.add_argument(
"--resources",
type=int,
required=True,
help="The number of resources to create.",
)
parser.add_argument(
"--findings",
type=int,
required=True,
help="The number of findings to create.",
)
parser.add_argument(
"--batch", type=int, required=True, help="The batch size for bulk creation."
)
parser.add_argument(
"--alias",
type=str,
required=False,
help="Optional alias for the provider and scan",
)
def handle(self, *args, **options):
tenant_id = options["tenant"]
num_resources = options["resources"]
num_findings = options["findings"]
batch_size = options["batch"]
alias = options["alias"] or "Testing"
uid_token = str(uuid4())
self.stdout.write(self.style.NOTICE("Starting data population"))
self.stdout.write(self.style.NOTICE(f"\tTenant: {tenant_id}"))
self.stdout.write(self.style.NOTICE(f"\tAlias: {alias}"))
self.stdout.write(self.style.NOTICE(f"\tResources: {num_resources}"))
self.stdout.write(self.style.NOTICE(f"\tFindings: {num_findings}"))
self.stdout.write(self.style.NOTICE(f"\tBatch size: {batch_size}\n\n"))
# Resource metadata
possible_regions = [
"us-east-1",
"us-east-2",
"us-west-1",
"us-west-2",
"ca-central-1",
"eu-central-1",
"eu-west-1",
"eu-west-2",
"eu-west-3",
"ap-southeast-1",
"ap-southeast-2",
"ap-northeast-1",
"ap-northeast-2",
"ap-south-1",
"sa-east-1",
]
possible_services = []
possible_types = []
bulk_check_metadata = CheckMetadata.get_bulk(provider="aws")
for check_metadata in bulk_check_metadata.values():
if check_metadata.ServiceName not in possible_services:
possible_services.append(check_metadata.ServiceName)
if (
check_metadata.ResourceType
and check_metadata.ResourceType not in possible_types
):
possible_types.append(check_metadata.ResourceType)
with rls_transaction(tenant_id):
provider, _ = Provider.all_objects.get_or_create(
tenant_id=tenant_id,
provider="aws",
connected=True,
uid=str(random.randint(100000000000, 999999999999)),
defaults={
"alias": alias,
},
)
with rls_transaction(tenant_id):
scan = Scan.all_objects.create(
tenant_id=tenant_id,
provider=provider,
name=alias,
trigger="manual",
state="executing",
progress=0,
started_at=datetime.now(timezone.utc),
)
scan_state = "completed"
try:
# Create resources
resources = []
for i in range(num_resources):
resources.append(
Resource(
tenant_id=tenant_id,
provider_id=provider.id,
uid=f"testing-{uid_token}-{i}",
name=f"Testing {uid_token}-{i}",
region=random.choice(possible_regions),
service=random.choice(possible_services),
type=random.choice(possible_types),
)
)
num_batches = ceil(len(resources) / batch_size)
self.stdout.write(self.style.WARNING("Creating resources..."))
for i in tqdm(range(0, len(resources), batch_size), total=num_batches):
with rls_transaction(tenant_id):
Resource.all_objects.bulk_create(resources[i : i + batch_size])
self.stdout.write(self.style.SUCCESS("Resources created successfully.\n\n"))
with rls_transaction(tenant_id):
scan.progress = 33
scan.save()
# Create Findings
findings = []
possible_deltas = ["new", "changed", None]
possible_severities = ["critical", "high", "medium", "low"]
findings_resources_mapping = []
for i in range(num_findings):
severity = random.choice(possible_severities)
check_id = random.randint(1, 1000)
assigned_resource_num = random.randint(0, len(resources) - 1)
assigned_resource = resources[assigned_resource_num]
findings_resources_mapping.append(assigned_resource_num)
findings.append(
Finding(
tenant_id=tenant_id,
scan=scan,
uid=f"testing-{uid_token}-{i}",
delta=random.choice(possible_deltas),
check_id=f"check-{check_id}",
status=random.choice(list(StatusChoices)),
severity=severity,
impact=severity,
raw_result={},
check_metadata={
"checktitle": f"Test title for check {check_id}",
"risk": f"Testing risk {uid_token}-{i}",
"provider": "aws",
"severity": severity,
"categories": ["category1", "category2", "category3"],
"description": "This is a random description that should not matter for testing purposes.",
"servicename": assigned_resource.service,
"resourcetype": assigned_resource.type,
},
)
)
num_batches = ceil(len(findings) / batch_size)
self.stdout.write(self.style.WARNING("Creating findings..."))
for i in tqdm(range(0, len(findings), batch_size), total=num_batches):
with rls_transaction(tenant_id):
Finding.all_objects.bulk_create(findings[i : i + batch_size])
self.stdout.write(self.style.SUCCESS("Findings created successfully.\n\n"))
with rls_transaction(tenant_id):
scan.progress = 66
scan.save()
# Create ResourceFindingMapping
mappings = []
for index, f in enumerate(findings):
mappings.append(
ResourceFindingMapping(
tenant_id=tenant_id,
resource=resources[findings_resources_mapping[index]],
finding=f,
)
)
num_batches = ceil(len(mappings) / batch_size)
self.stdout.write(
self.style.WARNING("Creating resource-finding mappings...")
)
for i in tqdm(range(0, len(mappings), batch_size), total=num_batches):
with rls_transaction(tenant_id):
ResourceFindingMapping.objects.bulk_create(
mappings[i : i + batch_size]
)
self.stdout.write(
self.style.SUCCESS(
"Resource-finding mappings created successfully.\n\n"
)
)
except Exception as e:
self.stdout.write(self.style.ERROR(f"Failed to populate test data: {e}"))
scan_state = "failed"
finally:
scan.completed_at = datetime.now(timezone.utc)
scan.duration = int(
(datetime.now(timezone.utc) - scan.started_at).total_seconds()
)
scan.progress = 100
scan.state = scan_state
scan.unique_resource_count = num_resources
with rls_transaction(tenant_id):
scan.save()
self.stdout.write(self.style.NOTICE("Successfully populated test data."))

View File

@@ -1,15 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0005_rbac_missing_admin_roles"),
]
operations = [
migrations.AddField(
model_name="finding",
name="first_seen_at",
field=models.DateTimeField(editable=False, null=True),
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 5.1.5 on 2025-01-28 15:03
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0006_findings_first_seen"),
]
operations = [
migrations.AddIndex(
model_name="scan",
index=models.Index(
fields=["tenant_id", "provider_id", "state", "inserted_at"],
name="scans_prov_state_insert_idx",
),
),
migrations.AddIndex(
model_name="scansummary",
index=models.Index(
fields=["tenant_id", "scan_id"], name="scan_summaries_tenant_scan_idx"
),
),
]

View File

@@ -1,64 +0,0 @@
import json
from datetime import datetime, timedelta, timezone
import django.db.models.deletion
from django.db import migrations, models
from django_celery_beat.models import PeriodicTask
from api.db_utils import rls_transaction
from api.models import Scan, StateChoices
def migrate_daily_scheduled_scan_tasks(apps, schema_editor):
for daily_scheduled_scan_task in PeriodicTask.objects.filter(
task="scan-perform-scheduled"
):
task_kwargs = json.loads(daily_scheduled_scan_task.kwargs)
tenant_id = task_kwargs["tenant_id"]
provider_id = task_kwargs["provider_id"]
current_time = datetime.now(timezone.utc)
scheduled_time_today = datetime.combine(
current_time.date(),
daily_scheduled_scan_task.start_time.time(),
tzinfo=timezone.utc,
)
if current_time < scheduled_time_today:
next_scan_date = scheduled_time_today
else:
next_scan_date = scheduled_time_today + timedelta(days=1)
with rls_transaction(tenant_id):
Scan.objects.create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.SCHEDULED,
scheduled_at=next_scan_date,
scheduler_task_id=daily_scheduled_scan_task.id,
)
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0007_scan_and_scan_summaries_indexes"),
("django_celery_beat", "0019_alter_periodictasks_options"),
]
operations = [
migrations.AddField(
model_name="scan",
name="scheduler_task",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
to="django_celery_beat.periodictask",
),
),
migrations.RunPython(migrate_daily_scheduled_scan_tasks),
]

View File

@@ -1,22 +0,0 @@
# Generated by Django 5.1.5 on 2025-02-07 09:42
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0008_daily_scheduled_tasks_update"),
]
operations = [
migrations.AlterField(
model_name="provider",
name="uid",
field=models.CharField(
max_length=250,
validators=[django.core.validators.MinLengthValidator(3)],
verbose_name="Unique identifier for the provider, set by the provider",
),
),
]

View File

@@ -1,109 +0,0 @@
from functools import partial
from django.db import connection, migrations
def create_index_on_partitions(
apps, schema_editor, parent_table: str, index_name: str, index_details: str
):
with connection.cursor() as cursor:
cursor.execute(
"""
SELECT inhrelid::regclass::text
FROM pg_inherits
WHERE inhparent = %s::regclass;
""",
[parent_table],
)
partitions = [row[0] for row in cursor.fetchall()]
# Iterate over partitions and create index concurrently.
# Note: PostgreSQL does not allow CONCURRENTLY inside a transaction,
# so we need atomic = False for this migration.
for partition in partitions:
sql = (
f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {partition.replace('.', '_')}_{index_name} ON {partition} "
f"{index_details};"
)
schema_editor.execute(sql)
def drop_index_on_partitions(apps, schema_editor, parent_table: str, index_name: str):
with schema_editor.connection.cursor() as cursor:
cursor.execute(
"""
SELECT inhrelid::regclass::text
FROM pg_inherits
WHERE inhparent = %s::regclass;
""",
[parent_table],
)
partitions = [row[0] for row in cursor.fetchall()]
# Iterate over partitions and drop index concurrently.
for partition in partitions:
partition_index = f"{partition.replace('.', '_')}_{index_name}"
sql = f"DROP INDEX CONCURRENTLY IF EXISTS {partition_index};"
schema_editor.execute(sql)
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0009_increase_provider_uid_maximum_length"),
]
operations = [
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="findings_tenant_and_id_idx",
index_details="(tenant_id, id)",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="findings_tenant_and_id_idx",
),
),
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_idx",
index_details="(tenant_id, scan_id)",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_idx",
),
),
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_id_idx",
index_details="(tenant_id, scan_id, id)",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_id_idx",
),
),
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="find_delta_new_idx",
index_details="(tenant_id, id) where delta = 'new'",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="find_delta_new_idx",
),
),
]

View File

@@ -1,49 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0010_findings_performance_indexes_partitions"),
]
operations = [
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"
),
),
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"
),
),
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
),
migrations.AddIndex(
model_name="finding",
index=models.Index(
condition=models.Q(("delta", "new")),
fields=["tenant_id", "id"],
name="find_delta_new_idx",
),
),
migrations.AddIndex(
model_name="resourcetagmapping",
index=models.Index(
fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
),
),
migrations.AddIndex(
model_name="resource",
index=models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
),
]

View File

@@ -1,15 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0011_findings_performance_indexes_parent"),
]
operations = [
migrations.AddField(
model_name="scan",
name="output_location",
field=models.CharField(blank=True, max_length=200, null=True),
),
]

View File

@@ -1,35 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-03 15:46
from functools import partial
from django.db import migrations
from api.db_utils import IntegrationTypeEnum, PostgresEnumMigration, register_enum
from api.models import Integration
IntegrationTypeEnumMigration = PostgresEnumMigration(
enum_name="integration_type",
enum_values=tuple(
integration_type[0]
for integration_type in Integration.IntegrationChoices.choices
),
)
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0012_scan_report_output"),
]
operations = [
migrations.RunPython(
IntegrationTypeEnumMigration.create_enum_type,
reverse_code=IntegrationTypeEnumMigration.drop_enum_type,
),
migrations.RunPython(
partial(register_enum, enum_class=IntegrationTypeEnum),
reverse_code=migrations.RunPython.noop,
),
]

View File

@@ -1,131 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-03 15:46
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
from api.rls import RowLevelSecurityConstraint
class Migration(migrations.Migration):
dependencies = [
("api", "0013_integrations_enum"),
]
operations = [
migrations.CreateModel(
name="Integration",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
("enabled", models.BooleanField(default=False)),
("connected", models.BooleanField(blank=True, null=True)),
(
"connection_last_checked_at",
models.DateTimeField(blank=True, null=True),
),
(
"integration_type",
api.db_utils.IntegrationTypeEnumField(
choices=[
("amazon_s3", "Amazon S3"),
("saml", "SAML"),
("aws_security_hub", "AWS Security Hub"),
("jira", "JIRA"),
("slack", "Slack"),
]
),
),
("configuration", models.JSONField(default=dict)),
("_credentials", models.BinaryField(db_column="credentials")),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={"db_table": "integrations", "abstract": False},
),
migrations.AddConstraint(
model_name="integration",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_integration",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
migrations.CreateModel(
name="IntegrationProviderRelationship",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
(
"integration",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="api.integration",
),
),
(
"provider",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.provider"
),
),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "integration_provider_mappings",
"constraints": [
models.UniqueConstraint(
fields=("integration_id", "provider_id"),
name="unique_integration_provider_rel",
),
],
},
),
migrations.AddConstraint(
model_name="IntegrationProviderRelationship",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_integrationproviderrelationship",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
migrations.AddField(
model_name="integration",
name="providers",
field=models.ManyToManyField(
blank=True,
related_name="integrations",
through="api.IntegrationProviderRelationship",
to="api.provider",
),
),
]

View File

@@ -1,26 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-25 11:29
from django.db import migrations, models
import api.db_utils
class Migration(migrations.Migration):
dependencies = [
("api", "0014_integrations"),
]
operations = [
migrations.AddField(
model_name="finding",
name="muted",
field=models.BooleanField(default=False),
),
migrations.AlterField(
model_name="finding",
name="status",
field=api.db_utils.StatusEnumField(
choices=[("FAIL", "Fail"), ("PASS", "Pass"), ("MANUAL", "Manual")]
),
),
]

View File

@@ -1,32 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-31 10:46
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0015_finding_muted"),
]
operations = [
migrations.AddField(
model_name="finding",
name="compliance",
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name="resource",
name="details",
field=models.TextField(blank=True, null=True),
),
migrations.AddField(
model_name="resource",
name="metadata",
field=models.TextField(blank=True, null=True),
),
migrations.AddField(
model_name="resource",
name="partition",
field=models.TextField(blank=True, null=True),
),
]

View File

@@ -1,32 +0,0 @@
# Generated by Django 5.1.7 on 2025-04-16 08:47
from django.db import migrations
import api.db_utils
class Migration(migrations.Migration):
dependencies = [
("api", "0016_finding_compliance_resource_details_and_more"),
]
operations = [
migrations.AlterField(
model_name="provider",
name="provider",
field=api.db_utils.ProviderEnumField(
choices=[
("aws", "AWS"),
("azure", "Azure"),
("gcp", "GCP"),
("kubernetes", "Kubernetes"),
("m365", "M365"),
],
default="aws",
),
),
migrations.RunSQL(
"ALTER TYPE provider ADD VALUE IF NOT EXISTS 'm365';",
reverse_sql=migrations.RunSQL.noop,
),
]

View File

@@ -11,7 +11,6 @@ from django.core.validators import MinLengthValidator
from django.db import models
from django.db.models import Q
from django.utils.translation import gettext_lazy as _
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
from psqlextra.manager import PostgresManager
from psqlextra.models import PostgresPartitionedModel
@@ -21,7 +20,6 @@ from uuid6 import uuid7
from api.db_utils import (
CustomUserManager,
FindingDeltaEnumField,
IntegrationTypeEnumField,
InvitationStateEnumField,
MemberRoleEnumField,
ProviderEnumField,
@@ -59,6 +57,7 @@ class StatusChoices(models.TextChoices):
FAIL = "FAIL", _("Fail")
PASS = "PASS", _("Pass")
MANUAL = "MANUAL", _("Manual")
MUTED = "MUTED", _("Muted")
class StateChoices(models.TextChoices):
@@ -191,7 +190,6 @@ class Provider(RowLevelSecurityProtectedModel):
AZURE = "azure", _("Azure")
GCP = "gcp", _("GCP")
KUBERNETES = "kubernetes", _("Kubernetes")
M365 = "m365", _("M365")
@staticmethod
def validate_aws_uid(value):
@@ -215,15 +213,6 @@ class Provider(RowLevelSecurityProtectedModel):
pointer="/data/attributes/uid",
)
@staticmethod
def validate_m365_uid(value):
if not re.match(r"^[a-zA-Z0-9-]+\.onmicrosoft\.com$", value):
raise ModelValidationError(
detail="M365 tenant ID must be a valid domain.",
code="m365-uid",
pointer="/data/attributes/uid",
)
@staticmethod
def validate_gcp_uid(value):
if not re.match(r"^[a-z][a-z0-9-]{5,29}$", value):
@@ -237,13 +226,13 @@ class Provider(RowLevelSecurityProtectedModel):
@staticmethod
def validate_kubernetes_uid(value):
if not re.match(
r"^[a-z0-9][A-Za-z0-9_.:\/-]{1,250}$",
r"(^[a-z0-9]([-a-z0-9]{1,61}[a-z0-9])?$)|(^arn:aws(-cn|-us-gov|-iso|-iso-b)?:[a-zA-Z0-9\-]+:([a-z]{2}-[a-z]+-\d{1})?:(\d{12})?:[a-zA-Z0-9\-_\/:\.\*]+(:\d+)?$)",
value,
):
raise ModelValidationError(
detail="The value must either be a valid Kubernetes UID (up to 63 characters, "
"starting and ending with a lowercase letter or number, containing only "
"lowercase alphanumeric characters and hyphens) or a valid AWS EKS Cluster ARN, GCP GKE Context Name or Azure AKS Cluster Name.",
"lowercase alphanumeric characters and hyphens) or a valid EKS ARN.",
code="kubernetes-uid",
pointer="/data/attributes/uid",
)
@@ -257,7 +246,7 @@ class Provider(RowLevelSecurityProtectedModel):
)
uid = models.CharField(
"Unique identifier for the provider, set by the provider",
max_length=250,
max_length=63,
blank=False,
validators=[MinLengthValidator(3)],
)
@@ -421,10 +410,6 @@ class Scan(RowLevelSecurityProtectedModel):
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
next_scan_at = models.DateTimeField(null=True, blank=True)
scheduler_task = models.ForeignKey(
PeriodicTask, on_delete=models.CASCADE, null=True, blank=True
)
output_location = models.CharField(blank=True, null=True, max_length=200)
# TODO: mutelist foreign key
class Meta(RowLevelSecurityProtectedModel.Meta):
@@ -443,10 +428,6 @@ class Scan(RowLevelSecurityProtectedModel):
fields=["provider", "state", "trigger", "scheduled_at"],
name="scans_prov_state_trig_sche_idx",
),
models.Index(
fields=["tenant_id", "provider_id", "state", "inserted_at"],
name="scans_prov_state_insert_idx",
),
]
class JSONAPIMeta:
@@ -528,11 +509,6 @@ class Resource(RowLevelSecurityProtectedModel):
editable=False,
)
metadata = models.TextField(blank=True, null=True)
details = models.TextField(blank=True, null=True)
partition = models.TextField(blank=True, null=True)
# Relationships
tags = models.ManyToManyField(
ResourceTag,
verbose_name="Tags associated with the resource, by provider",
@@ -568,10 +544,6 @@ class Resource(RowLevelSecurityProtectedModel):
fields=["uid", "region", "service", "name"],
name="resource_uid_reg_serv_name_idx",
),
models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
]
@@ -619,12 +591,6 @@ class ResourceTagMapping(RowLevelSecurityProtectedModel):
),
]
indexes = [
models.Index(
fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
),
]
class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
"""
@@ -649,7 +615,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
id = models.UUIDField(primary_key=True, default=uuid7, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
first_seen_at = models.DateTimeField(editable=False, null=True)
uid = models.CharField(max_length=300)
delta = FindingDeltaEnumField(
@@ -670,8 +635,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
tags = models.JSONField(default=dict, null=True, blank=True)
check_id = models.CharField(max_length=100, blank=False, null=False)
check_metadata = models.JSONField(default=dict, null=False)
muted = models.BooleanField(default=False, null=False)
compliance = models.JSONField(default=dict, null=True, blank=True)
# Relationships
scan = models.ForeignKey(to=Scan, related_name="findings", on_delete=models.CASCADE)
@@ -725,17 +688,7 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
],
name="findings_filter_idx",
),
models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
models.Index(
fields=["tenant_id", "id"],
condition=Q(delta="new"),
name="find_delta_new_idx",
),
]
class JSONAPIMeta:
@@ -1146,89 +1099,6 @@ class ScanSummary(RowLevelSecurityProtectedModel):
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]
indexes = [
models.Index(
fields=["tenant_id", "scan_id"],
name="scan_summaries_tenant_scan_idx",
)
]
class JSONAPIMeta:
resource_name = "scan-summaries"
class Integration(RowLevelSecurityProtectedModel):
class IntegrationChoices(models.TextChoices):
S3 = "amazon_s3", _("Amazon S3")
SAML = "saml", _("SAML")
AWS_SECURITY_HUB = "aws_security_hub", _("AWS Security Hub")
JIRA = "jira", _("JIRA")
SLACK = "slack", _("Slack")
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
enabled = models.BooleanField(default=False)
connected = models.BooleanField(null=True, blank=True)
connection_last_checked_at = models.DateTimeField(null=True, blank=True)
integration_type = IntegrationTypeEnumField(choices=IntegrationChoices.choices)
configuration = models.JSONField(default=dict)
_credentials = models.BinaryField(db_column="credentials")
providers = models.ManyToManyField(
Provider,
related_name="integrations",
through="IntegrationProviderRelationship",
blank=True,
)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "integrations"
constraints = [
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]
class JSONAPIMeta:
resource_name = "integrations"
@property
def credentials(self):
if isinstance(self._credentials, memoryview):
encrypted_bytes = self._credentials.tobytes()
elif isinstance(self._credentials, str):
encrypted_bytes = self._credentials.encode()
else:
encrypted_bytes = self._credentials
decrypted_data = fernet.decrypt(encrypted_bytes)
return json.loads(decrypted_data.decode())
@credentials.setter
def credentials(self, value):
encrypted_data = fernet.encrypt(json.dumps(value).encode())
self._credentials = encrypted_data
class IntegrationProviderRelationship(RowLevelSecurityProtectedModel):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
integration = models.ForeignKey(Integration, on_delete=models.CASCADE)
provider = models.ForeignKey(Provider, on_delete=models.CASCADE)
inserted_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = "integration_provider_mappings"
constraints = [
models.UniqueConstraint(
fields=["integration_id", "provider_id"],
name="unique_integration_provider_rel",
),
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]

View File

@@ -2,7 +2,8 @@ from typing import Any
from uuid import uuid4
from django.core.exceptions import ValidationError
from django.db import DEFAULT_DB_ALIAS, models
from django.db import DEFAULT_DB_ALIAS
from django.db import models
from django.db.backends.ddl_references import Statement, Table
from api.db_utils import DB_USER, POSTGRES_TENANT_VAR
@@ -58,11 +59,11 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
drop_sql_query = """
ALTER TABLE %(table_name)s NO FORCE ROW LEVEL SECURITY;
ALTER TABLE %(table_name)s DISABLE ROW LEVEL SECURITY;
REVOKE ALL ON TABLE %(table_name)s FROM %(db_user)s;
REVOKE ALL ON TABLE %(table_name) TO %(db_user)s;
"""
drop_policy_sql_query = """
DROP POLICY IF EXISTS %(db_user)s_%(raw_table_name)s_{statement} ON %(table_name)s;
DROP POLICY IF EXISTS %(db_user)s_%(table_name)s_{statement} on %(table_name)s;
"""
def __init__(
@@ -87,7 +88,9 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
f"{grant_queries}{self.grant_sql_query.format(statement=statement)}"
)
full_create_sql_query = f"{self.rls_sql_query}{policy_queries}{grant_queries}"
full_create_sql_query = (
f"{self.rls_sql_query}" f"{policy_queries}" f"{grant_queries}"
)
table_name = model._meta.db_table
if self.partition_name:
@@ -104,20 +107,16 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
def remove_sql(self, model: Any, schema_editor: Any) -> Any:
field_column = schema_editor.quote_name(self.target_field)
raw_table_name = model._meta.db_table
table_name = raw_table_name
if self.partition_name:
raw_table_name = f"{raw_table_name}_{self.partition_name}"
table_name = raw_table_name
full_drop_sql_query = (
f"{self.drop_sql_query}"
f"{''.join([self.drop_policy_sql_query.format(statement=statement) for statement in self.statements])}"
f"{''.join([self.drop_policy_sql_query.format(statement) for statement in self.statements])}"
)
table_name = model._meta.db_table
if self.partition_name:
table_name = f"{table_name}_{self.partition_name}"
return Statement(
full_drop_sql_query,
table_name=Table(table_name, schema_editor.quote_name),
raw_table_name=raw_table_name,
field_column=field_column,
db_user=DB_USER,
partition_name=self.partition_name,

View File

@@ -1,12 +1,12 @@
from celery import states
from celery.signals import before_task_publish
from config.celery import celery_app
from django.db.models.signals import post_delete
from django.dispatch import receiver
from django_celery_beat.models import PeriodicTask
from django_celery_results.backends.database import DatabaseBackend
from api.db_utils import delete_related_daily_task
from api.models import Provider
from config.celery import celery_app
def create_task_result_on_publish(sender=None, headers=None, **kwargs): # noqa: F841
@@ -31,4 +31,5 @@ before_task_publish.connect(
@receiver(post_delete, sender=Provider)
def delete_provider_scan_task(sender, instance, **kwargs): # noqa: F841
# Delete the associated periodic task when the provider is deleted
delete_related_daily_task(instance.id)
task_name = f"scan-perform-scheduled-{instance.id}"
PeriodicTask.objects.filter(name=task_name).delete()

File diff suppressed because it is too large Load Diff

View File

@@ -3,8 +3,6 @@ from conftest import TEST_PASSWORD, get_api_tokens, get_authorization_header
from django.urls import reverse
from rest_framework.test import APIClient
from api.models import Membership, User
@pytest.mark.django_db
def test_basic_authentication():
@@ -179,122 +177,3 @@ def test_user_me_when_inviting_users(create_test_user, tenants_fixture, roles_fi
user2_me = client.get(reverse("user-me"), headers=user2_headers)
assert user2_me.status_code == 200
assert user2_me.json()["data"]["attributes"]["email"] == user2_email
@pytest.mark.django_db
class TestTokenSwitchTenant:
def test_switch_tenant_with_valid_token(self, tenants_fixture, providers_fixture):
client = APIClient()
test_user = "test_email@prowler.com"
test_password = "test_password"
# Check that we can create a new user without any kind of authentication
user_creation_response = client.post(
reverse("user-list"),
data={
"data": {
"type": "users",
"attributes": {
"name": "test",
"email": test_user,
"password": test_password,
},
}
},
format="vnd.api+json",
)
assert user_creation_response.status_code == 201
# Create a new relationship between this user and another tenant
tenant_id = tenants_fixture[0].id
user_instance = User.objects.get(email=test_user)
Membership.objects.create(user=user_instance, tenant_id=tenant_id)
# Check that using our new user's credentials we can authenticate and get the providers
access_token, _ = get_api_tokens(client, test_user, test_password)
auth_headers = get_authorization_header(access_token)
user_me_response = client.get(
reverse("user-me"),
headers=auth_headers,
)
assert user_me_response.status_code == 200
# Assert this user belongs to two tenants
assert (
user_me_response.json()["data"]["relationships"]["memberships"]["meta"][
"count"
]
== 2
)
provider_response = client.get(
reverse("provider-list"),
headers=auth_headers,
)
assert provider_response.status_code == 200
# Empty response since there are no providers in this tenant
assert not provider_response.json()["data"]
switch_tenant_response = client.post(
reverse("token-switch"),
data={
"data": {
"type": "tokens-switch-tenant",
"attributes": {"tenant_id": tenant_id},
}
},
headers=auth_headers,
)
assert switch_tenant_response.status_code == 200
new_access_token = switch_tenant_response.json()["data"]["attributes"]["access"]
new_auth_headers = get_authorization_header(new_access_token)
provider_response = client.get(
reverse("provider-list"),
headers=new_auth_headers,
)
assert provider_response.status_code == 200
# Now it must be data because we switched to another tenant with providers
assert provider_response.json()["data"]
def test_switch_tenant_with_invalid_token(self, create_test_user, tenants_fixture):
client = APIClient()
access_token, refresh_token = get_api_tokens(
client, create_test_user.email, TEST_PASSWORD
)
auth_headers = get_authorization_header(access_token)
invalid_token_response = client.post(
reverse("token-switch"),
data={
"data": {
"type": "tokens-switch-tenant",
"attributes": {"tenant_id": "invalid_tenant_id"},
}
},
headers=auth_headers,
)
assert invalid_token_response.status_code == 400
assert invalid_token_response.json()["errors"][0]["code"] == "invalid"
assert (
invalid_token_response.json()["errors"][0]["detail"]
== "Must be a valid UUID."
)
invalid_tenant_response = client.post(
reverse("token-switch"),
data={
"data": {
"type": "tokens-switch-tenant",
"attributes": {"tenant_id": tenants_fixture[-1].id},
}
},
headers=auth_headers,
)
assert invalid_tenant_response.status_code == 400
assert invalid_tenant_response.json()["errors"][0]["code"] == "invalid"
assert invalid_tenant_response.json()["errors"][0]["detail"] == (
"Tenant does not exist or user is not a " "member."
)

View File

@@ -131,10 +131,9 @@ class TestBatchDelete:
return provider_count
@pytest.mark.django_db
def test_batch_delete(self, tenants_fixture, create_test_providers):
tenant_id = str(tenants_fixture[0].id)
def test_batch_delete(self, create_test_providers):
_, summary = batch_delete(
tenant_id, Provider.objects.all(), batch_size=create_test_providers // 2
Provider.objects.all(), batch_size=create_test_providers // 2
)
assert Provider.objects.all().count() == 0
assert summary == {"api.Provider": create_test_providers}

View File

@@ -92,31 +92,3 @@ class TestResourceModel:
assert len(resource.tags.filter(tenant_id=tenant_id)) == 0
assert resource.get_tags(tenant_id=tenant_id) == {}
# @pytest.mark.django_db
# class TestFindingModel:
# def test_add_finding_with_long_uid(
# self, providers_fixture, scans_fixture, resources_fixture
# ):
# provider, *_ = providers_fixture
# tenant_id = provider.tenant_id
# long_uid = "1" * 500
# _ = Finding.objects.create(
# tenant_id=tenant_id,
# uid=long_uid,
# delta=Finding.DeltaChoices.NEW,
# check_metadata={},
# status=StatusChoices.PASS,
# status_extended="",
# severity="high",
# impact="high",
# raw_result={},
# check_id="test_check",
# scan=scans_fixture[0],
# first_seen_at=None,
# muted=False,
# compliance={},
# )
# assert Finding.objects.filter(uid=long_uid).exists()

View File

@@ -1,19 +1,7 @@
from unittest.mock import ANY, Mock, patch
import pytest
from django.urls import reverse
from rest_framework import status
from api.models import (
Membership,
ProviderGroup,
ProviderGroupMembership,
Role,
RoleProviderGroupRelationship,
User,
UserRoleRelationship,
)
from api.v1.serializers import TokenSerializer
from unittest.mock import patch, ANY, Mock
@pytest.mark.django_db
@@ -316,96 +304,3 @@ class TestProviderViewSet:
reverse("provider-connection", kwargs={"pk": provider.id})
)
assert response.status_code == status.HTTP_403_FORBIDDEN
@pytest.mark.django_db
class TestLimitedVisibility:
TEST_EMAIL = "rbac@rbac.com"
TEST_PASSWORD = "thisisapassword123"
@pytest.fixture
def limited_admin_user(
self, django_db_setup, django_db_blocker, tenants_fixture, providers_fixture
):
with django_db_blocker.unblock():
tenant = tenants_fixture[0]
provider = providers_fixture[0]
user = User.objects.create_user(
name="testing",
email=self.TEST_EMAIL,
password=self.TEST_PASSWORD,
)
Membership.objects.create(
user=user,
tenant=tenant,
role=Membership.RoleChoices.OWNER,
)
role = Role.objects.create(
name="limited_visibility",
tenant=tenant,
manage_users=True,
manage_account=True,
manage_billing=True,
manage_providers=True,
manage_integrations=True,
manage_scans=True,
unlimited_visibility=False,
)
UserRoleRelationship.objects.create(
user=user,
role=role,
tenant=tenant,
)
provider_group = ProviderGroup.objects.create(
name="limited_visibility_group",
tenant=tenant,
)
ProviderGroupMembership.objects.create(
tenant=tenant,
provider=provider,
provider_group=provider_group,
)
RoleProviderGroupRelationship.objects.create(
tenant=tenant, role=role, provider_group=provider_group
)
return user
@pytest.fixture
def authenticated_client_rbac_limited(
self, limited_admin_user, tenants_fixture, client
):
client.user = limited_admin_user
tenant_id = tenants_fixture[0].id
serializer = TokenSerializer(
data={
"type": "tokens",
"email": self.TEST_EMAIL,
"password": self.TEST_PASSWORD,
"tenant_id": tenant_id,
}
)
serializer.is_valid(raise_exception=True)
access_token = serializer.validated_data["access"]
client.defaults["HTTP_AUTHORIZATION"] = f"Bearer {access_token}"
return client
def test_integrations(
self, authenticated_client_rbac_limited, integrations_fixture, providers_fixture
):
# Integration 2 is related to provider1 and provider 2
# This user cannot see provider 2
integration = integrations_fixture[1]
response = authenticated_client_rbac_limited.get(
reverse("integration-detail", kwargs={"pk": integration.id})
)
assert response.status_code == status.HTTP_200_OK
assert integration.providers.count() == 2
assert (
response.json()["data"]["relationships"]["providers"]["meta"]["count"] == 1
)

View File

@@ -1,25 +1,25 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from unittest.mock import patch, MagicMock
import pytest
from rest_framework.exceptions import NotFound, ValidationError
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation, Provider
from api.utils import (
get_prowler_provider_kwargs,
initialize_prowler_provider,
merge_dicts,
prowler_provider_connection_test,
return_prowler_provider,
validate_invitation,
)
from prowler.providers.aws.aws_provider import AwsProvider
from prowler.providers.azure.azure_provider import AzureProvider
from prowler.providers.gcp.gcp_provider import GcpProvider
from prowler.providers.kubernetes.kubernetes_provider import KubernetesProvider
from prowler.providers.m365.m365_provider import M365Provider
from rest_framework.exceptions import ValidationError, NotFound
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation
from api.models import Provider
from api.utils import (
merge_dicts,
return_prowler_provider,
initialize_prowler_provider,
prowler_provider_connection_test,
get_prowler_provider_kwargs,
)
from api.utils import validate_invitation
class TestMergeDicts:
@@ -105,7 +105,6 @@ class TestReturnProwlerProvider:
(Provider.ProviderChoices.GCP.value, GcpProvider),
(Provider.ProviderChoices.AZURE.value, AzureProvider),
(Provider.ProviderChoices.KUBERNETES.value, KubernetesProvider),
(Provider.ProviderChoices.M365.value, M365Provider),
],
)
def test_return_prowler_provider(self, provider_type, expected_provider):
@@ -145,18 +144,6 @@ class TestProwlerProviderConnectionTest:
key="value", provider_id="1234567890", raise_on_exception=False
)
@pytest.mark.django_db
@patch("api.utils.return_prowler_provider")
def test_prowler_provider_connection_test_without_secret(
self, mock_return_prowler_provider, providers_fixture
):
mock_return_prowler_provider.return_value = MagicMock()
connection = prowler_provider_connection_test(providers_fixture[0])
assert connection.is_connected is False
assert isinstance(connection.error, Provider.secret.RelatedObjectDoesNotExist)
assert str(connection.error) == "Provider has no secret."
class TestGetProwlerProviderKwargs:
@pytest.mark.parametrize(
@@ -178,10 +165,6 @@ class TestGetProwlerProviderKwargs:
Provider.ProviderChoices.KUBERNETES.value,
{"context": "provider_uid"},
),
(
Provider.ProviderChoices.M365.value,
{},
),
],
)
def test_get_prowler_provider_kwargs(self, provider_type, expected_extra_kwargs):
@@ -291,10 +274,9 @@ class TestValidateInvitation:
expired_time = datetime.now(timezone.utc) - timedelta(days=1)
invitation.expires_at = expired_time
with (
patch("api.utils.Invitation.objects.using") as mock_using,
patch("api.utils.datetime") as mock_datetime,
):
with patch("api.utils.Invitation.objects.using") as mock_using, patch(
"api.utils.datetime"
) as mock_datetime:
mock_db = mock_using.return_value
mock_db.get.return_value = invitation
mock_datetime.now.return_value = datetime.now(timezone.utc)

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +1,15 @@
from datetime import datetime, timezone
from allauth.socialaccount.providers.oauth2.client import OAuth2Client
from rest_framework.exceptions import NotFound, ValidationError
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation, Provider
from prowler.providers.aws.aws_provider import AwsProvider
from prowler.providers.azure.azure_provider import AzureProvider
from prowler.providers.common.models import Connection
from prowler.providers.gcp.gcp_provider import GcpProvider
from prowler.providers.kubernetes.kubernetes_provider import KubernetesProvider
from prowler.providers.m365.m365_provider import M365Provider
from rest_framework.exceptions import ValidationError, NotFound
class CustomOAuth2Client(OAuth2Client):
def __init__(self, client_id, secret, *args, **kwargs):
# Remove any duplicate "scope_delimiter" from kwargs
# Bug present in dj-rest-auth after version v7.0.1
# https://github.com/iMerica/dj-rest-auth/issues/673
kwargs.pop("scope_delimiter", None)
super().__init__(client_id, secret, *args, **kwargs)
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Provider, Invitation
def merge_dicts(default_dict: dict, replacement_dict: dict) -> dict:
@@ -52,14 +41,14 @@ def merge_dicts(default_dict: dict, replacement_dict: dict) -> dict:
def return_prowler_provider(
provider: Provider,
) -> [AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider]:
) -> [AwsProvider | AzureProvider | GcpProvider | KubernetesProvider]:
"""Return the Prowler provider class based on the given provider type.
Args:
provider (Provider): The provider object containing the provider type and associated secrets.
Returns:
AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider: The corresponding provider class.
AwsProvider | AzureProvider | GcpProvider | KubernetesProvider: The corresponding provider class.
Raises:
ValueError: If the provider type specified in `provider.provider` is not supported.
@@ -73,8 +62,6 @@ def return_prowler_provider(
prowler_provider = AzureProvider
case Provider.ProviderChoices.KUBERNETES.value:
prowler_provider = KubernetesProvider
case Provider.ProviderChoices.M365.value:
prowler_provider = M365Provider
case _:
raise ValueError(f"Provider type {provider.provider} not supported")
return prowler_provider
@@ -107,15 +94,15 @@ def get_prowler_provider_kwargs(provider: Provider) -> dict:
def initialize_prowler_provider(
provider: Provider,
) -> AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider:
) -> AwsProvider | AzureProvider | GcpProvider | KubernetesProvider:
"""Initialize a Prowler provider instance based on the given provider type.
Args:
provider (Provider): The provider object containing the provider type and associated secrets.
Returns:
AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider: An instance of the corresponding provider class
(`AwsProvider`, `AzureProvider`, `GcpProvider`, `KubernetesProvider` or `M365Provider`) initialized with the
AwsProvider | AzureProvider | GcpProvider | KubernetesProvider: An instance of the corresponding provider class
(`AwsProvider`, `AzureProvider`, `GcpProvider`, or `KubernetesProvider`) initialized with the
provider's secrets.
"""
prowler_provider = return_prowler_provider(provider)
@@ -133,12 +120,7 @@ def prowler_provider_connection_test(provider: Provider) -> Connection:
Connection: A connection object representing the result of the connection test for the specified provider.
"""
prowler_provider = return_prowler_provider(provider)
try:
prowler_provider_kwargs = provider.secret.secret
except Provider.secret.RelatedObjectDoesNotExist as secret_error:
return Connection(is_connected=False, error=secret_error)
prowler_provider_kwargs = provider.secret.secret
return prowler_provider.test_connection(
**prowler_provider_kwargs, provider_id=provider.uid, raise_on_exception=False
)

View File

@@ -106,7 +106,7 @@ def uuid7_end(uuid_obj: UUID, offset_months: int = 1) -> UUID:
Args:
uuid_obj: A UUIDv7 object.
offset_months: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
offset_days: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
partitions are not being used, if so the value will be the one set at FINDINGS_TABLE_PARTITION_MONTHS.
Returns:

View File

@@ -1,122 +0,0 @@
from drf_spectacular.utils import extend_schema_field
from rest_framework_json_api import serializers
from rest_framework_json_api.serializers import ValidationError
class BaseValidateSerializer(serializers.Serializer):
def validate(self, data):
if hasattr(self, "initial_data"):
initial_data = set(self.initial_data.keys()) - {"id", "type"}
unknown_keys = initial_data - set(self.fields.keys())
if unknown_keys:
raise ValidationError(f"Invalid fields: {unknown_keys}")
return data
# Integrations
class S3ConfigSerializer(BaseValidateSerializer):
bucket_name = serializers.CharField()
output_directory = serializers.CharField()
class Meta:
resource_name = "integrations"
class AWSCredentialSerializer(BaseValidateSerializer):
role_arn = serializers.CharField(required=False)
external_id = serializers.CharField(required=False)
role_session_name = serializers.CharField(required=False)
session_duration = serializers.IntegerField(
required=False, min_value=900, max_value=43200
)
aws_access_key_id = serializers.CharField(required=False)
aws_secret_access_key = serializers.CharField(required=False)
aws_session_token = serializers.CharField(required=False)
class Meta:
resource_name = "integrations"
@extend_schema_field(
{
"oneOf": [
{
"type": "object",
"title": "AWS Credentials",
"properties": {
"role_arn": {
"type": "string",
"description": "The Amazon Resource Name (ARN) of the role to assume. Required for AWS role "
"assumption.",
},
"external_id": {
"type": "string",
"description": "An identifier to enhance security for role assumption.",
},
"aws_access_key_id": {
"type": "string",
"description": "The AWS access key ID. Only required if the environment lacks pre-configured "
"AWS credentials.",
},
"aws_secret_access_key": {
"type": "string",
"description": "The AWS secret access key. Required if 'aws_access_key_id' is provided or if "
"no AWS credentials are pre-configured.",
},
"aws_session_token": {
"type": "string",
"description": "The session token for temporary credentials, if applicable.",
},
"session_duration": {
"type": "integer",
"minimum": 900,
"maximum": 43200,
"default": 3600,
"description": "The duration (in seconds) for the role session.",
},
"role_session_name": {
"type": "string",
"description": "An identifier for the role session, useful for tracking sessions in AWS logs. "
"The regex used to validate this parameter is a string of characters consisting of "
"upper- and lower-case alphanumeric characters with no spaces. You can also include "
"underscores or any of the following characters: =,.@-\n\n"
"Examples:\n"
"- MySession123\n"
"- User_Session-1\n"
"- Test.Session@2",
"pattern": "^[a-zA-Z0-9=,.@_-]+$",
},
},
},
]
}
)
class IntegrationCredentialField(serializers.JSONField):
pass
@extend_schema_field(
{
"oneOf": [
{
"type": "object",
"title": "Amazon S3",
"properties": {
"bucket_name": {
"type": "string",
"description": "The name of the S3 bucket where files will be stored.",
},
"output_directory": {
"type": "string",
"description": "The directory path within the bucket where files will be saved.",
},
},
"required": ["bucket_name", "output_directory"],
},
]
}
)
class IntegrationConfigField(serializers.JSONField):
pass

View File

@@ -1,172 +0,0 @@
from drf_spectacular.utils import extend_schema_field
from rest_framework_json_api import serializers
@extend_schema_field(
{
"oneOf": [
{
"type": "object",
"title": "AWS Static Credentials",
"properties": {
"aws_access_key_id": {
"type": "string",
"description": "The AWS access key ID. Required for environments where no IAM role is being "
"assumed and direct AWS access is needed.",
},
"aws_secret_access_key": {
"type": "string",
"description": "The AWS secret access key. Must accompany 'aws_access_key_id' to authorize "
"access to AWS resources.",
},
"aws_session_token": {
"type": "string",
"description": "The session token associated with temporary credentials. Only needed for "
"session-based or temporary AWS access.",
},
},
"required": ["aws_access_key_id", "aws_secret_access_key"],
},
{
"type": "object",
"title": "AWS Assume Role",
"properties": {
"role_arn": {
"type": "string",
"description": "The Amazon Resource Name (ARN) of the role to assume. Required for AWS role "
"assumption.",
},
"external_id": {
"type": "string",
"description": "An identifier to enhance security for role assumption.",
},
"aws_access_key_id": {
"type": "string",
"description": "The AWS access key ID. Only required if the environment lacks pre-configured "
"AWS credentials.",
},
"aws_secret_access_key": {
"type": "string",
"description": "The AWS secret access key. Required if 'aws_access_key_id' is provided or if "
"no AWS credentials are pre-configured.",
},
"aws_session_token": {
"type": "string",
"description": "The session token for temporary credentials, if applicable.",
},
"session_duration": {
"type": "integer",
"minimum": 900,
"maximum": 43200,
"default": 3600,
"description": "The duration (in seconds) for the role session.",
},
"role_session_name": {
"type": "string",
"description": "An identifier for the role session, useful for tracking sessions in AWS logs. "
"The regex used to validate this parameter is a string of characters consisting of "
"upper- and lower-case alphanumeric characters with no spaces. You can also include "
"underscores or any of the following characters: =,.@-\n\n"
"Examples:\n"
"- MySession123\n"
"- User_Session-1\n"
"- Test.Session@2",
"pattern": "^[a-zA-Z0-9=,.@_-]+$",
},
},
"required": ["role_arn", "external_id"],
},
{
"type": "object",
"title": "Azure Static Credentials",
"properties": {
"client_id": {
"type": "string",
"description": "The Azure application (client) ID for authentication in Azure AD.",
},
"client_secret": {
"type": "string",
"description": "The client secret associated with the application (client) ID, providing "
"secure access.",
},
"tenant_id": {
"type": "string",
"description": "The Azure tenant ID, representing the directory where the application is "
"registered.",
},
},
"required": ["client_id", "client_secret", "tenant_id"],
},
{
"type": "object",
"title": "M365 Static Credentials",
"properties": {
"client_id": {
"type": "string",
"description": "The Azure application (client) ID for authentication in Azure AD.",
},
"client_secret": {
"type": "string",
"description": "The client secret associated with the application (client) ID, providing "
"secure access.",
},
"tenant_id": {
"type": "string",
"description": "The Azure tenant ID, representing the directory where the application is "
"registered.",
},
"user": {
"type": "email",
"description": "User microsoft email address.",
},
"encrypted_password": {
"type": "string",
"description": "User encrypted password.",
},
},
"required": [
"client_id",
"client_secret",
"tenant_id",
"user",
"encrypted_password",
],
},
{
"type": "object",
"title": "GCP Static Credentials",
"properties": {
"client_id": {
"type": "string",
"description": "The client ID from Google Cloud, used to identify the application for GCP "
"access.",
},
"client_secret": {
"type": "string",
"description": "The client secret associated with the GCP client ID, required for secure "
"access.",
},
"refresh_token": {
"type": "string",
"description": "A refresh token that allows the application to obtain new access tokens for "
"extended use.",
},
},
"required": ["client_id", "client_secret", "refresh_token"],
},
{
"type": "object",
"title": "Kubernetes Static Credentials",
"properties": {
"kubeconfig_content": {
"type": "string",
"description": "The content of the Kubernetes kubeconfig file, encoded as a string.",
}
},
"required": ["kubeconfig_content"],
},
]
}
)
class ProviderSecretField(serializers.JSONField):
pass

View File

@@ -16,8 +16,6 @@ from rest_framework_simplejwt.tokens import RefreshToken
from api.models import (
ComplianceOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
InvitationRoleRelationship,
Membership,
@@ -36,76 +34,11 @@ from api.models import (
UserRoleRelationship,
)
from api.rls import Tenant
from api.v1.serializer_utils.integrations import (
AWSCredentialSerializer,
IntegrationConfigField,
IntegrationCredentialField,
S3ConfigSerializer,
)
from api.v1.serializer_utils.providers import ProviderSecretField
# Tokens
def generate_tokens(user: User, tenant_id: str) -> dict:
try:
refresh = RefreshToken.for_user(user)
except InvalidKeyError:
# Handle invalid key error
raise ValidationError(
{
"detail": "Token generation failed due to invalid key configuration. Provide valid "
"DJANGO_TOKEN_SIGNING_KEY and DJANGO_TOKEN_VERIFYING_KEY in the environment."
}
)
except Exception as e:
raise ValidationError({"detail": str(e)})
# Post-process the tokens
# Set the tenant_id
refresh["tenant_id"] = tenant_id
# Set the nbf (not before) claim to the iat (issued at) claim. At this moment, simplejwt does not provide a
# way to set the nbf claim
refresh.payload["nbf"] = refresh["iat"]
# Get the access token
access = refresh.access_token
if settings.SIMPLE_JWT["UPDATE_LAST_LOGIN"]:
update_last_login(None, user)
return {"access": str(access), "refresh": str(refresh)}
class BaseTokenSerializer(TokenObtainPairSerializer):
def custom_validate(self, attrs, social: bool = False):
email = attrs.get("email")
password = attrs.get("password")
tenant_id = str(attrs.get("tenant_id", ""))
# Authenticate user
user = (
User.objects.get(email=email)
if social
else authenticate(username=email, password=password)
)
if user is None:
raise ValidationError("Invalid credentials")
if tenant_id:
if not user.is_member_of_tenant(tenant_id):
raise ValidationError("Tenant does not exist or user is not a member.")
else:
first_membership = user.memberships.order_by("date_joined").first()
if first_membership is None:
raise ValidationError("User has no memberships.")
tenant_id = str(first_membership.tenant_id)
return generate_tokens(user, tenant_id)
class TokenSerializer(BaseTokenSerializer):
class TokenSerializer(TokenObtainPairSerializer):
email = serializers.EmailField(write_only=True)
password = serializers.CharField(write_only=True)
tenant_id = serializers.UUIDField(
@@ -123,25 +56,53 @@ class TokenSerializer(BaseTokenSerializer):
resource_name = "tokens"
def validate(self, attrs):
return super().custom_validate(attrs)
email = attrs.get("email")
password = attrs.get("password")
tenant_id = str(attrs.get("tenant_id", ""))
# Authenticate user
user = authenticate(username=email, password=password)
if user is None:
raise ValidationError("Invalid credentials")
class TokenSocialLoginSerializer(BaseTokenSerializer):
email = serializers.EmailField(write_only=True)
if tenant_id:
if not user.is_member_of_tenant(tenant_id):
raise ValidationError("Tenant does not exist or user is not a member.")
else:
first_membership = user.memberships.order_by("date_joined").first()
if first_membership is None:
raise ValidationError("User has no memberships.")
tenant_id = str(first_membership.tenant_id)
# Output tokens
refresh = serializers.CharField(read_only=True)
access = serializers.CharField(read_only=True)
# Generate tokens
try:
refresh = RefreshToken.for_user(user)
except InvalidKeyError:
# Handle invalid key error
raise ValidationError(
{
"detail": "Token generation failed due to invalid key configuration. Provide valid "
"DJANGO_TOKEN_SIGNING_KEY and DJANGO_TOKEN_VERIFYING_KEY in the environment."
}
)
except Exception as e:
raise ValidationError({"detail": str(e)})
class JSONAPIMeta:
resource_name = "tokens"
# Post-process the tokens
# Set the tenant_id
refresh["tenant_id"] = tenant_id
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.fields.pop("password", None)
# Set the nbf (not before) claim to the iat (issued at) claim. At this moment, simplejwt does not provide a
# way to set the nbf claim
refresh.payload["nbf"] = refresh["iat"]
def validate(self, attrs):
return super().custom_validate(attrs, social=True)
# Get the access token
access = refresh.access_token
if settings.SIMPLE_JWT["UPDATE_LAST_LOGIN"]:
update_last_login(None, user)
return {"access": str(access), "refresh": str(refresh)}
# TODO: Check if we can change the parent class to TokenRefreshSerializer from rest_framework_simplejwt.serializers
@@ -179,30 +140,6 @@ class TokenRefreshSerializer(serializers.Serializer):
raise ValidationError({"refresh": "Invalid or expired token"})
class TokenSwitchTenantSerializer(serializers.Serializer):
tenant_id = serializers.UUIDField(
write_only=True, help_text="The tenant ID for which to request a new token."
)
access = serializers.CharField(read_only=True)
refresh = serializers.CharField(read_only=True)
class JSONAPIMeta:
resource_name = "tokens-switch-tenant"
def validate(self, attrs):
request = self.context["request"]
user = request.user
if not user.is_authenticated:
raise ValidationError("Invalid or expired token.")
tenant_id = str(attrs.get("tenant_id"))
if not user.is_member_of_tenant(tenant_id):
raise ValidationError("Tenant does not exist or user is not a member.")
return generate_tokens(user, tenant_id)
# Base
@@ -754,43 +691,6 @@ class ProviderSerializer(RLSSerializer):
}
class ProviderIncludeSerializer(RLSSerializer):
"""
Serializer for the Provider model.
"""
provider = ProviderEnumSerializerField()
connection = serializers.SerializerMethodField(read_only=True)
class Meta:
model = Provider
fields = [
"id",
"inserted_at",
"updated_at",
"provider",
"uid",
"alias",
"connection",
# "scanner_args",
]
@extend_schema_field(
{
"type": "object",
"properties": {
"connected": {"type": "boolean"},
"last_checked_at": {"type": "string", "format": "date-time"},
},
}
)
def get_connection(self, obj):
return {
"connected": obj.connected,
"last_checked_at": obj.connection_last_checked_at,
}
class ProviderCreateSerializer(RLSSerializer, BaseWriteSerializer):
class Meta:
model = Provider
@@ -852,39 +752,6 @@ class ScanSerializer(RLSSerializer):
"url",
]
included_serializers = {
"provider": "api.v1.serializers.ProviderIncludeSerializer",
}
class ScanIncludeSerializer(RLSSerializer):
trigger = serializers.ChoiceField(
choices=Scan.TriggerChoices.choices, read_only=True
)
state = StateEnumSerializerField(read_only=True)
class Meta:
model = Scan
fields = [
"id",
"name",
"trigger",
"state",
"unique_resource_count",
"progress",
# "scanner_args",
"duration",
"inserted_at",
"started_at",
"completed_at",
"scheduled_at",
"provider",
]
included_serializers = {
"provider": "api.v1.serializers.ProviderIncludeSerializer",
}
class ScanCreateSerializer(RLSSerializer, BaseWriteSerializer):
class Meta:
@@ -952,23 +819,6 @@ class ScanTaskSerializer(RLSSerializer):
]
class ScanReportSerializer(serializers.Serializer):
id = serializers.CharField(source="scan")
class Meta:
resource_name = "scan-reports"
fields = ["id"]
class ScanComplianceReportSerializer(serializers.Serializer):
id = serializers.CharField(source="scan")
name = serializers.CharField()
class Meta:
resource_name = "scan-reports"
fields = ["id", "name"]
class ResourceTagSerializer(RLSSerializer):
"""
Serializer for the ResourceTag model
@@ -1034,51 +884,6 @@ class ResourceSerializer(RLSSerializer):
return fields
class ResourceIncludeSerializer(RLSSerializer):
"""
Serializer for the Resource model.
"""
tags = serializers.SerializerMethodField()
type_ = serializers.CharField(read_only=True)
class Meta:
model = Resource
fields = [
"id",
"inserted_at",
"updated_at",
"uid",
"name",
"region",
"service",
"type_",
"tags",
]
extra_kwargs = {
"id": {"read_only": True},
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
}
@extend_schema_field(
{
"type": "object",
"description": "Tags associated with the resource",
"example": {"env": "prod", "owner": "johndoe"},
}
)
def get_tags(self, obj):
return obj.get_tags(self.context.get("tenant_id"))
def get_fields(self):
"""`type` is a Python reserved keyword."""
fields = super().get_fields()
type_ = fields.pop("type_")
fields["type"] = type_
return fields
class FindingSerializer(RLSSerializer):
"""
Serializer for the Finding model.
@@ -1100,8 +905,6 @@ class FindingSerializer(RLSSerializer):
"raw_result",
"inserted_at",
"updated_at",
"first_seen_at",
"muted",
"url",
# Relationships
"scan",
@@ -1109,12 +912,11 @@ class FindingSerializer(RLSSerializer):
]
included_serializers = {
"scan": ScanIncludeSerializer,
"resources": ResourceIncludeSerializer,
"scan": ScanSerializer,
"resources": ResourceSerializer,
}
# To be removed when the related endpoint is removed as well
class FindingDynamicFilterSerializer(serializers.Serializer):
services = serializers.ListField(child=serializers.CharField(), allow_empty=True)
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
@@ -1123,19 +925,6 @@ class FindingDynamicFilterSerializer(serializers.Serializer):
resource_name = "finding-dynamic-filters"
class FindingMetadataSerializer(serializers.Serializer):
services = serializers.ListField(child=serializers.CharField(), allow_empty=True)
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
resource_types = serializers.ListField(
child=serializers.CharField(), allow_empty=True
)
# Temporarily disabled until we implement tag filtering in the UI
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
class Meta:
resource_name = "findings-metadata"
# Provider secrets
class BaseWriteProviderSecretSerializer(BaseWriteSerializer):
@staticmethod
@@ -1151,8 +940,6 @@ class BaseWriteProviderSecretSerializer(BaseWriteSerializer):
serializer = GCPProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.KUBERNETES.value:
serializer = KubernetesProviderSecret(data=secret)
elif provider_type == Provider.ProviderChoices.M365.value:
serializer = M365ProviderSecret(data=secret)
else:
raise serializers.ValidationError(
{"provider": f"Provider type not supported {provider_type}"}
@@ -1192,17 +979,6 @@ class AzureProviderSecret(serializers.Serializer):
resource_name = "provider-secrets"
class M365ProviderSecret(serializers.Serializer):
client_id = serializers.CharField()
client_secret = serializers.CharField()
tenant_id = serializers.CharField()
user = serializers.EmailField()
encrypted_password = serializers.CharField()
class Meta:
resource_name = "provider-secrets"
class GCPProviderSecret(serializers.Serializer):
client_id = serializers.CharField()
client_secret = serializers.CharField()
@@ -1221,7 +997,7 @@ class KubernetesProviderSecret(serializers.Serializer):
class AWSRoleAssumptionProviderSecret(serializers.Serializer):
role_arn = serializers.CharField()
external_id = serializers.CharField()
external_id = serializers.CharField(required=False)
role_session_name = serializers.CharField(required=False)
session_duration = serializers.IntegerField(
required=False, min_value=900, max_value=43200
@@ -1234,6 +1010,142 @@ class AWSRoleAssumptionProviderSecret(serializers.Serializer):
resource_name = "provider-secrets"
@extend_schema_field(
{
"oneOf": [
{
"type": "object",
"title": "AWS Static Credentials",
"properties": {
"aws_access_key_id": {
"type": "string",
"description": "The AWS access key ID. Required for environments where no IAM role is being "
"assumed and direct AWS access is needed.",
},
"aws_secret_access_key": {
"type": "string",
"description": "The AWS secret access key. Must accompany 'aws_access_key_id' to authorize "
"access to AWS resources.",
},
"aws_session_token": {
"type": "string",
"description": "The session token associated with temporary credentials. Only needed for "
"session-based or temporary AWS access.",
},
},
"required": ["aws_access_key_id", "aws_secret_access_key"],
},
{
"type": "object",
"title": "AWS Assume Role",
"properties": {
"role_arn": {
"type": "string",
"description": "The Amazon Resource Name (ARN) of the role to assume. Required for AWS role "
"assumption.",
},
"aws_access_key_id": {
"type": "string",
"description": "The AWS access key ID. Only required if the environment lacks pre-configured "
"AWS credentials.",
},
"aws_secret_access_key": {
"type": "string",
"description": "The AWS secret access key. Required if 'aws_access_key_id' is provided or if "
"no AWS credentials are pre-configured.",
},
"aws_session_token": {
"type": "string",
"description": "The session token for temporary credentials, if applicable.",
},
"session_duration": {
"type": "integer",
"minimum": 900,
"maximum": 43200,
"default": 3600,
"description": "The duration (in seconds) for the role session.",
},
"external_id": {
"type": "string",
"description": "An optional identifier to enhance security for role assumption; may be "
"required by the role administrator.",
},
"role_session_name": {
"type": "string",
"description": "An identifier for the role session, useful for tracking sessions in AWS logs. "
"The regex used to validate this parameter is a string of characters consisting of "
"upper- and lower-case alphanumeric characters with no spaces. You can also include "
"underscores or any of the following characters: =,.@-\n\n"
"Examples:\n"
"- MySession123\n"
"- User_Session-1\n"
"- Test.Session@2",
"pattern": "^[a-zA-Z0-9=,.@_-]+$",
},
},
"required": ["role_arn"],
},
{
"type": "object",
"title": "Azure Static Credentials",
"properties": {
"client_id": {
"type": "string",
"description": "The Azure application (client) ID for authentication in Azure AD.",
},
"client_secret": {
"type": "string",
"description": "The client secret associated with the application (client) ID, providing "
"secure access.",
},
"tenant_id": {
"type": "string",
"description": "The Azure tenant ID, representing the directory where the application is "
"registered.",
},
},
"required": ["client_id", "client_secret", "tenant_id"],
},
{
"type": "object",
"title": "GCP Static Credentials",
"properties": {
"client_id": {
"type": "string",
"description": "The client ID from Google Cloud, used to identify the application for GCP "
"access.",
},
"client_secret": {
"type": "string",
"description": "The client secret associated with the GCP client ID, required for secure "
"access.",
},
"refresh_token": {
"type": "string",
"description": "A refresh token that allows the application to obtain new access tokens for "
"extended use.",
},
},
"required": ["client_id", "client_secret", "refresh_token"],
},
{
"type": "object",
"title": "Kubernetes Static Credentials",
"properties": {
"kubeconfig_content": {
"type": "string",
"description": "The content of the Kubernetes kubeconfig file, encoded as a string.",
}
},
"required": ["kubeconfig_content"],
},
]
}
)
class ProviderSecretField(serializers.JSONField):
pass
class ProviderSecretSerializer(RLSSerializer):
"""
Serializer for the ProviderSecret model.
@@ -1507,8 +1419,8 @@ class RoleSerializer(RLSSerializer, BaseWriteSerializer):
"manage_account",
# Disable for the first release
# "manage_billing",
# "manage_integrations",
# /Disable for the first release
"manage_integrations",
"manage_providers",
"manage_scans",
"permission_state",
@@ -1790,13 +1702,6 @@ class ComplianceOverviewFullSerializer(ComplianceOverviewSerializer):
return obj.requirements
class ComplianceOverviewMetadataSerializer(serializers.Serializer):
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
class Meta:
resource_name = "compliance-overviews-metadata"
# Overviews
@@ -1817,7 +1722,7 @@ class OverviewProviderSerializer(serializers.Serializer):
"properties": {
"pass": {"type": "integer"},
"fail": {"type": "integer"},
"muted": {"type": "integer"},
"manual": {"type": "integer"},
"total": {"type": "integer"},
},
}
@@ -1826,7 +1731,7 @@ class OverviewProviderSerializer(serializers.Serializer):
return {
"pass": obj["findings_passed"],
"fail": obj["findings_failed"],
"muted": obj["findings_muted"],
"manual": obj["findings_manual"],
"total": obj["total_findings"],
}
@@ -1921,201 +1826,3 @@ class ScheduleDailyCreateSerializer(serializers.Serializer):
if unknown_keys:
raise ValidationError(f"Invalid fields: {unknown_keys}")
return data
# Integrations
class BaseWriteIntegrationSerializer(BaseWriteSerializer):
@staticmethod
def validate_integration_data(
integration_type: str,
providers: list[Provider], # noqa
configuration: dict,
credentials: dict,
):
if integration_type == Integration.IntegrationChoices.S3:
config_serializer = S3ConfigSerializer
credentials_serializers = [AWSCredentialSerializer]
# TODO: This will be required for AWS Security Hub
# if providers and not all(
# provider.provider == Provider.ProviderChoices.AWS
# for provider in providers
# ):
# raise serializers.ValidationError(
# {"providers": "All providers must be AWS for the S3 integration."}
# )
else:
raise serializers.ValidationError(
{
"integration_type": f"Integration type not supported yet: {integration_type}"
}
)
config_serializer(data=configuration).is_valid(raise_exception=True)
for cred_serializer in credentials_serializers:
try:
cred_serializer(data=credentials).is_valid(raise_exception=True)
break
except ValidationError:
continue
else:
raise ValidationError(
{"credentials": "Invalid credentials for the integration type."}
)
class IntegrationSerializer(RLSSerializer):
"""
Serializer for the Integration model.
"""
providers = serializers.ResourceRelatedField(
queryset=Provider.objects.all(), many=True
)
class Meta:
model = Integration
fields = [
"id",
"inserted_at",
"updated_at",
"enabled",
"connected",
"connection_last_checked_at",
"integration_type",
"configuration",
"providers",
"url",
]
included_serializers = {
"providers": "api.v1.serializers.ProviderIncludeSerializer",
}
def to_representation(self, instance):
representation = super().to_representation(instance)
allowed_providers = self.context.get("allowed_providers")
if allowed_providers:
allowed_provider_ids = {str(provider.id) for provider in allowed_providers}
representation["providers"] = [
provider
for provider in representation["providers"]
if provider["id"] in allowed_provider_ids
]
return representation
class IntegrationCreateSerializer(BaseWriteIntegrationSerializer):
credentials = IntegrationCredentialField(write_only=True)
configuration = IntegrationConfigField()
providers = serializers.ResourceRelatedField(
queryset=Provider.objects.all(), many=True, required=False
)
class Meta:
model = Integration
fields = [
"inserted_at",
"updated_at",
"enabled",
"connected",
"connection_last_checked_at",
"integration_type",
"configuration",
"credentials",
"providers",
]
extra_kwargs = {
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
"connected": {"read_only": True},
"enabled": {"read_only": True},
"connection_last_checked_at": {"read_only": True},
}
def validate(self, attrs):
integration_type = attrs.get("integration_type")
providers = attrs.get("providers")
configuration = attrs.get("configuration")
credentials = attrs.get("credentials")
validated_attrs = super().validate(attrs)
self.validate_integration_data(
integration_type, providers, configuration, credentials
)
return validated_attrs
def create(self, validated_data):
tenant_id = self.context.get("tenant_id")
providers = validated_data.pop("providers", [])
integration = Integration.objects.create(tenant_id=tenant_id, **validated_data)
through_model_instances = [
IntegrationProviderRelationship(
integration=integration,
provider=provider,
tenant_id=tenant_id,
)
for provider in providers
]
IntegrationProviderRelationship.objects.bulk_create(through_model_instances)
return integration
class IntegrationUpdateSerializer(BaseWriteIntegrationSerializer):
credentials = IntegrationCredentialField(write_only=True, required=False)
configuration = IntegrationConfigField(required=False)
providers = serializers.ResourceRelatedField(
queryset=Provider.objects.all(), many=True, required=False
)
class Meta:
model = Integration
fields = [
"inserted_at",
"updated_at",
"enabled",
"connected",
"connection_last_checked_at",
"integration_type",
"configuration",
"credentials",
"providers",
]
extra_kwargs = {
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
"connected": {"read_only": True},
"connection_last_checked_at": {"read_only": True},
"integration_type": {"read_only": True},
}
def validate(self, attrs):
integration_type = self.instance.integration_type
providers = attrs.get("providers")
configuration = attrs.get("configuration") or self.instance.configuration
credentials = attrs.get("credentials") or self.instance.credentials
validated_attrs = super().validate(attrs)
self.validate_integration_data(
integration_type, providers, configuration, credentials
)
return validated_attrs
def update(self, instance, validated_data):
tenant_id = self.context.get("tenant_id")
if validated_data.get("providers") is not None:
instance.providers.clear()
new_relationships = [
IntegrationProviderRelationship(
integration=instance, provider=provider, tenant_id=tenant_id
)
for provider in validated_data["providers"]
]
IntegrationProviderRelationship.objects.bulk_create(new_relationships)
return super().update(instance, validated_data)

View File

@@ -3,32 +3,28 @@ from drf_spectacular.views import SpectacularRedocView
from rest_framework_nested import routers
from api.v1.views import (
ComplianceOverviewViewSet,
CustomTokenObtainView,
CustomTokenRefreshView,
CustomTokenSwitchTenantView,
FindingViewSet,
GithubSocialLoginView,
GoogleSocialLoginView,
IntegrationViewSet,
InvitationAcceptViewSet,
InvitationViewSet,
MembershipViewSet,
OverviewViewSet,
ProviderGroupProvidersRelationshipView,
ProviderGroupViewSet,
ProviderGroupProvidersRelationshipView,
ProviderSecretViewSet,
InvitationViewSet,
InvitationAcceptViewSet,
RoleViewSet,
RoleProviderGroupRelationshipView,
UserRoleRelationshipView,
OverviewViewSet,
ComplianceOverviewViewSet,
ProviderViewSet,
ResourceViewSet,
RoleProviderGroupRelationshipView,
RoleViewSet,
ScanViewSet,
ScheduleViewSet,
SchemaView,
TaskViewSet,
TenantMembersViewSet,
TenantViewSet,
UserRoleRelationshipView,
UserViewSet,
)
@@ -48,7 +44,6 @@ router.register(
)
router.register(r"overviews", OverviewViewSet, basename="overview")
router.register(r"schedules", ScheduleViewSet, basename="schedule")
router.register(r"integrations", IntegrationViewSet, basename="integration")
tenants_router = routers.NestedSimpleRouter(router, r"tenants", lookup="tenant")
tenants_router.register(
@@ -61,7 +56,6 @@ users_router.register(r"memberships", MembershipViewSet, basename="user-membersh
urlpatterns = [
path("tokens", CustomTokenObtainView.as_view(), name="token-obtain"),
path("tokens/refresh", CustomTokenRefreshView.as_view(), name="token-refresh"),
path("tokens/switch", CustomTokenSwitchTenantView.as_view(), name="token-switch"),
path(
"providers/secrets",
ProviderSecretViewSet.as_view({"get": "list", "post": "create"}),
@@ -112,8 +106,6 @@ urlpatterns = [
),
name="provider_group-providers-relationship",
),
path("tokens/google", GoogleSocialLoginView.as_view(), name="token-google"),
path("tokens/github", GithubSocialLoginView.as_view(), name="token-github"),
path("", include(router.urls)),
path("", include(tenants_router.urls)),
path("", include(users_router.urls)),

View File

@@ -1,28 +1,12 @@
import glob
import os
import sentry_sdk
from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter
from allauth.socialaccount.providers.google.views import GoogleOAuth2Adapter
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.result import AsyncResult
from config.env import env
from config.settings.social_login import (
GITHUB_OAUTH_CALLBACK_URL,
GOOGLE_OAUTH_CALLBACK_URL,
)
from dj_rest_auth.registration.views import SocialLoginView
from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce
from django.http import HttpResponse
from django.db.models import Count, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_control
from django_celery_beat.models import PeriodicTask
from drf_spectacular.settings import spectacular_settings
from drf_spectacular.utils import (
OpenApiParameter,
@@ -46,21 +30,19 @@ from rest_framework.permissions import SAFE_METHODS
from rest_framework_json_api.views import RelationshipView, Response
from rest_framework_simplejwt.exceptions import InvalidToken, TokenError
from tasks.beat import schedule_provider_scan
from tasks.jobs.export import get_s3_client
from tasks.tasks import (
check_provider_connection_task,
delete_provider_task,
delete_tenant_task,
perform_scan_summary_task,
perform_scan_task,
)
from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset
from api.compliance import get_compliance_frameworks
from api.db_router import MainRouter
from api.filters import (
ComplianceOverviewFilter,
FindingFilter,
IntegrationFilter,
InvitationFilter,
MembershipFilter,
ProviderFilter,
@@ -78,7 +60,6 @@ from api.filters import (
from api.models import (
ComplianceOverview,
Finding,
Integration,
Invitation,
Membership,
Provider,
@@ -86,13 +67,13 @@ from api.models import (
ProviderGroupMembership,
ProviderSecret,
Resource,
ResourceFindingMapping,
Role,
RoleProviderGroupRelationship,
Scan,
ScanSummary,
SeverityChoices,
StateChoices,
StatusChoices,
Task,
User,
UserRoleRelationship,
@@ -100,17 +81,13 @@ from api.models import (
from api.pagination import ComplianceOverviewPagination
from api.rbac.permissions import Permissions, get_providers, get_role
from api.rls import Tenant
from api.utils import CustomOAuth2Client, validate_invitation
from api.utils import validate_invitation
from api.uuid_utils import datetime_to_uuid7
from api.v1.serializers import (
ComplianceOverviewFullSerializer,
ComplianceOverviewMetadataSerializer,
ComplianceOverviewSerializer,
FindingDynamicFilterSerializer,
FindingMetadataSerializer,
FindingSerializer,
IntegrationCreateSerializer,
IntegrationSerializer,
IntegrationUpdateSerializer,
InvitationAcceptSerializer,
InvitationCreateSerializer,
InvitationSerializer,
@@ -135,9 +112,7 @@ from api.v1.serializers import (
RoleProviderGroupRelationshipSerializer,
RoleSerializer,
RoleUpdateSerializer,
ScanComplianceReportSerializer,
ScanCreateSerializer,
ScanReportSerializer,
ScanSerializer,
ScanUpdateSerializer,
ScheduleDailyCreateSerializer,
@@ -145,8 +120,6 @@ from api.v1.serializers import (
TenantSerializer,
TokenRefreshSerializer,
TokenSerializer,
TokenSocialLoginSerializer,
TokenSwitchTenantSerializer,
UserCreateSerializer,
UserRoleRelationshipSerializer,
UserSerializer,
@@ -213,43 +186,13 @@ class CustomTokenRefreshView(GenericAPIView):
)
@extend_schema(
tags=["Token"],
summary="Switch tenant using a valid tenant ID",
description="Switch tenant by providing a valid tenant ID. The authenticated user must belong to the tenant.",
)
class CustomTokenSwitchTenantView(GenericAPIView):
permission_classes = [permissions.IsAuthenticated]
resource_name = "tokens-switch-tenant"
serializer_class = TokenSwitchTenantSerializer
http_method_names = ["post"]
def post(self, request):
serializer = TokenSwitchTenantSerializer(
data=request.data, context={"request": request}
)
try:
serializer.is_valid(raise_exception=True)
except TokenError as e:
raise InvalidToken(e.args[0])
return Response(
data={
"type": "tokens-switch-tenant",
"attributes": serializer.validated_data,
},
status=status.HTTP_200_OK,
)
@extend_schema(exclude=True)
class SchemaView(SpectacularAPIView):
serializer_class = None
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.7.0"
spectacular_settings.VERSION = "1.1.1"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -305,67 +248,10 @@ class SchemaView(SpectacularAPIView):
"description": "Endpoints for task management, allowing retrieval of task status and "
"revoking tasks that have not started.",
},
{
"name": "Integration",
"description": "Endpoints for managing third-party integrations, including registration, configuration,"
" retrieval, and deletion of integrations such as S3, JIRA, or other services.",
},
]
return super().get(request, *args, **kwargs)
@extend_schema(exclude=True)
class GoogleSocialLoginView(SocialLoginView):
adapter_class = GoogleOAuth2Adapter
client_class = CustomOAuth2Client
callback_url = GOOGLE_OAUTH_CALLBACK_URL
def get_response(self):
original_response = super().get_response()
if self.user and self.user.is_authenticated:
serializer = TokenSocialLoginSerializer(data={"email": self.user.email})
try:
serializer.is_valid(raise_exception=True)
except TokenError as e:
raise InvalidToken(e.args[0])
return Response(
data={
"type": "google-social-tokens",
"attributes": serializer.validated_data,
},
status=status.HTTP_200_OK,
)
return original_response
@extend_schema(exclude=True)
class GithubSocialLoginView(SocialLoginView):
adapter_class = GitHubOAuth2Adapter
client_class = CustomOAuth2Client
callback_url = GITHUB_OAUTH_CALLBACK_URL
def get_response(self):
original_response = super().get_response()
if self.user and self.user.is_authenticated:
serializer = TokenSocialLoginSerializer(data={"email": self.user.email})
try:
serializer.is_valid(raise_exception=True)
except TokenError as e:
raise InvalidToken(e.args[0])
return Response(
data={
"type": "github-social-tokens",
"attributes": serializer.validated_data,
},
status=status.HTTP_200_OK,
)
return original_response
@extend_schema_view(
list=extend_schema(
tags=["User"],
@@ -389,8 +275,8 @@ class GithubSocialLoginView(SocialLoginView):
),
destroy=extend_schema(
tags=["User"],
summary="Delete the user account",
description="Remove the current user account from the system.",
summary="Delete a user account",
description="Remove a user account from the system.",
),
me=extend_schema(
tags=["User"],
@@ -454,12 +340,6 @@ class UserViewSet(BaseUserViewset):
status=status.HTTP_200_OK,
)
def destroy(self, request, *args, **kwargs):
if kwargs["pk"] != str(self.request.user.id):
raise ValidationError("Only the current user can be deleted.")
return super().destroy(request, *args, **kwargs)
@extend_schema(
parameters=[
OpenApiParameter(
@@ -1091,8 +971,6 @@ class ProviderViewSet(BaseRLSViewSet):
provider = get_object_or_404(Provider, pk=pk)
provider.is_deleted = True
provider.save()
task_name = f"scan-perform-scheduled-{pk}"
PeriodicTask.objects.filter(name=task_name).update(enabled=False)
with transaction.atomic():
task = delete_provider_task.delay(
@@ -1140,39 +1018,6 @@ class ProviderViewSet(BaseRLSViewSet):
request=ScanCreateSerializer,
responses={202: OpenApiResponse(response=TaskSerializer)},
),
report=extend_schema(
tags=["Scan"],
summary="Download ZIP report",
description="Returns a ZIP file containing the requested report",
request=ScanReportSerializer,
responses={
200: OpenApiResponse(description="Report obtained successfully"),
202: OpenApiResponse(description="The task is in progress"),
403: OpenApiResponse(description="There is a problem with credentials"),
404: OpenApiResponse(description="The scan has no reports"),
},
),
compliance=extend_schema(
tags=["Scan"],
summary="Retrieve compliance report as CSV",
description="Download a specific compliance report (e.g., 'cis_1.4_aws') as a CSV file.",
parameters=[
OpenApiParameter(
name="name",
type=str,
location=OpenApiParameter.PATH,
required=True,
description="The compliance report name, like 'cis_1.4_aws'",
),
],
responses={
200: OpenApiResponse(
description="CSV file containing the compliance report"
),
404: OpenApiResponse(description="Compliance report not found"),
},
request=None,
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
@@ -1199,7 +1044,7 @@ class ScanViewSet(BaseRLSViewSet):
"""
if self.request.method in SAFE_METHODS:
# No permissions required for GET requests
self.required_permissions = []
self.required_permissions = [Permissions.MANAGE_PROVIDERS]
else:
# Require permission for non-GET requests
self.required_permissions = [Permissions.MANAGE_SCANS]
@@ -1221,14 +1066,6 @@ class ScanViewSet(BaseRLSViewSet):
return ScanCreateSerializer
elif self.action == "partial_update":
return ScanUpdateSerializer
elif self.action == "report":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanReportSerializer
elif self.action == "compliance":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanComplianceReportSerializer
return super().get_serializer_class()
def partial_update(self, request, *args, **kwargs):
@@ -1246,210 +1083,6 @@ class ScanViewSet(BaseRLSViewSet):
)
return Response(data=read_serializer.data, status=status.HTTP_200_OK)
def _get_task_status(self, scan_instance):
"""
Returns task status if the scan or its associated report-generation task is still executing.
If the scan is in an EXECUTING state or if a background task related to report generation
is found and also executing, this method returns a 202 Accepted response with the task
metadata and a `Content-Location` header pointing to the task detail endpoint.
Args:
scan_instance (Scan): The scan instance for which the task status is being checked.
Returns:
Response or None:
- A `Response` with HTTP 202 status and serialized task data if the task is executing.
- `None` if no running task is found or if the task has already completed.
"""
task = None
if scan_instance.state == StateChoices.EXECUTING and scan_instance.task:
task = scan_instance.task
else:
try:
task = Task.objects.get(
task_runner_task__task_name="scan-report",
task_runner_task__task_args__contains=str(scan_instance.id),
)
except Task.DoesNotExist:
return None
self.response_serializer_class = TaskSerializer
serializer = self.get_serializer(task)
if serializer.data.get("state") != StateChoices.EXECUTING:
return None
return Response(
data=serializer.data,
status=status.HTTP_202_ACCEPTED,
headers={
"Content-Location": reverse(
"task-detail", kwargs={"pk": serializer.data["id"]}
)
},
)
def _load_file(self, path_pattern, s3=False, bucket=None, list_objects=False):
"""
Loads a binary file (e.g., ZIP or CSV) and returns its content and filename.
Depending on the input parameters, this method supports loading:
- From S3 using a direct key.
- From S3 by listing objects under a prefix and matching suffix.
- From the local filesystem using glob pattern matching.
Args:
path_pattern (str): The key or glob pattern representing the file location.
s3 (bool, optional): Whether the file is stored in S3. Defaults to False.
bucket (str, optional): The name of the S3 bucket, required if `s3=True`. Defaults to None.
list_objects (bool, optional): If True and `s3=True`, list objects by prefix to find the file. Defaults to False.
Returns:
tuple[bytes, str]: A tuple containing the file content as bytes and the filename if successful.
Response: A DRF `Response` object with an appropriate status and error detail if an error occurs.
"""
if s3:
try:
client = get_s3_client()
except (ClientError, NoCredentialsError, ParamValidationError):
return Response(
{"detail": "There is a problem with credentials."},
status=status.HTTP_403_FORBIDDEN,
)
if list_objects:
# list keys under prefix then match suffix
prefix = os.path.dirname(path_pattern)
suffix = os.path.basename(path_pattern)
try:
resp = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
except ClientError as e:
sentry_sdk.capture_exception(e)
return Response(
{
"detail": "Unable to list compliance files in S3: encountered an AWS error."
},
status=status.HTTP_502_BAD_GATEWAY,
)
contents = resp.get("Contents", [])
keys = [obj["Key"] for obj in contents if obj["Key"].endswith(suffix)]
if not keys:
return Response(
{
"detail": f"No compliance file found for name '{os.path.splitext(suffix)[0]}'."
},
status=status.HTTP_404_NOT_FOUND,
)
# path_pattern here is prefix, but in compliance we build correct suffix check before
key = keys[0]
else:
# path_pattern is exact key
key = path_pattern
try:
s3_obj = client.get_object(Bucket=bucket, Key=key)
except ClientError as e:
code = e.response.get("Error", {}).get("Code")
if code == "NoSuchKey":
return Response(
{"detail": "The scan has no reports."},
status=status.HTTP_404_NOT_FOUND,
)
return Response(
{"detail": "There is a problem with credentials."},
status=status.HTTP_403_FORBIDDEN,
)
content = s3_obj["Body"].read()
filename = os.path.basename(key)
else:
files = glob.glob(path_pattern)
if not files:
return Response(
{"detail": "The scan has no reports."},
status=status.HTTP_404_NOT_FOUND,
)
filepath = files[0]
with open(filepath, "rb") as f:
content = f.read()
filename = os.path.basename(filepath)
return content, filename
def _serve_file(self, content, filename, content_type):
response = HttpResponse(content, content_type=content_type)
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response
@action(detail=True, methods=["get"], url_name="report")
def report(self, request, pk=None):
scan = self.get_object()
# Check for executing tasks
running_resp = self._get_task_status(scan)
if running_resp:
return running_resp
if not scan.output_location:
return Response(
{"detail": "The scan has no reports."}, status=status.HTTP_404_NOT_FOUND
)
if scan.output_location.startswith("s3://"):
bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/")
loader = self._load_file(
key_prefix, s3=True, bucket=bucket, list_objects=False
)
else:
loader = self._load_file(scan.output_location, s3=False)
if isinstance(loader, Response):
return loader
content, filename = loader
return self._serve_file(content, filename, "application/x-zip-compressed")
@action(
detail=True,
methods=["get"],
url_path="compliance/(?P<name>[^/]+)",
url_name="compliance",
)
def compliance(self, request, pk=None, name=None):
scan = self.get_object()
if name not in get_compliance_frameworks(scan.provider.provider):
return Response(
{"detail": f"Compliance '{name}' not found."},
status=status.HTTP_404_NOT_FOUND,
)
running_resp = self._get_task_status(scan)
if running_resp:
return running_resp
if not scan.output_location:
return Response(
{"detail": "The scan has no reports."}, status=status.HTTP_404_NOT_FOUND
)
if scan.output_location.startswith("s3://"):
bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/")
prefix = os.path.join(
os.path.dirname(key_prefix), "compliance", f"{name}.csv"
)
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
else:
base = os.path.dirname(scan.output_location)
pattern = os.path.join(base, "compliance", f"*_{name}.csv")
loader = self._load_file(pattern, s3=False)
if isinstance(loader, Response):
return loader
content, filename = loader
return self._serve_file(content, filename, "text/csv")
def create(self, request, *args, **kwargs):
input_serializer = self.get_serializer(data=request.data)
input_serializer.is_valid(raise_exception=True)
@@ -1464,6 +1097,10 @@ class ScanViewSet(BaseRLSViewSet):
# Disabled for now
# checks_to_execute=scan.scanner_args.get("checks_to_execute"),
},
link=perform_scan_summary_task.si(
tenant_id=self.request.tenant_id,
scan_id=str(scan.id),
),
)
scan.task_id = task.id
@@ -1627,14 +1264,6 @@ class ResourceViewSet(BaseRLSViewSet):
tags=["Finding"],
summary="List all findings",
description="Retrieve a list of all findings with options for filtering by various criteria.",
parameters=[
OpenApiParameter(
name="filter[inserted_at]",
description="At least one of the variations of the `filter[inserted_at]` filter must be provided.",
required=True,
type=OpenApiTypes.DATE,
)
],
),
retrieve=extend_schema(
tags=["Finding"],
@@ -1645,51 +1274,33 @@ class ResourceViewSet(BaseRLSViewSet):
tags=["Finding"],
summary="Retrieve the services and regions that are impacted by findings",
description="Fetch services and regions affected in findings.",
filters=True,
deprecated=True,
),
metadata=extend_schema(
tags=["Finding"],
summary="Retrieve metadata values from findings",
description="Fetch unique metadata values from a set of findings. This is useful for dynamic filtering.",
parameters=[
OpenApiParameter(
name="filter[inserted_at]",
description="At least one of the variations of the `filter[inserted_at]` filter must be provided.",
required=True,
type=OpenApiTypes.DATE,
)
],
responses={201: OpenApiResponse(response=MembershipSerializer)},
filters=True,
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class FindingViewSet(BaseRLSViewSet):
queryset = Finding.all_objects.all()
queryset = Finding.objects.all()
serializer_class = FindingSerializer
filterset_class = FindingFilter
prefetch_for_includes = {
"__all__": [],
"resources": [
Prefetch("resources", queryset=Resource.objects.select_related("findings"))
],
"scan": [Prefetch("scan", queryset=Scan.objects.select_related("findings"))],
}
http_method_names = ["get"]
ordering = ["-inserted_at"]
filterset_class = FindingFilter
ordering = ["-id"]
ordering_fields = [
"id",
"status",
"severity",
"check_id",
"inserted_at",
"updated_at",
]
prefetch_for_includes = {
"__all__": [],
"resources": [
Prefetch(
"resources",
queryset=Resource.all_objects.prefetch_related("tags", "findings"),
)
],
"scan": [
Prefetch("scan", queryset=Scan.all_objects.select_related("findings"))
],
}
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
# the provider through the provider group)
required_permissions = []
@@ -1697,65 +1308,52 @@ class FindingViewSet(BaseRLSViewSet):
def get_serializer_class(self):
if self.action == "findings_services_regions":
return FindingDynamicFilterSerializer
elif self.action == "metadata":
return FindingMetadataSerializer
return super().get_serializer_class()
def get_queryset(self):
tenant_id = self.request.tenant_id
user_roles = get_role(self.request.user)
if user_roles.unlimited_visibility:
# User has unlimited visibility, return all findings
queryset = Finding.all_objects.filter(tenant_id=tenant_id)
# User has unlimited visibility, return all scans
queryset = Finding.objects.filter(tenant_id=self.request.tenant_id)
else:
# User lacks permission, filter findings based on provider groups associated with the role
queryset = Finding.all_objects.filter(
# User lacks permission, filter providers based on provider groups associated with the role
queryset = Finding.objects.filter(
scan__provider__in=get_providers(user_roles)
)
search_value = self.request.query_params.get("filter[search]", None)
if search_value:
# Django's ORM will build a LEFT JOIN and OUTER JOIN on any "through" tables, resulting in duplicates
# The duplicates then require a `distinct` query
search_query = SearchQuery(
search_value, config="simple", search_type="plain"
)
resource_match = Resource.all_objects.filter(
text_search=search_query,
id__in=ResourceFindingMapping.objects.filter(
resource_id=OuterRef("pk"),
tenant_id=tenant_id,
).values("resource_id"),
)
queryset = queryset.filter(
Q(text_search=search_query) | Q(Exists(resource_match))
)
Q(impact_extended__contains=search_value)
| Q(status_extended__contains=search_value)
| Q(check_id=search_value)
| Q(check_id__icontains=search_value)
| Q(text_search=search_query)
| Q(resources__uid=search_value)
| Q(resources__name=search_value)
| Q(resources__region=search_value)
| Q(resources__service=search_value)
| Q(resources__type=search_value)
| Q(resources__uid__contains=search_value)
| Q(resources__name__contains=search_value)
| Q(resources__region__contains=search_value)
| Q(resources__service__contains=search_value)
| Q(resources__tags__text_search=search_query)
| Q(resources__text_search=search_query)
).distinct()
return queryset
def filter_queryset(self, queryset):
# Do not apply filters when retrieving specific finding
if self.action == "retrieve":
return queryset
return super().filter_queryset(queryset)
def list(self, request, *args, **kwargs):
base_qs = self.filter_queryset(self.get_queryset())
paginated_ids = self.paginate_queryset(base_qs.values_list("id", flat=True))
if paginated_ids is not None:
ids = list(paginated_ids)
findings = (
Finding.all_objects.filter(tenant_id=self.request.tenant_id, id__in=ids)
.select_related("scan")
.prefetch_related("resources")
)
# Re-sort in Python to preserve ordering:
findings = sorted(findings, key=lambda x: ids.index(x.id))
serializer = self.get_serializer(findings, many=True)
return self.get_paginated_response(serializer.data)
serializer = self.get_serializer(base_qs, many=True)
return Response(serializer.data)
def inserted_at_to_uuidv7(self, inserted_at):
if inserted_at is None:
return None
return datetime_to_uuid7(inserted_at)
@action(detail=False, methods=["get"], url_name="findings_services_regions")
def findings_services_regions(self, request):
@@ -1778,38 +1376,6 @@ class FindingViewSet(BaseRLSViewSet):
return Response(data=serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
filtered_queryset = self.filter_queryset(queryset)
filtered_ids = filtered_queryset.order_by().values("id")
relevant_resources = Resource.all_objects.filter(
tenant_id=tenant_id, findings__id__in=Subquery(filtered_ids)
).only("service", "region", "type")
aggregation = relevant_resources.aggregate(
services=ArrayAgg("service", flat=True),
regions=ArrayAgg("region", flat=True),
resource_types=ArrayAgg("type", flat=True),
)
services = sorted(set(aggregation["services"] or []))
regions = sorted({region for region in aggregation["regions"] or [] if region})
resource_types = sorted(set(aggregation["resource_types"] or []))
result = {
"services": services,
"regions": regions,
"resource_types": resource_types,
}
serializer = self.get_serializer(data=result)
serializer.is_valid(raise_exception=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema_view(
list=extend_schema(
@@ -2195,21 +1761,6 @@ class RoleProviderGroupRelationshipView(RelationshipView, BaseRLSViewSet):
description="Fetch detailed information about a specific compliance overview by its ID, including detailed "
"requirement information and check's status.",
),
metadata=extend_schema(
tags=["Compliance Overview"],
summary="Retrieve metadata values from compliance overviews",
description="Fetch unique metadata values from a set of compliance overviews. This is useful for dynamic "
"filtering.",
parameters=[
OpenApiParameter(
name="filter[scan_id]",
required=True,
type=OpenApiTypes.UUID,
location=OpenApiParameter.QUERY,
description="Related scan ID.",
),
],
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
@@ -2271,8 +1822,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet):
def get_serializer_class(self):
if self.action == "retrieve":
return ComplianceOverviewFullSerializer
elif self.action == "metadata":
return ComplianceOverviewMetadataSerializer
return super().get_serializer_class()
def list(self, request, *args, **kwargs):
@@ -2289,35 +1838,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet):
)
return super().list(request, *args, **kwargs)
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
scan_id = request.query_params.get("filter[scan_id]")
if not scan_id:
raise ValidationError(
[
{
"detail": "This query parameter is required.",
"status": 400,
"source": {"pointer": "filter[scan_id]"},
"code": "required",
}
]
)
tenant_id = self.request.tenant_id
regions = list(
ComplianceOverview.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
.values_list("region", flat=True)
.order_by("region")
.distinct()
)
result = {"regions": regions}
serializer = self.get_serializer(data=result)
serializer.is_valid(raise_exception=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema(tags=["Overview"])
@extend_schema_view(
@@ -2418,53 +1938,68 @@ class OverviewViewSet(BaseRLSViewSet):
@action(detail=False, methods=["get"], url_name="providers")
def providers(self, request):
tenant_id = self.request.tenant_id
latest_scan_ids = (
Scan.objects.filter(
# Subquery to get the most recent finding for each uid
latest_finding_ids = (
Finding.objects.filter(
tenant_id=tenant_id,
state=StateChoices.COMPLETED,
uid=OuterRef("uid"),
scan__provider=OuterRef("scan__provider"),
)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
.order_by("-id") # Most recent by id
.values("id")[:1]
)
# Filter findings to only include the most recent for each uid
recent_findings = Finding.objects.filter(
tenant_id=tenant_id, id__in=Subquery(latest_finding_ids)
)
# Aggregate findings by provider
findings_aggregated = (
ScanSummary.objects.filter(tenant_id=tenant_id, scan_id__in=latest_scan_ids)
.values("scan__provider__provider")
recent_findings.values("scan__provider__provider")
.annotate(
findings_passed=Coalesce(Sum("_pass"), 0),
findings_failed=Coalesce(Sum("fail"), 0),
findings_muted=Coalesce(Sum("muted"), 0),
total_findings=Coalesce(Sum("total"), 0),
findings_passed=Count("id", filter=Q(status=StatusChoices.PASS.value)),
findings_failed=Count("id", filter=Q(status=StatusChoices.FAIL.value)),
findings_manual=Count(
"id", filter=Q(status=StatusChoices.MANUAL.value)
),
total_findings=Count("id"),
)
.order_by("-findings_failed")
)
# Aggregate total resources by provider
resources_aggregated = (
Resource.objects.filter(tenant_id=tenant_id)
.values("provider__provider")
.annotate(total_resources=Count("id"))
)
resources_dict = {
row["provider__provider"]: row["total_resources"]
for row in resources_aggregated
}
# Combine findings and resources data
overview = []
for row in findings_aggregated:
provider_type = row["scan__provider__provider"]
for findings in findings_aggregated:
provider = findings["scan__provider__provider"]
total_resources = next(
(
res["total_resources"]
for res in resources_aggregated
if res["provider__provider"] == provider
),
0,
)
overview.append(
{
"provider": provider_type,
"total_resources": resources_dict.get(provider_type, 0),
"total_findings": row["total_findings"],
"findings_passed": row["findings_passed"],
"findings_failed": row["findings_failed"],
"findings_muted": row["findings_muted"],
"provider": provider,
"total_resources": total_resources,
"total_findings": findings["total_findings"],
"findings_passed": findings["findings_passed"],
"findings_failed": findings["findings_failed"],
"findings_manual": findings["findings_manual"],
}
)
serializer = OverviewProviderSerializer(overview, many=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="findings")
@@ -2479,7 +2014,7 @@ class OverviewViewSet(BaseRLSViewSet):
state=StateChoices.COMPLETED,
provider_id=OuterRef("scan__provider_id"),
)
.order_by("-inserted_at")
.order_by("-id")
.values("id")[:1]
)
@@ -2524,7 +2059,7 @@ class OverviewViewSet(BaseRLSViewSet):
state=StateChoices.COMPLETED,
provider_id=OuterRef("scan__provider_id"),
)
.order_by("-inserted_at")
.order_by("-id")
.values("id")[:1]
)
@@ -2560,7 +2095,7 @@ class OverviewViewSet(BaseRLSViewSet):
state=StateChoices.COMPLETED,
provider_id=OuterRef("scan__provider_id"),
)
.order_by("-inserted_at")
.order_by("-id")
.values("id")[:1]
)
@@ -2638,67 +2173,3 @@ class ScheduleViewSet(BaseRLSViewSet):
)
},
)
@extend_schema_view(
list=extend_schema(
tags=["Integration"],
summary="List all integrations",
description="Retrieve a list of all configured integrations with options for filtering by various criteria.",
),
retrieve=extend_schema(
tags=["Integration"],
summary="Retrieve integration details",
description="Fetch detailed information about a specific integration by its ID.",
),
create=extend_schema(
tags=["Integration"],
summary="Create a new integration",
description="Register a new integration with the system, providing necessary configuration details.",
),
partial_update=extend_schema(
tags=["Integration"],
summary="Partially update an integration",
description="Modify certain fields of an existing integration without affecting other settings.",
),
destroy=extend_schema(
tags=["Integration"],
summary="Delete an integration",
description="Remove an integration from the system by its ID.",
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class IntegrationViewSet(BaseRLSViewSet):
queryset = Integration.objects.all()
serializer_class = IntegrationSerializer
http_method_names = ["get", "post", "patch", "delete"]
filterset_class = IntegrationFilter
ordering = ["integration_type", "-inserted_at"]
# RBAC required permissions
required_permissions = [Permissions.MANAGE_INTEGRATIONS]
allowed_providers = None
def get_queryset(self):
user_roles = get_role(self.request.user)
if user_roles.unlimited_visibility:
# User has unlimited visibility, return all integrations
queryset = Integration.objects.filter(tenant_id=self.request.tenant_id)
else:
# User lacks permission, filter providers based on provider groups associated with the role
allowed_providers = get_providers(user_roles)
queryset = Integration.objects.filter(providers__in=allowed_providers)
self.allowed_providers = allowed_providers
return queryset
def get_serializer_class(self):
if self.action == "create":
return IntegrationCreateSerializer
elif self.action == "partial_update":
return IntegrationUpdateSerializer
return super().get_serializer_class()
def get_serializer_context(self):
context = super().get_serializer_context()
context["allowed_providers"] = self.allowed_providers
return context

View File

@@ -50,9 +50,9 @@ class RLSTask(Task):
tenant_id = kwargs.get("tenant_id")
with rls_transaction(tenant_id):
APITask.objects.update_or_create(
APITask.objects.create(
id=task_result_instance.task_id,
tenant_id=tenant_id,
defaults={"task_runner_task": task_result_instance},
task_runner_task=task_result_instance,
)
return result

View File

@@ -2,9 +2,10 @@ import json
import logging
from enum import StrEnum
from config.env import env
from django_guid.log_filters import CorrelationId
from config.env import env
class BackendLogger(StrEnum):
GUNICORN = "gunicorn"
@@ -38,9 +39,9 @@ class NDJSONFormatter(logging.Formatter):
"funcName": record.funcName,
"process": record.process,
"thread": record.thread,
"transaction_id": (
record.transaction_id if hasattr(record, "transaction_id") else None
),
"transaction_id": record.transaction_id
if hasattr(record, "transaction_id")
else None,
}
# Add REST API extra fields

View File

@@ -4,8 +4,6 @@ from config.custom_logging import LOGGING # noqa
from config.env import BASE_DIR, env # noqa
from config.settings.celery import * # noqa
from config.settings.partitions import * # noqa
from config.settings.sentry import * # noqa
from config.settings.social_login import * # noqa
SECRET_KEY = env("SECRET_KEY", default="secret")
DEBUG = env.bool("DJANGO_DEBUG", default=False)
@@ -31,13 +29,6 @@ INSTALLED_APPS = [
"django_celery_results",
"django_celery_beat",
"rest_framework_simplejwt.token_blacklist",
"allauth",
"allauth.account",
"allauth.socialaccount",
"allauth.socialaccount.providers.google",
"allauth.socialaccount.providers.github",
"dj_rest_auth.registration",
"rest_framework.authtoken",
]
MIDDLEWARE = [
@@ -51,11 +42,8 @@ MIDDLEWARE = [
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"api.middleware.APILoggingMiddleware",
"allauth.account.middleware.AccountMiddleware",
]
SITE_ID = 1
CORS_ALLOWED_ORIGINS = ["http://localhost", "http://127.0.0.1"]
ROOT_URLCONF = "config.urls"
@@ -219,27 +207,3 @@ CACHE_STALE_WHILE_REVALIDATE = env.int("DJANGO_STALE_WHILE_REVALIDATE", 60)
TESTING = False
FINDINGS_MAX_DAYS_IN_RANGE = env.int("DJANGO_FINDINGS_MAX_DAYS_IN_RANGE", 7)
# API export settings
DJANGO_TMP_OUTPUT_DIRECTORY = env.str(
"DJANGO_TMP_OUTPUT_DIRECTORY", "/tmp/prowler_api_output"
)
DJANGO_FINDINGS_BATCH_SIZE = env.str("DJANGO_FINDINGS_BATCH_SIZE", 1000)
DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID = env.str("DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID", "")
DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY = env.str(
"DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY", ""
)
DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN = env.str("DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN", "")
DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION = env.str("DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION", "")
# HTTP Security Headers
SECURE_CONTENT_TYPE_NOSNIFF = True
X_FRAME_OPTIONS = "DENY"
SECURE_REFERRER_POLICY = "strict-origin-when-cross-origin"
DJANGO_DELETION_BATCH_SIZE = env.int("DJANGO_DELETION_BATCH_SIZE", 5000)

View File

@@ -1,6 +1,7 @@
from config.django.base import * # noqa
from config.env import env
DEBUG = env.bool("DJANGO_DEBUG", default=True)
ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["*"])

View File

@@ -1,6 +1,7 @@
from config.django.base import * # noqa
from config.env import env
DEBUG = env.bool("DJANGO_DEBUG", default=False)
ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["localhost", "127.0.0.1"])

View File

@@ -1,6 +1,7 @@
from config.django.base import * # noqa
from config.env import env
DEBUG = env.bool("DJANGO_DEBUG", default=False)
ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["localhost", "127.0.0.1"])

View File

@@ -1,102 +0,0 @@
import sentry_sdk
from config.env import env
IGNORED_EXCEPTIONS = [
# Provider is not connected due to credentials errors
"is not connected",
# Authentication Errors from AWS
"InvalidToken",
"AccessDeniedException",
"AuthorizationErrorException",
"UnrecognizedClientException",
"UnauthorizedOperation",
"AuthFailure",
"InvalidClientTokenId",
"AWSInvalidProviderIdError",
"InternalServerErrorException",
"AccessDenied",
"No Shodan API Key", # Shodan Check
"RequestLimitExceeded", # For now we don't want to log the RequestLimitExceeded errors
"ThrottlingException",
"Rate exceeded",
"SubscriptionRequiredException",
"UnknownOperationException",
"OptInRequired",
"ReadTimeout",
"LimitExceeded",
"ConnectTimeoutError",
"ExpiredToken",
"IncompleteSignature",
"RegionDisabledException",
"TooManyRequestsException",
"SignatureDoesNotMatch",
"InvalidParameterValueException",
"InvalidInputException",
"ValidationException",
"AWSSecretAccessKeyInvalidError",
"InvalidAction",
"InvalidRequestException",
"RequestExpired",
"ConnectionClosedError",
"MaxRetryError",
"AWSAccessKeyIDInvalidError",
"AWSSessionTokenExpiredError",
"EndpointConnectionError", # AWS Service is not available in a region
"Pool is closed", # The following comes from urllib3: eu-west-1 -- HTTPClientError[126]: An HTTP Client raised an unhandled exception: AWSHTTPSConnectionPool(host='hostname.s3.eu-west-1.amazonaws.com', port=443): Pool is closed.
# Authentication Errors from GCP
"ClientAuthenticationError",
"AuthorizationFailed",
"Reauthentication is needed",
"Permission denied to get service",
"API has not been used in project",
"HttpError 404 when requesting",
"HttpError 403 when requesting",
"HttpError 400 when requesting",
"GCPNoAccesibleProjectsError",
# Authentication Errors from Azure
"ClientAuthenticationError",
"AuthorizationFailed",
"Subscription Not Registered",
"AzureNotValidClientIdError",
"AzureNotValidClientSecretError",
"AzureNotValidTenantIdError",
"AzureInvalidProviderIdError",
"AzureTenantIdAndClientSecretNotBelongingToClientIdError",
"AzureTenantIdAndClientIdNotBelongingToClientSecretError",
"AzureClientIdAndClientSecretNotBelongingToTenantIdError",
"AzureHTTPResponseError",
"Error with credentials provided",
]
def before_send(event, hint):
"""
before_send handles the Sentry events in order to sent them or not
"""
# Ignore logs with the ignored_exceptions
# https://docs.python.org/3/library/logging.html#logrecord-objects
if "log_record" in hint:
log_msg = hint["log_record"].msg
log_lvl = hint["log_record"].levelno
# Handle Error events and discard the rest
if log_lvl == 40 and any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
return
return event
sentry_sdk.init(
dsn=env.str("DJANGO_SENTRY_DSN", ""),
# Add data like request headers and IP for users,
# see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info
before_send=before_send,
send_default_pii=True,
_experiments={
# Set continuous_profiling_auto_start to True
# to automatically start the profiler on when
# possible.
"continuous_profiling_auto_start": True,
},
attach_stacktrace=True,
ignore_errors=IGNORED_EXCEPTIONS,
)

View File

@@ -1,53 +0,0 @@
from config.env import env
# Provider Oauth settings
GOOGLE_OAUTH_CLIENT_ID = env("SOCIAL_GOOGLE_OAUTH_CLIENT_ID", default="")
GOOGLE_OAUTH_CLIENT_SECRET = env("SOCIAL_GOOGLE_OAUTH_CLIENT_SECRET", default="")
GOOGLE_OAUTH_CALLBACK_URL = env("SOCIAL_GOOGLE_OAUTH_CALLBACK_URL", default="")
GITHUB_OAUTH_CLIENT_ID = env("SOCIAL_GITHUB_OAUTH_CLIENT_ID", default="")
GITHUB_OAUTH_CLIENT_SECRET = env("SOCIAL_GITHUB_OAUTH_CLIENT_SECRET", default="")
GITHUB_OAUTH_CALLBACK_URL = env("SOCIAL_GITHUB_OAUTH_CALLBACK_URL", default="")
# Allauth settings
ACCOUNT_LOGIN_METHODS = {"email"} # Use Email / Password authentication
ACCOUNT_USERNAME_REQUIRED = False
ACCOUNT_EMAIL_REQUIRED = True
ACCOUNT_EMAIL_VERIFICATION = "none" # Do not require email confirmation
ACCOUNT_USER_MODEL_USERNAME_FIELD = None
REST_AUTH = {
"TOKEN_MODEL": None,
"REST_USE_JWT": True,
}
# django-allauth (social)
# Authenticate if local account with this email address already exists
SOCIALACCOUNT_EMAIL_AUTHENTICATION = True
# Connect local account and social account if local account with that email address already exists
SOCIALACCOUNT_EMAIL_AUTHENTICATION_AUTO_CONNECT = True
SOCIALACCOUNT_ADAPTER = "api.adapters.ProwlerSocialAccountAdapter"
SOCIALACCOUNT_PROVIDERS = {
"google": {
"APP": {
"client_id": GOOGLE_OAUTH_CLIENT_ID,
"secret": GOOGLE_OAUTH_CLIENT_SECRET,
"key": "",
},
"SCOPE": [
"email",
"profile",
],
"AUTH_PARAMS": {
"access_type": "online",
},
},
"github": {
"APP": {
"client_id": GITHUB_OAUTH_CLIENT_ID,
"secret": GITHUB_OAUTH_CLIENT_SECRET,
},
"SCOPE": [
"user",
"read:org",
],
},
}

View File

@@ -15,8 +15,6 @@ from api.db_utils import rls_transaction
from api.models import (
ComplianceOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
Membership,
Provider,
@@ -488,7 +486,7 @@ def scans_fixture(tenants_fixture, providers_fixture):
name="Scan 1",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
state=StateChoices.AVAILABLE,
tenant_id=tenant.id,
started_at="2024-01-02T00:00:00Z",
)
@@ -628,7 +626,6 @@ def findings_fixture(scans_fixture, resources_fixture):
"CheckId": "test_check_id",
"Description": "test description apple sauce",
},
first_seen_at="2024-01-02T00:00:00Z",
)
finding1.add_resources([resource1])
@@ -654,8 +651,6 @@ def findings_fixture(scans_fixture, resources_fixture):
"CheckId": "test_check_id",
"Description": "test description orange juice",
},
first_seen_at="2024-01-02T00:00:00Z",
muted=True,
)
finding2.add_resources([resource2])
@@ -880,46 +875,6 @@ def scan_summaries_fixture(tenants_fixture, providers_fixture):
)
@pytest.fixture
def integrations_fixture(providers_fixture):
provider1, provider2, *_ = providers_fixture
tenant_id = provider1.tenant_id
integration1 = Integration.objects.create(
tenant_id=tenant_id,
enabled=True,
connected=True,
integration_type="amazon_s3",
configuration={"key": "value"},
credentials={"psswd": "1234"},
)
IntegrationProviderRelationship.objects.create(
tenant_id=tenant_id,
integration=integration1,
provider=provider1,
)
integration2 = Integration.objects.create(
tenant_id=tenant_id,
enabled=True,
connected=True,
integration_type="amazon_s3",
configuration={"key": "value"},
credentials={"psswd": "1234"},
)
IntegrationProviderRelationship.objects.create(
tenant_id=tenant_id,
integration=integration2,
provider=provider1,
)
IntegrationProviderRelationship.objects.create(
tenant_id=tenant_id,
integration=integration2,
provider=provider2,
)
return integration1, integration2
def get_authorization_header(access_token: str) -> dict:
return {"Authorization": f"Bearer {access_token}"}

View File

@@ -5,14 +5,10 @@ from django_celery_beat.models import IntervalSchedule, PeriodicTask
from rest_framework_json_api.serializers import ValidationError
from tasks.tasks import perform_scheduled_scan_task
from api.db_utils import rls_transaction
from api.models import Provider, Scan, StateChoices
from api.models import Provider
def schedule_provider_scan(provider_instance: Provider):
tenant_id = str(provider_instance.tenant_id)
provider_id = str(provider_instance.id)
schedule, _ = IntervalSchedule.objects.get_or_create(
every=24,
period=IntervalSchedule.HOURS,
@@ -21,9 +17,23 @@ def schedule_provider_scan(provider_instance: Provider):
# Create a unique name for the periodic task
task_name = f"scan-perform-scheduled-{provider_instance.id}"
if PeriodicTask.objects.filter(
interval=schedule, name=task_name, task="scan-perform-scheduled"
).exists():
# Schedule the task
_, created = PeriodicTask.objects.get_or_create(
interval=schedule,
name=task_name,
task="scan-perform-scheduled",
kwargs=json.dumps(
{
"tenant_id": str(provider_instance.tenant_id),
"provider_id": str(provider_instance.id),
}
),
one_off=False,
defaults={
"start_time": datetime.now(timezone.utc) + timedelta(hours=24),
},
)
if not created:
raise ValidationError(
[
{
@@ -35,36 +45,9 @@ def schedule_provider_scan(provider_instance: Provider):
]
)
with rls_transaction(tenant_id):
scheduled_scan = Scan.objects.create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.AVAILABLE,
scheduled_at=datetime.now(timezone.utc),
)
# Schedule the task
periodic_task_instance = PeriodicTask.objects.create(
interval=schedule,
name=task_name,
task="scan-perform-scheduled",
kwargs=json.dumps(
{
"tenant_id": tenant_id,
"provider_id": provider_id,
}
),
one_off=False,
start_time=datetime.now(timezone.utc) + timedelta(hours=24),
)
scheduled_scan.scheduler_task_id = periodic_task_instance.id
scheduled_scan.save()
return perform_scheduled_scan_task.apply_async(
kwargs={
"tenant_id": str(provider_instance.tenant_id),
"provider_id": provider_id,
"provider_id": str(provider_instance.id),
},
)

View File

@@ -1,5 +1,5 @@
from celery.utils.log import get_task_logger
from django.db import DatabaseError
from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import batch_delete, rls_transaction
@@ -8,12 +8,11 @@ from api.models import Finding, Provider, Resource, Scan, ScanSummary, Tenant
logger = get_task_logger(__name__)
def delete_provider(tenant_id: str, pk: str):
def delete_provider(pk: str):
"""
Gracefully deletes an instance of a provider along with its related data.
Args:
tenant_id (str): Tenant ID the resources belong to.
pk (str): The primary key of the Provider instance to delete.
Returns:
@@ -23,31 +22,33 @@ def delete_provider(tenant_id: str, pk: str):
Raises:
Provider.DoesNotExist: If no instance with the provided primary key exists.
"""
with rls_transaction(tenant_id):
instance = Provider.all_objects.get(pk=pk)
deletion_summary = {}
deletion_steps = [
("Scan Summaries", ScanSummary.all_objects.filter(scan__provider=instance)),
("Findings", Finding.all_objects.filter(scan__provider=instance)),
("Resources", Resource.all_objects.filter(provider=instance)),
("Scans", Scan.all_objects.filter(provider=instance)),
]
instance = Provider.all_objects.get(pk=pk)
deletion_summary = {}
for step_name, queryset in deletion_steps:
try:
_, step_summary = batch_delete(tenant_id, queryset)
deletion_summary.update(step_summary)
except DatabaseError as db_error:
logger.error(f"Error deleting {step_name}: {db_error}")
raise
with transaction.atomic():
# Delete Scan Summaries
scan_summaries_qs = ScanSummary.all_objects.filter(scan__provider=instance)
_, scans_summ_summary = batch_delete(scan_summaries_qs)
deletion_summary.update(scans_summ_summary)
try:
with rls_transaction(tenant_id):
_, provider_summary = instance.delete()
# Delete Findings
findings_qs = Finding.all_objects.filter(scan__provider=instance)
_, findings_summary = batch_delete(findings_qs)
deletion_summary.update(findings_summary)
# Delete Resources
resources_qs = Resource.all_objects.filter(provider=instance)
_, resources_summary = batch_delete(resources_qs)
deletion_summary.update(resources_summary)
# Delete Scans
scans_qs = Scan.all_objects.filter(provider=instance)
_, scans_summary = batch_delete(scans_qs)
deletion_summary.update(scans_summary)
provider_deleted_count, provider_summary = instance.delete()
deletion_summary.update(provider_summary)
except DatabaseError as db_error:
logger.error(f"Error deleting Provider: {db_error}")
raise
return deletion_summary
@@ -65,8 +66,9 @@ def delete_tenant(pk: str):
deletion_summary = {}
for provider in Provider.objects.using(MainRouter.admin_db).filter(tenant_id=pk):
summary = delete_provider(pk, provider.id)
deletion_summary.update(summary)
with rls_transaction(pk):
summary = delete_provider(provider.id)
deletion_summary.update(summary)
Tenant.objects.using(MainRouter.admin_db).filter(id=pk).delete()

View File

@@ -1,249 +0,0 @@
import os
import zipfile
import boto3
import config.django.base as base
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.utils.log import get_task_logger
from django.conf import settings
from prowler.config.config import (
csv_file_suffix,
html_file_suffix,
json_ocsf_file_suffix,
output_file_timestamp,
)
from prowler.lib.outputs.compliance.aws_well_architected.aws_well_architected import (
AWSWellArchitected,
)
from prowler.lib.outputs.compliance.cis.cis_aws import AWSCIS
from prowler.lib.outputs.compliance.cis.cis_azure import AzureCIS
from prowler.lib.outputs.compliance.cis.cis_gcp import GCPCIS
from prowler.lib.outputs.compliance.cis.cis_kubernetes import KubernetesCIS
from prowler.lib.outputs.compliance.cis.cis_m365 import M365CIS
from prowler.lib.outputs.compliance.ens.ens_aws import AWSENS
from prowler.lib.outputs.compliance.ens.ens_azure import AzureENS
from prowler.lib.outputs.compliance.ens.ens_gcp import GCPENS
from prowler.lib.outputs.compliance.iso27001.iso27001_aws import AWSISO27001
from prowler.lib.outputs.compliance.iso27001.iso27001_azure import AzureISO27001
from prowler.lib.outputs.compliance.iso27001.iso27001_gcp import GCPISO27001
from prowler.lib.outputs.compliance.iso27001.iso27001_kubernetes import (
KubernetesISO27001,
)
from prowler.lib.outputs.compliance.kisa_ismsp.kisa_ismsp_aws import AWSKISAISMSP
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_aws import AWSMitreAttack
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_azure import (
AzureMitreAttack,
)
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_gcp import GCPMitreAttack
from prowler.lib.outputs.compliance.prowler_threatscore.prowler_threatscore_aws import (
ProwlerThreatScoreAWS,
)
from prowler.lib.outputs.compliance.prowler_threatscore.prowler_threatscore_azure import (
ProwlerThreatScoreAzure,
)
from prowler.lib.outputs.compliance.prowler_threatscore.prowler_threatscore_gcp import (
ProwlerThreatScoreGCP,
)
from prowler.lib.outputs.csv.csv import CSV
from prowler.lib.outputs.html.html import HTML
from prowler.lib.outputs.ocsf.ocsf import OCSF
logger = get_task_logger(__name__)
COMPLIANCE_CLASS_MAP = {
"aws": [
(lambda name: name.startswith("cis_"), AWSCIS),
(lambda name: name == "mitre_attack_aws", AWSMitreAttack),
(lambda name: name.startswith("ens_"), AWSENS),
(
lambda name: name.startswith("aws_well_architected_framework"),
AWSWellArchitected,
),
(lambda name: name.startswith("iso27001_"), AWSISO27001),
(lambda name: name.startswith("kisa"), AWSKISAISMSP),
(lambda name: name == "prowler_threatscore_aws", ProwlerThreatScoreAWS),
],
"azure": [
(lambda name: name.startswith("cis_"), AzureCIS),
(lambda name: name == "mitre_attack_azure", AzureMitreAttack),
(lambda name: name.startswith("ens_"), AzureENS),
(lambda name: name.startswith("iso27001_"), AzureISO27001),
(lambda name: name == "prowler_threatscore_azure", ProwlerThreatScoreAzure),
],
"gcp": [
(lambda name: name.startswith("cis_"), GCPCIS),
(lambda name: name == "mitre_attack_gcp", GCPMitreAttack),
(lambda name: name.startswith("ens_"), GCPENS),
(lambda name: name.startswith("iso27001_"), GCPISO27001),
(lambda name: name == "prowler_threatscore_gcp", ProwlerThreatScoreGCP),
],
"kubernetes": [
(lambda name: name.startswith("cis_"), KubernetesCIS),
(lambda name: name.startswith("iso27001_"), KubernetesISO27001),
],
"m365": [
(lambda name: name.startswith("cis_"), M365CIS),
],
}
# Predefined mapping for output formats and their configurations
OUTPUT_FORMATS_MAPPING = {
"csv": {
"class": CSV,
"suffix": csv_file_suffix,
"kwargs": {},
},
"json-ocsf": {"class": OCSF, "suffix": json_ocsf_file_suffix, "kwargs": {}},
"html": {"class": HTML, "suffix": html_file_suffix, "kwargs": {"stats": {}}},
}
def _compress_output_files(output_directory: str) -> str:
"""
Compress output files from all configured output formats into a ZIP archive.
Args:
output_directory (str): The directory where the output files are located.
The function looks up all known suffixes in OUTPUT_FORMATS_MAPPING
and compresses those files into a single ZIP.
Returns:
str: The full path to the newly created ZIP archive.
"""
zip_path = f"{output_directory}.zip"
parent_dir = os.path.dirname(output_directory)
zip_path_abs = os.path.abspath(zip_path)
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
for foldername, _, filenames in os.walk(parent_dir):
for filename in filenames:
file_path = os.path.join(foldername, filename)
if os.path.abspath(file_path) == zip_path_abs:
continue
arcname = os.path.relpath(file_path, start=parent_dir)
zipf.write(file_path, arcname)
return zip_path
def get_s3_client():
"""
Create and return a boto3 S3 client using AWS credentials from environment variables.
This function attempts to initialize an S3 client by reading the AWS access key, secret key,
session token, and region from environment variables. It then validates the client by listing
available S3 buckets. If an error occurs during this process (for example, due to missing or
invalid credentials), it falls back to creating an S3 client without explicitly provided credentials,
which may rely on other configuration sources (e.g., IAM roles).
Returns:
boto3.client: A configured S3 client instance.
Raises:
ClientError, NoCredentialsError, or ParamValidationError if both attempts to create a client fail.
"""
s3_client = None
try:
s3_client = boto3.client(
"s3",
aws_access_key_id=settings.DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY,
aws_session_token=settings.DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN,
region_name=settings.DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION,
)
s3_client.list_buckets()
except (ClientError, NoCredentialsError, ParamValidationError, ValueError):
s3_client = boto3.client("s3")
s3_client.list_buckets()
return s3_client
def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str:
"""
Upload the specified ZIP file to an S3 bucket.
If the S3 bucket environment variables are not configured,
the function returns None without performing an upload.
Args:
tenant_id (str): The tenant identifier, used as part of the S3 key prefix.
zip_path (str): The local file system path to the ZIP file to be uploaded.
scan_id (str): The scan identifier, used as part of the S3 key prefix.
Returns:
str: The S3 URI of the uploaded file (e.g., "s3://<bucket>/<key>") if successful.
None: If the required environment variables for the S3 bucket are not set.
Raises:
botocore.exceptions.ClientError: If the upload attempt to S3 fails for any reason.
"""
bucket = base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET
if not bucket:
return None
try:
s3 = get_s3_client()
# Upload the ZIP file (outputs) to the S3 bucket
zip_key = f"{tenant_id}/{scan_id}/{os.path.basename(zip_path)}"
s3.upload_file(
Filename=zip_path,
Bucket=bucket,
Key=zip_key,
)
# Upload the compliance directory to the S3 bucket
compliance_dir = os.path.join(os.path.dirname(zip_path), "compliance")
for filename in os.listdir(compliance_dir):
local_path = os.path.join(compliance_dir, filename)
if not os.path.isfile(local_path):
continue
file_key = f"{tenant_id}/{scan_id}/compliance/{filename}"
s3.upload_file(Filename=local_path, Bucket=bucket, Key=file_key)
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{zip_key}"
except (ClientError, NoCredentialsError, ParamValidationError, ValueError) as e:
logger.error(f"S3 upload failed: {str(e)}")
def _generate_output_directory(
output_directory, prowler_provider: object, tenant_id: str, scan_id: str
) -> tuple[str, str]:
"""
Generate a file system path for the output directory of a prowler scan.
This function constructs the output directory path by combining a base
temporary output directory, the tenant ID, the scan ID, and details about
the prowler provider along with a timestamp. The resulting path is used to
store the output files of a prowler scan.
Note:
This function depends on one external variable:
- `output_file_timestamp`: A timestamp (as a string) used to uniquely identify the output.
Args:
output_directory (str): The base output directory.
prowler_provider (object): An identifier or descriptor for the prowler provider.
Typically, this is a string indicating the provider (e.g., "aws").
tenant_id (str): The unique identifier for the tenant.
scan_id (str): The unique identifier for the scan.
Returns:
str: The constructed file system path for the prowler scan output directory.
Example:
>>> _generate_output_directory("/tmp", "aws", "tenant-1234", "scan-5678")
'/tmp/tenant-1234/aws/scan-5678/prowler-output-2023-02-15T12:34:56',
'/tmp/tenant-1234/aws/scan-5678/compliance/prowler-output-2023-02-15T12:34:56'
"""
path = (
f"{output_directory}/{tenant_id}/{scan_id}/prowler-output-"
f"{prowler_provider}-{output_file_timestamp}"
)
os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
compliance_path = (
f"{output_directory}/{tenant_id}/{scan_id}/compliance/prowler-output-"
f"{prowler_provider}-{output_file_timestamp}"
)
os.makedirs("/".join(compliance_path.split("/")[:-1]), exist_ok=True)
return path, compliance_path

View File

@@ -1,4 +1,3 @@
import json
import time
from copy import deepcopy
from datetime import datetime, timezone
@@ -7,7 +6,6 @@ from celery.utils.log import get_task_logger
from config.settings.celery import CELERY_DEADLOCK_ATTEMPTS
from django.db import IntegrityError, OperationalError
from django.db.models import Case, Count, IntegerField, Sum, When
from tasks.utils import CustomEncoder
from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
@@ -154,9 +152,6 @@ def perform_prowler_scan(
for progress, findings in prowler_scan.scan():
for finding in findings:
if finding is None:
logger.error(f"None finding detected on scan {scan_id}.")
continue
for attempt in range(CELERY_DEADLOCK_ATTEMPTS):
try:
with rls_transaction(tenant_id):
@@ -181,10 +176,7 @@ def perform_prowler_scan(
# Update resource fields if necessary
updated_fields = []
if (
finding.region
and resource_instance.region != finding.region
):
if resource_instance.region != finding.region:
resource_instance.region = finding.region
updated_fields.append("region")
if resource_instance.service != finding.service_name:
@@ -193,17 +185,6 @@ def perform_prowler_scan(
if resource_instance.type != finding.resource_type:
resource_instance.type = finding.resource_type
updated_fields.append("type")
if resource_instance.metadata != finding.resource_metadata:
resource_instance.metadata = json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
updated_fields.append("metadata")
if resource_instance.details != finding.resource_details:
resource_instance.details = finding.resource_details
updated_fields.append("details")
if resource_instance.partition != finding.partition:
resource_instance.partition = finding.partition
updated_fields.append("partition")
if updated_fields:
with rls_transaction(tenant_id):
resource_instance.save(update_fields=updated_fields)
@@ -238,33 +219,24 @@ def perform_prowler_scan(
# Process finding
with rls_transaction(tenant_id):
finding_uid = finding.uid
last_first_seen_at = None
if finding_uid not in last_status_cache:
most_recent_finding = (
Finding.all_objects.filter(
tenant_id=tenant_id, uid=finding_uid
)
.order_by("-inserted_at")
.values("status", "first_seen_at")
Finding.objects.filter(uid=finding_uid)
.order_by("-id")
.values("status")
.first()
)
last_status = None
if most_recent_finding:
last_status = most_recent_finding["status"]
last_first_seen_at = most_recent_finding["first_seen_at"]
last_status_cache[finding_uid] = last_status, last_first_seen_at
last_status = (
most_recent_finding["status"]
if most_recent_finding
else None
)
last_status_cache[finding_uid] = last_status
else:
last_status, last_first_seen_at = last_status_cache[finding_uid]
last_status = last_status_cache[finding_uid]
status = FindingStatus[finding.status]
delta = _create_finding_delta(last_status, status)
# For the findings prior to the change, when a first finding is found with delta!="new" it will be
# assigned a current date as first_seen_at and the successive findings with the same UID will
# always get the date of the previous finding.
# For new findings, when a finding (delta="new") is found for the first time, the first_seen_at
# attribute will be assigned the current date, the following findings will get that date.
if not last_first_seen_at:
last_first_seen_at = datetime.now(tz=timezone.utc)
# Create the finding
finding_instance = Finding.objects.create(
@@ -279,9 +251,6 @@ def perform_prowler_scan(
raw_result=finding.raw,
check_id=finding.check_id,
scan=scan_instance,
first_seen_at=last_first_seen_at,
muted=finding.muted,
compliance=finding.compliance,
)
finding_instance.add_resources([resource_instance])
@@ -359,18 +328,9 @@ def perform_prowler_scan(
total_requirements=compliance["total_requirements"],
)
)
try:
with rls_transaction(tenant_id):
ComplianceOverview.objects.bulk_create(
compliance_overview_objects, batch_size=100
)
except Exception as overview_exception:
import sentry_sdk
with rls_transaction(tenant_id):
ComplianceOverview.objects.bulk_create(compliance_overview_objects)
sentry_sdk.capture_exception(overview_exception)
logger.error(
f"Error storing compliance overview for scan {scan_id}: {overview_exception}"
)
if exception is not None:
raise exception
@@ -407,7 +367,7 @@ def aggregate_findings(tenant_id: str, scan_id: str):
- muted_changed: Muted findings with a delta of 'changed'.
"""
with rls_transaction(tenant_id):
findings = Finding.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
findings = Finding.objects.filter(scan_id=scan_id)
aggregation = findings.values(
"check_id",
@@ -417,21 +377,21 @@ def aggregate_findings(tenant_id: str, scan_id: str):
).annotate(
fail=Sum(
Case(
When(status="FAIL", muted=False, then=1),
When(status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
_pass=Sum(
Case(
When(status="PASS", muted=False, then=1),
When(status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_count=Sum(
muted=Sum(
Case(
When(muted=True, then=1),
When(status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
@@ -439,63 +399,63 @@ def aggregate_findings(tenant_id: str, scan_id: str):
total=Count("id"),
new=Sum(
Case(
When(delta="new", muted=False, then=1),
When(delta="new", then=1),
default=0,
output_field=IntegerField(),
)
),
changed=Sum(
Case(
When(delta="changed", muted=False, then=1),
When(delta="changed", then=1),
default=0,
output_field=IntegerField(),
)
),
unchanged=Sum(
Case(
When(delta__isnull=True, muted=False, then=1),
When(delta__isnull=True, then=1),
default=0,
output_field=IntegerField(),
)
),
fail_new=Sum(
Case(
When(delta="new", status="FAIL", muted=False, then=1),
When(delta="new", status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
fail_changed=Sum(
Case(
When(delta="changed", status="FAIL", muted=False, then=1),
When(delta="changed", status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
pass_new=Sum(
Case(
When(delta="new", status="PASS", muted=False, then=1),
When(delta="new", status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
pass_changed=Sum(
Case(
When(delta="changed", status="PASS", muted=False, then=1),
When(delta="changed", status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_new=Sum(
Case(
When(delta="new", muted=True, then=1),
When(delta="new", status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_changed=Sum(
Case(
When(delta="changed", muted=True, then=1),
When(delta="changed", status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
@@ -513,7 +473,7 @@ def aggregate_findings(tenant_id: str, scan_id: str):
region=agg["resources__region"],
fail=agg["fail"],
_pass=agg["_pass"],
muted=agg["muted_count"],
muted=agg["muted"],
total=agg["total"],
new=agg["new"],
changed=agg["changed"],

View File

@@ -1,35 +1,15 @@
from datetime import datetime, timedelta, timezone
from pathlib import Path
from shutil import rmtree
from celery import chain, shared_task
from celery.utils.log import get_task_logger
from celery import shared_task
from config.celery import RLSTask
from config.django.base import DJANGO_FINDINGS_BATCH_SIZE, DJANGO_TMP_OUTPUT_DIRECTORY
from django_celery_beat.models import PeriodicTask
from tasks.jobs.connection import check_provider_connection
from tasks.jobs.deletion import delete_provider, delete_tenant
from tasks.jobs.export import (
COMPLIANCE_CLASS_MAP,
OUTPUT_FORMATS_MAPPING,
_compress_output_files,
_generate_output_directory,
_upload_to_s3,
)
from tasks.jobs.scan import aggregate_findings, perform_prowler_scan
from tasks.utils import batched, get_next_execution_datetime
from api.compliance import get_compliance_frameworks
from api.db_utils import rls_transaction
from api.decorators import set_tenant
from api.models import Finding, Provider, Scan, ScanSummary, StateChoices
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.check.compliance_models import Compliance
from prowler.lib.outputs.compliance.generic.generic import GenericCompliance
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
from api.models import Provider, Scan
@shared_task(base=RLSTask, name="provider-connection-check")
@@ -49,10 +29,9 @@ def check_provider_connection_task(provider_id: str):
return check_provider_connection(provider_id=provider_id)
@shared_task(
base=RLSTask, name="provider-deletion", queue="deletion", autoretry_for=(Exception,)
)
def delete_provider_task(provider_id: str, tenant_id: str):
@shared_task(base=RLSTask, name="provider-deletion")
@set_tenant
def delete_provider_task(provider_id: str):
"""
Task to delete a specific Provider instance.
@@ -60,7 +39,6 @@ def delete_provider_task(provider_id: str, tenant_id: str):
Args:
provider_id (str): The primary key of the `Provider` instance to be deleted.
tenant_id (str): Tenant ID the provider belongs to.
Returns:
tuple: A tuple containing:
@@ -68,7 +46,7 @@ def delete_provider_task(provider_id: str, tenant_id: str):
- A dictionary with the count of deleted instances per model,
including related models if cascading deletes were triggered.
"""
return delete_provider(tenant_id=tenant_id, pk=provider_id)
return delete_provider(pk=provider_id)
@shared_task(base=RLSTask, name="scan-perform", queue="scans")
@@ -91,22 +69,13 @@ def perform_scan_task(
Returns:
dict: The result of the scan execution, typically including the status and results of the performed checks.
"""
result = perform_prowler_scan(
return perform_prowler_scan(
tenant_id=tenant_id,
scan_id=scan_id,
provider_id=provider_id,
checks_to_execute=checks_to_execute,
)
chain(
perform_scan_summary_task.si(tenant_id, scan_id),
generate_outputs.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
return result
@shared_task(base=RLSTask, bind=True, name="scan-perform-scheduled", queue="scans")
def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
@@ -131,90 +100,34 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
task_id = self.request.id
with rls_transaction(tenant_id):
provider_instance = Provider.objects.get(pk=provider_id)
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
next_scan_date = datetime.combine(
datetime.now(timezone.utc), periodic_task_instance.start_time.time()
) + timedelta(hours=24)
executed_scan = Scan.objects.filter(
scan_instance = Scan.objects.create(
tenant_id=tenant_id,
provider_id=provider_id,
task__task_runner_task__task_id=task_id,
).order_by("completed_at")
if (
Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.EXECUTING,
scheduler_task_id=periodic_task_instance.id,
scheduled_at__date=datetime.now(timezone.utc).date(),
).exists()
or executed_scan.exists()
):
# Duplicated task execution due to visibility timeout or scan is already running
logger.warning(f"Duplicated scheduled scan for provider {provider_id}.")
try:
affected_scan = executed_scan.first()
if not affected_scan:
raise ValueError(
"Error retrieving affected scan details after detecting duplicated scheduled "
"scan."
)
# Return the affected scan details to avoid losing data
serializer = ScanTaskSerializer(instance=affected_scan)
except Exception as duplicated_scan_exception:
logger.error(
f"Duplicated scheduled scan for provider {provider_id}. Error retrieving affected scan details: "
f"{str(duplicated_scan_exception)}"
)
raise duplicated_scan_exception
return serializer.data
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
scan_instance, _ = Scan.objects.get_or_create(
tenant_id=tenant_id,
provider_id=provider_id,
name="Daily scheduled scan",
provider=provider_instance,
trigger=Scan.TriggerChoices.SCHEDULED,
state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
scheduler_task_id=periodic_task_instance.id,
defaults={
"state": StateChoices.SCHEDULED,
"name": "Daily scheduled scan",
"scheduled_at": next_scan_datetime - timedelta(days=1),
},
next_scan_at=next_scan_date,
task_id=task_id,
)
scan_instance.task_id = task_id
scan_instance.save()
try:
result = perform_prowler_scan(
tenant_id=tenant_id,
scan_id=str(scan_instance.id),
provider_id=provider_id,
)
except Exception as e:
raise e
finally:
with rls_transaction(tenant_id):
Scan.objects.get_or_create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.SCHEDULED,
scheduled_at=next_scan_datetime,
scheduler_task_id=periodic_task_instance.id,
)
chain(
perform_scan_summary_task.si(tenant_id, scan_instance.id),
generate_outputs.si(
scan_id=str(scan_instance.id), provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
result = perform_prowler_scan(
tenant_id=tenant_id,
scan_id=str(scan_instance.id),
provider_id=provider_id,
)
perform_scan_summary_task.apply_async(
kwargs={
"tenant_id": tenant_id,
"scan_id": str(scan_instance.id),
}
)
return result
@@ -223,138 +136,6 @@ def perform_scan_summary_task(tenant_id: str, scan_id: str):
return aggregate_findings(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(name="tenant-deletion", queue="deletion", autoretry_for=(Exception,))
@shared_task(name="tenant-deletion")
def delete_tenant_task(tenant_id: str):
return delete_tenant(pk=tenant_id)
@shared_task(
base=RLSTask,
name="scan-report",
queue="scan-reports",
)
@set_tenant(keep_tenant=True)
def generate_outputs(scan_id: str, provider_id: str, tenant_id: str):
"""
Process findings in batches and generate output files in multiple formats.
This function retrieves findings associated with a scan, processes them
in batches of 50, and writes each batch to the corresponding output files.
It reuses output writer instances across batches, updates them with each
batch of transformed findings, and uses a flag to indicate when the final
batch is being processed. Finally, the output files are compressed and
uploaded to S3.
Args:
tenant_id (str): The tenant identifier.
scan_id (str): The scan identifier.
provider_id (str): The provider_id id to be used in generating outputs.
"""
# Check if the scan has findings
if not ScanSummary.objects.filter(scan_id=scan_id).exists():
logger.info(f"No findings found for scan {scan_id}")
return {"upload": False}
provider_obj = Provider.objects.get(id=provider_id)
prowler_provider = initialize_prowler_provider(provider_obj)
provider_uid = provider_obj.uid
provider_type = provider_obj.provider
frameworks_bulk = Compliance.get_bulk(provider_type)
frameworks_avail = get_compliance_frameworks(provider_type)
out_dir, comp_dir = _generate_output_directory(
DJANGO_TMP_OUTPUT_DIRECTORY, provider_uid, tenant_id, scan_id
)
def get_writer(writer_map, name, factory, is_last):
"""
Return existing writer_map[name] or create via factory().
In both cases set `.close_file = is_last`.
"""
initialization = False
if name not in writer_map:
writer_map[name] = factory()
initialization = True
w = writer_map[name]
w.close_file = is_last
return w, initialization
output_writers = {}
compliance_writers = {}
scan_summary = FindingOutput._transform_findings_stats(
ScanSummary.objects.filter(scan_id=scan_id)
)
qs = Finding.all_objects.filter(scan_id=scan_id).order_by("uid").iterator()
for batch, is_last in batched(qs, DJANGO_FINDINGS_BATCH_SIZE):
fos = [FindingOutput.transform_api_finding(f, prowler_provider) for f in batch]
# Outputs
for mode, cfg in OUTPUT_FORMATS_MAPPING.items():
cls = cfg["class"]
suffix = cfg["suffix"]
extra = cfg.get("kwargs", {}).copy()
if mode == "html":
extra.update(provider=prowler_provider, stats=scan_summary)
writer, initialization = get_writer(
output_writers,
cls,
lambda cls=cls, fos=fos, suffix=suffix: cls(
findings=fos,
file_path=out_dir,
file_extension=suffix,
from_cli=False,
),
is_last,
)
if not initialization:
writer.transform(fos)
writer.batch_write_data_to_file(**extra)
writer._data.clear()
# Compliance CSVs
for name in frameworks_avail:
compliance_obj = frameworks_bulk[name]
klass = GenericCompliance
for condition, cls in COMPLIANCE_CLASS_MAP.get(provider_type, []):
if condition(name):
klass = cls
break
filename = f"{comp_dir}_{name}.csv"
writer, initialization = get_writer(
compliance_writers,
name,
lambda klass=klass, fos=fos: klass(
findings=fos,
compliance=compliance_obj,
file_path=filename,
from_cli=False,
),
is_last,
)
if not initialization:
writer.transform(fos, compliance_obj, name)
writer.batch_write_data_to_file()
writer._data.clear()
compressed = _compress_output_files(out_dir)
upload_uri = _upload_to_s3(tenant_id, compressed, scan_id)
if upload_uri:
try:
rmtree(Path(compressed).parent, ignore_errors=True)
except Exception as e:
logger.error(f"Error deleting output files: {e}")
final_location, did_upload = upload_uri, True
else:
final_location, did_upload = compressed, False
Scan.all_objects.filter(id=scan_id).update(output_location=final_location)
logger.info(f"Scan outputs at {final_location}")
return {"upload": did_upload}

View File

@@ -6,8 +6,6 @@ from django_celery_beat.models import IntervalSchedule, PeriodicTask
from rest_framework_json_api.serializers import ValidationError
from tasks.beat import schedule_provider_scan
from api.models import Scan
@pytest.mark.django_db
class TestScheduleProviderScan:
@@ -17,11 +15,9 @@ class TestScheduleProviderScan:
with patch(
"tasks.tasks.perform_scheduled_scan_task.apply_async"
) as mock_apply_async:
assert Scan.all_objects.count() == 0
result = schedule_provider_scan(provider_instance)
assert result is not None
assert Scan.all_objects.count() == 1
mock_apply_async.assert_called_once_with(
kwargs={

View File

@@ -9,19 +9,17 @@ from api.models import Provider, Tenant
class TestDeleteProvider:
def test_delete_provider_success(self, providers_fixture):
instance = providers_fixture[0]
tenant_id = str(instance.tenant_id)
result = delete_provider(tenant_id, instance.id)
result = delete_provider(instance.id)
assert result
with pytest.raises(ObjectDoesNotExist):
Provider.objects.get(pk=instance.id)
def test_delete_provider_does_not_exist(self, tenants_fixture):
tenant_id = str(tenants_fixture[0].id)
def test_delete_provider_does_not_exist(self):
non_existent_pk = "babf6796-cfcc-4fd3-9dcf-88d012247645"
with pytest.raises(ObjectDoesNotExist):
delete_provider(tenant_id, non_existent_pk)
delete_provider(non_existent_pk)
@pytest.mark.django_db

View File

@@ -1,142 +0,0 @@
import os
import zipfile
from unittest.mock import MagicMock, patch
import pytest
from botocore.exceptions import ClientError
from tasks.jobs.export import (
_compress_output_files,
_generate_output_directory,
_upload_to_s3,
get_s3_client,
)
@pytest.mark.django_db
class TestOutputs:
def test_compress_output_files_creates_zip(self, tmp_path):
output_dir = tmp_path / "output"
output_dir.mkdir()
file_path = output_dir / "result.csv"
file_path.write_text("data")
zip_path = _compress_output_files(str(output_dir))
assert zip_path.endswith(".zip")
assert os.path.exists(zip_path)
with zipfile.ZipFile(zip_path, "r") as zipf:
assert "output/result.csv" in zipf.namelist()
@patch("tasks.jobs.export.boto3.client")
@patch("tasks.jobs.export.settings")
def test_get_s3_client_success(self, mock_settings, mock_boto_client):
mock_settings.DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID = "test"
mock_settings.DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY = "test"
mock_settings.DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN = "token"
mock_settings.DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION = "eu-west-1"
client_mock = MagicMock()
mock_boto_client.return_value = client_mock
client = get_s3_client()
assert client is not None
client_mock.list_buckets.assert_called()
@patch("tasks.jobs.export.boto3.client")
@patch("tasks.jobs.export.settings")
def test_get_s3_client_fallback(self, mock_settings, mock_boto_client):
mock_boto_client.side_effect = [
ClientError({"Error": {"Code": "403"}}, "ListBuckets"),
MagicMock(),
]
client = get_s3_client()
assert client is not None
@patch("tasks.jobs.export.get_s3_client")
@patch("tasks.jobs.export.base")
def test_upload_to_s3_success(self, mock_base, mock_get_client, tmp_path):
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = "test-bucket"
zip_path = tmp_path / "outputs.zip"
zip_path.write_bytes(b"dummy")
compliance_dir = tmp_path / "compliance"
compliance_dir.mkdir()
compliance_file = compliance_dir / "report.csv"
compliance_file.write_text("ok")
client_mock = MagicMock()
mock_get_client.return_value = client_mock
result = _upload_to_s3("tenant-id", str(zip_path), "scan-id")
expected_uri = "s3://test-bucket/tenant-id/scan-id/outputs.zip"
assert result == expected_uri
assert client_mock.upload_file.call_count == 2
@patch("tasks.jobs.export.get_s3_client")
@patch("tasks.jobs.export.base")
def test_upload_to_s3_missing_bucket(self, mock_base, mock_get_client):
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = ""
result = _upload_to_s3("tenant", "/tmp/fake.zip", "scan")
assert result is None
@patch("tasks.jobs.export.get_s3_client")
@patch("tasks.jobs.export.base")
def test_upload_to_s3_skips_non_files(self, mock_base, mock_get_client, tmp_path):
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = "test-bucket"
zip_path = tmp_path / "results.zip"
zip_path.write_bytes(b"zip")
compliance_dir = tmp_path / "compliance"
compliance_dir.mkdir()
(compliance_dir / "subdir").mkdir()
client_mock = MagicMock()
mock_get_client.return_value = client_mock
result = _upload_to_s3("tenant", str(zip_path), "scan")
expected_uri = "s3://test-bucket/tenant/scan/results.zip"
assert result == expected_uri
client_mock.upload_file.assert_called_once()
@patch(
"tasks.jobs.export.get_s3_client",
side_effect=ClientError({"Error": {}}, "Upload"),
)
@patch("tasks.jobs.export.base")
@patch("tasks.jobs.export.logger.error")
def test_upload_to_s3_failure_logs_error(
self, mock_logger, mock_base, mock_get_client, tmp_path
):
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = "bucket"
zip_path = tmp_path / "zipfile.zip"
zip_path.write_bytes(b"zip")
compliance_dir = tmp_path / "compliance"
compliance_dir.mkdir()
(compliance_dir / "report.csv").write_text("csv")
_upload_to_s3("tenant", str(zip_path), "scan")
mock_logger.assert_called()
def test_generate_output_directory_creates_paths(self, tmp_path):
from prowler.config.config import output_file_timestamp
base_dir = str(tmp_path)
tenant_id = "t1"
scan_id = "s1"
provider = "aws"
path, compliance = _generate_output_directory(
base_dir, provider, tenant_id, scan_id
)
assert os.path.isdir(os.path.dirname(path))
assert os.path.isdir(os.path.dirname(compliance))
assert path.endswith(f"{provider}-{output_file_timestamp}")
assert compliance.endswith(f"{provider}-{output_file_timestamp}")

View File

@@ -1,4 +1,3 @@
import json
import uuid
from unittest.mock import MagicMock, patch
@@ -8,7 +7,6 @@ from tasks.jobs.scan import (
_store_resources,
perform_prowler_scan,
)
from tasks.utils import CustomEncoder
from api.models import (
Finding,
@@ -109,13 +107,7 @@ class TestPerformScan:
finding.service_name = "service_name"
finding.resource_type = "resource_type"
finding.resource_tags = {"tag1": "value1", "tag2": "value2"}
finding.muted = False
finding.raw = {}
finding.resource_metadata = {"test": "metadata"}
finding.resource_details = {"details": "test"}
finding.partition = "partition"
finding.muted = True
finding.compliance = {"compliance1": "PASS"}
# Mock the ProwlerScan instance
mock_prowler_scan_instance = MagicMock()
@@ -153,8 +145,6 @@ class TestPerformScan:
assert scan_finding.severity == finding.severity
assert scan_finding.check_id == finding.check_id
assert scan_finding.raw_result == finding.raw
assert scan_finding.muted
assert scan_finding.compliance == finding.compliance
assert scan_resource.tenant == tenant
assert scan_resource.uid == finding.resource_uid
@@ -162,11 +152,6 @@ class TestPerformScan:
assert scan_resource.service == finding.service_name
assert scan_resource.type == finding.resource_type
assert scan_resource.name == finding.resource_name
assert scan_resource.metadata == json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
assert scan_resource.details == f"{finding.resource_details}"
assert scan_resource.partition == finding.partition
# Assert that the resource tags have been created and associated
tags = scan_resource.tags.all()

View File

@@ -1,415 +0,0 @@
import uuid
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from tasks.tasks import generate_outputs
@pytest.mark.django_db
class TestGenerateOutputs:
def setup_method(self):
self.scan_id = str(uuid.uuid4())
self.provider_id = str(uuid.uuid4())
self.tenant_id = str(uuid.uuid4())
def test_no_findings_returns_early(self):
with patch("tasks.tasks.ScanSummary.objects.filter") as mock_filter:
mock_filter.return_value.exists.return_value = False
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert result == {"upload": False}
mock_filter.assert_called_once_with(scan_id=self.scan_id)
@patch("tasks.tasks.rmtree")
@patch("tasks.tasks._upload_to_s3")
@patch("tasks.tasks._compress_output_files")
@patch("tasks.tasks.get_compliance_frameworks")
@patch("tasks.tasks.Compliance.get_bulk")
@patch("tasks.tasks.initialize_prowler_provider")
@patch("tasks.tasks.Provider.objects.get")
@patch("tasks.tasks.ScanSummary.objects.filter")
@patch("tasks.tasks.Finding.all_objects.filter")
def test_generate_outputs_happy_path(
self,
mock_finding_filter,
mock_scan_summary_filter,
mock_provider_get,
mock_initialize_provider,
mock_compliance_get_bulk,
mock_get_available_frameworks,
mock_compress,
mock_upload,
mock_rmtree,
):
mock_scan_summary_filter.return_value.exists.return_value = True
mock_provider = MagicMock()
mock_provider.uid = "provider-uid"
mock_provider.provider = "aws"
mock_provider_get.return_value = mock_provider
prowler_provider = MagicMock()
mock_initialize_provider.return_value = prowler_provider
mock_compliance_get_bulk.return_value = {"cis": MagicMock()}
mock_get_available_frameworks.return_value = ["cis"]
dummy_finding = MagicMock(uid="f1")
mock_finding_filter.return_value.order_by.return_value.iterator.return_value = [
[dummy_finding],
True,
]
mock_transformed_stats = {"some": "stats"}
with (
patch(
"tasks.tasks.FindingOutput._transform_findings_stats",
return_value=mock_transformed_stats,
),
patch(
"tasks.tasks.FindingOutput.transform_api_finding",
return_value={"transformed": "f1"},
),
patch(
"tasks.tasks.OUTPUT_FORMATS_MAPPING",
{
"json": {
"class": MagicMock(name="JSONWriter"),
"suffix": ".json",
"kwargs": {},
}
},
),
patch(
"tasks.tasks.COMPLIANCE_CLASS_MAP",
{"aws": [(lambda x: True, MagicMock(name="CSVCompliance"))]},
),
patch(
"tasks.tasks._generate_output_directory",
return_value=("out-dir", "comp-dir"),
),
patch("tasks.tasks.Scan.all_objects.filter") as mock_scan_update,
):
mock_compress.return_value = "/tmp/zipped.zip"
mock_upload.return_value = "s3://bucket/zipped.zip"
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert result == {"upload": True}
mock_scan_update.return_value.update.assert_called_once_with(
output_location="s3://bucket/zipped.zip"
)
mock_rmtree.assert_called_once_with(
Path("/tmp/zipped.zip").parent, ignore_errors=True
)
def test_generate_outputs_fails_upload(self):
with (
patch("tasks.tasks.ScanSummary.objects.filter") as mock_filter,
patch("tasks.tasks.Provider.objects.get"),
patch("tasks.tasks.initialize_prowler_provider"),
patch("tasks.tasks.Compliance.get_bulk"),
patch("tasks.tasks.get_compliance_frameworks"),
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory", return_value=("out", "comp")
),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
patch("tasks.tasks.FindingOutput.transform_api_finding"),
patch(
"tasks.tasks.OUTPUT_FORMATS_MAPPING",
{
"json": {
"class": MagicMock(name="Writer"),
"suffix": ".json",
"kwargs": {},
}
},
),
patch(
"tasks.tasks.COMPLIANCE_CLASS_MAP",
{"aws": [(lambda x: True, MagicMock())]},
),
patch("tasks.tasks._compress_output_files", return_value="/tmp/compressed"),
patch("tasks.tasks._upload_to_s3", return_value=None),
patch("tasks.tasks.Scan.all_objects.filter") as mock_scan_update,
):
mock_filter.return_value.exists.return_value = True
mock_findings.return_value.order_by.return_value.iterator.return_value = [
[MagicMock()],
True,
]
result = generate_outputs(
scan_id="scan",
provider_id="provider",
tenant_id=self.tenant_id,
)
assert result == {"upload": False}
mock_scan_update.return_value.update.assert_called_once()
def test_generate_outputs_triggers_html_extra_update(self):
mock_finding_output = MagicMock()
mock_finding_output.compliance = {"cis": ["requirement-1", "requirement-2"]}
with (
patch("tasks.tasks.ScanSummary.objects.filter") as mock_filter,
patch("tasks.tasks.Provider.objects.get"),
patch("tasks.tasks.initialize_prowler_provider"),
patch("tasks.tasks.Compliance.get_bulk", return_value={"cis": MagicMock()}),
patch("tasks.tasks.get_compliance_frameworks", return_value=["cis"]),
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory", return_value=("out", "comp")
),
patch(
"tasks.tasks.FindingOutput._transform_findings_stats",
return_value={"some": "stats"},
),
patch(
"tasks.tasks.FindingOutput.transform_api_finding",
return_value=mock_finding_output,
),
patch("tasks.tasks._compress_output_files", return_value="/tmp/compressed"),
patch("tasks.tasks._upload_to_s3", return_value="s3://bucket/f.zip"),
patch("tasks.tasks.Scan.all_objects.filter"),
):
mock_filter.return_value.exists.return_value = True
mock_findings.return_value.order_by.return_value.iterator.return_value = [
[MagicMock()],
True,
]
html_writer_mock = MagicMock()
with (
patch(
"tasks.tasks.OUTPUT_FORMATS_MAPPING",
{
"html": {
"class": lambda *args, **kwargs: html_writer_mock,
"suffix": ".html",
"kwargs": {},
}
},
),
patch(
"tasks.tasks.COMPLIANCE_CLASS_MAP",
{"aws": [(lambda x: True, MagicMock())]},
),
):
generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
html_writer_mock.batch_write_data_to_file.assert_called_once()
def test_transform_called_only_on_second_batch(self):
raw1 = MagicMock()
raw2 = MagicMock()
tf1 = MagicMock()
tf1.compliance = {}
tf2 = MagicMock()
tf2.compliance = {}
writer_instances = []
class TrackingWriter:
def __init__(self, findings, file_path, file_extension, from_cli):
self.transform_called = 0
self.batch_write_data_to_file = MagicMock()
self._data = []
self.close_file = False
writer_instances.append(self)
def transform(self, fos):
self.transform_called += 1
with (
patch("tasks.tasks.ScanSummary.objects.filter") as mock_summary,
patch("tasks.tasks.Provider.objects.get"),
patch("tasks.tasks.initialize_prowler_provider"),
patch("tasks.tasks.Compliance.get_bulk"),
patch("tasks.tasks.get_compliance_frameworks", return_value=[]),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
patch(
"tasks.tasks.FindingOutput.transform_api_finding",
side_effect=[tf1, tf2],
),
patch(
"tasks.tasks._generate_output_directory",
return_value=("outdir", "compdir"),
),
patch("tasks.tasks._compress_output_files", return_value="outdir.zip"),
patch("tasks.tasks._upload_to_s3", return_value="s3://bucket/outdir.zip"),
patch("tasks.tasks.rmtree"),
patch("tasks.tasks.Scan.all_objects.filter"),
patch(
"tasks.tasks.batched",
return_value=[
([raw1], False),
([raw2], True),
],
),
):
mock_summary.return_value.exists.return_value = True
with patch(
"tasks.tasks.OUTPUT_FORMATS_MAPPING",
{
"json": {
"class": TrackingWriter,
"suffix": ".json",
"kwargs": {},
}
},
):
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert result == {"upload": True}
assert len(writer_instances) == 1
writer = writer_instances[0]
assert writer.transform_called == 1
def test_compliance_transform_called_on_second_batch(self):
raw1 = MagicMock()
raw2 = MagicMock()
compliance_obj = MagicMock()
writer_instances = []
class TrackingComplianceWriter:
def __init__(self, *args, **kwargs):
self.transform_calls = []
self._data = []
writer_instances.append(self)
def transform(self, fos, comp_obj, name):
self.transform_calls.append((fos, comp_obj, name))
def batch_write_data_to_file(self):
pass
two_batches = [
([raw1], False),
([raw2], True),
]
with (
patch("tasks.tasks.ScanSummary.objects.filter") as mock_summary,
patch(
"tasks.tasks.Provider.objects.get",
return_value=MagicMock(uid="UID", provider="aws"),
),
patch("tasks.tasks.initialize_prowler_provider"),
patch(
"tasks.tasks.Compliance.get_bulk", return_value={"cis": compliance_obj}
),
patch("tasks.tasks.get_compliance_frameworks", return_value=["cis"]),
patch(
"tasks.tasks._generate_output_directory",
return_value=("outdir", "compdir"),
),
patch("tasks.tasks.FindingOutput._transform_findings_stats"),
patch(
"tasks.tasks.FindingOutput.transform_api_finding",
side_effect=lambda f, prov: f,
),
patch("tasks.tasks._compress_output_files", return_value="outdir.zip"),
patch("tasks.tasks._upload_to_s3", return_value="s3://bucket/outdir.zip"),
patch("tasks.tasks.rmtree"),
patch(
"tasks.tasks.Scan.all_objects.filter",
return_value=MagicMock(update=lambda **kw: None),
),
patch("tasks.tasks.batched", return_value=two_batches),
patch("tasks.tasks.OUTPUT_FORMATS_MAPPING", {}),
patch(
"tasks.tasks.COMPLIANCE_CLASS_MAP",
{"aws": [(lambda name: True, TrackingComplianceWriter)]},
),
):
mock_summary.return_value.exists.return_value = True
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert len(writer_instances) == 1
writer = writer_instances[0]
assert writer.transform_calls == [([raw2], compliance_obj, "cis")]
assert result == {"upload": True}
def test_generate_outputs_logs_rmtree_exception(self, caplog):
mock_finding_output = MagicMock()
mock_finding_output.compliance = {"cis": ["requirement-1", "requirement-2"]}
with (
patch("tasks.tasks.ScanSummary.objects.filter") as mock_filter,
patch("tasks.tasks.Provider.objects.get"),
patch("tasks.tasks.initialize_prowler_provider"),
patch("tasks.tasks.Compliance.get_bulk", return_value={"cis": MagicMock()}),
patch("tasks.tasks.get_compliance_frameworks", return_value=["cis"]),
patch("tasks.tasks.Finding.all_objects.filter") as mock_findings,
patch(
"tasks.tasks._generate_output_directory", return_value=("out", "comp")
),
patch(
"tasks.tasks.FindingOutput._transform_findings_stats",
return_value={"some": "stats"},
),
patch(
"tasks.tasks.FindingOutput.transform_api_finding",
return_value=mock_finding_output,
),
patch("tasks.tasks._compress_output_files", return_value="/tmp/compressed"),
patch("tasks.tasks._upload_to_s3", return_value="s3://bucket/file.zip"),
patch("tasks.tasks.Scan.all_objects.filter"),
patch("tasks.tasks.rmtree", side_effect=Exception("Test deletion error")),
):
mock_filter.return_value.exists.return_value = True
mock_findings.return_value.order_by.return_value.iterator.return_value = [
[MagicMock()],
True,
]
with (
patch(
"tasks.tasks.OUTPUT_FORMATS_MAPPING",
{
"json": {
"class": lambda *args, **kwargs: MagicMock(),
"suffix": ".json",
"kwargs": {},
}
},
),
patch(
"tasks.tasks.COMPLIANCE_CLASS_MAP",
{"aws": [(lambda x: True, MagicMock())]},
),
):
with caplog.at_level("ERROR"):
generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert "Error deleting output files" in caplog.text

View File

@@ -1,102 +0,0 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import patch
import pytest
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from django_celery_results.models import TaskResult
from tasks.utils import batched, get_next_execution_datetime
@pytest.mark.django_db
class TestGetNextExecutionDatetime:
@pytest.fixture
def setup_periodic_task(self, db):
# Create a periodic task with an hourly interval
interval = IntervalSchedule.objects.create(
every=1, period=IntervalSchedule.HOURS
)
periodic_task = PeriodicTask.objects.create(
name="scan-perform-scheduled-123",
task="scan-perform-scheduled",
interval=interval,
)
return periodic_task
@pytest.fixture
def setup_task_result(self, db):
# Create a task result record
task_result = TaskResult.objects.create(
task_id="abc123",
task_name="scan-perform-scheduled",
status="SUCCESS",
date_created=datetime.now(timezone.utc) - timedelta(hours=1),
result="Success",
)
return task_result
def test_get_next_execution_datetime_success(
self, setup_task_result, setup_periodic_task
):
task_result = setup_task_result
periodic_task = setup_periodic_task
# Mock periodic_task_name on TaskResult
with patch.object(
TaskResult, "periodic_task_name", return_value=periodic_task.name
):
next_execution = get_next_execution_datetime(
task_id=task_result.task_id, provider_id="123"
)
expected_time = task_result.date_created + timedelta(hours=1)
assert next_execution == expected_time
def test_get_next_execution_datetime_fallback_to_provider_id(
self, setup_task_result, setup_periodic_task
):
task_result = setup_task_result
# Simulate the case where `periodic_task_name` is missing
with patch.object(TaskResult, "periodic_task_name", return_value=None):
next_execution = get_next_execution_datetime(
task_id=task_result.task_id, provider_id="123"
)
expected_time = task_result.date_created + timedelta(hours=1)
assert next_execution == expected_time
def test_get_next_execution_datetime_periodic_task_does_not_exist(
self, setup_task_result
):
task_result = setup_task_result
with pytest.raises(PeriodicTask.DoesNotExist):
get_next_execution_datetime(
task_id=task_result.task_id, provider_id="nonexistent"
)
class TestBatchedFunction:
def test_empty_iterable(self):
result = list(batched([], 3))
assert result == [([], True)]
def test_exact_batches(self):
result = list(batched([1, 2, 3, 4], 2))
expected = [([1, 2], False), ([3, 4], False), ([], True)]
assert result == expected
def test_inexact_batches(self):
result = list(batched([1, 2, 3, 4, 5], 2))
expected = [([1, 2], False), ([3, 4], False), ([5], True)]
assert result == expected
def test_batch_size_one(self):
result = list(batched([1, 2, 3], 1))
expected = [([1], False), ([2], False), ([3], False), ([], True)]
assert result == expected
def test_batch_size_greater_than_length(self):
result = list(batched([1, 2, 3], 5))
expected = [([1, 2, 3], True)]
assert result == expected

View File

@@ -1,73 +0,0 @@
import json
from datetime import datetime, timedelta, timezone
from enum import Enum
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
class CustomEncoder(json.JSONEncoder):
def default(self, o):
# Enum serialization
if isinstance(o, Enum):
return o.value
# Datetime and timedelta serialization
if isinstance(o, datetime):
return o.isoformat(timespec="seconds")
if isinstance(o, timedelta):
return o.total_seconds()
# Custom object serialization
try:
return super().default(o)
except TypeError:
try:
return o.__dict__
except AttributeError:
return str(o)
def get_next_execution_datetime(task_id: int, provider_id: str) -> datetime:
task_instance = TaskResult.objects.get(task_id=task_id)
try:
periodic_task_instance = PeriodicTask.objects.get(
name=task_instance.periodic_task_name
)
except PeriodicTask.DoesNotExist:
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
interval = periodic_task_instance.interval
current_scheduled_time = datetime.combine(
datetime.now(timezone.utc).date(),
task_instance.date_created.time(),
tzinfo=timezone.utc,
)
return current_scheduled_time + timedelta(**{interval.period: interval.every})
def batched(iterable, batch_size):
"""
Yield successive batches from an iterable.
Args:
iterable: An iterable source of items.
batch_size (int): The number of items per batch.
Yields:
tuple: A pair (batch, is_last_batch) where:
- batch (list): A list of items (with length equal to batch_size,
except possibly for the last batch).
- is_last_batch (bool): True if this is the final batch, False otherwise.
"""
batch = []
for item in iterable:
batch.append(item)
if len(batch) == batch_size:
yield batch, False
batch = []
yield batch, True

View File

@@ -1,156 +0,0 @@
#!/usr/bin/env python3
import argparse
import re
import subprocess
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use("ggplot")
def run_locust(
locust_file: str,
host: str,
users: int,
hatch_rate: int,
run_time: str,
csv_prefix: Path,
) -> Path:
artifacts_dir = Path("artifacts")
artifacts_dir.mkdir(parents=True, exist_ok=True)
cmd = [
"locust",
"-f",
f"scenarios/{locust_file}",
"--headless",
"-u",
str(users),
"-r",
str(hatch_rate),
"-t",
run_time,
"--host",
host,
"--csv",
str(artifacts_dir / csv_prefix.name),
]
print(f"Running Locust: {' '.join(cmd)}")
process = subprocess.run(cmd)
if process.returncode:
sys.exit("Locust execution failed")
stats_file = artifacts_dir / f"{csv_prefix.stem}_stats.csv"
if not stats_file.exists():
sys.exit(f"Stats CSV not found: {stats_file}")
return stats_file
def load_percentiles(csv_path: Path) -> pd.DataFrame:
df = pd.read_csv(csv_path)
mapping = {"50%": "p50", "75%": "p75", "90%": "p90", "95%": "p95"}
available = [col for col in mapping if col in df.columns]
renamed = {col: mapping[col] for col in available}
df = df.rename(columns=renamed).set_index("Name")[renamed.values()]
return df.drop(index=["Aggregated"], errors="ignore")
def sanitize_label(label: str) -> str:
text = re.sub(r"[^\w]+", "_", label.strip().lower())
return text.strip("_")
def plot_multi_comparison(metrics: dict[str, pd.DataFrame]) -> None:
common = sorted(set.intersection(*(set(df.index) for df in metrics.values())))
percentiles = list(next(iter(metrics.values())).columns)
groups = len(metrics)
width = 0.8 / groups
for endpoint in common:
fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
for idx, (label, df) in enumerate(metrics.items()):
series = df.loc[endpoint]
positions = [
i + (idx - groups / 2) * width + width / 2
for i in range(len(percentiles))
]
bars = ax.bar(positions, series.values, width, label=label)
for bar in bars:
height = bar.get_height()
ax.annotate(
f"{int(height)}",
xy=(bar.get_x() + bar.get_width() / 2, height),
xytext=(0, 3),
textcoords="offset points",
ha="center",
va="bottom",
fontsize=8,
)
ax.set_xticks(range(len(percentiles)))
ax.set_xticklabels(percentiles)
ax.set_ylabel("Latency (ms)")
ax.set_title(endpoint, fontsize=12)
ax.grid(True, axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
fig.subplots_adjust(right=0.75)
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), framealpha=0.9)
output = Path("artifacts") / f"comparison_{sanitize_label(endpoint)}.png"
plt.savefig(output)
plt.close(fig)
print(f"Saved chart: {output}")
def main() -> None:
parser = argparse.ArgumentParser(description="Run Locust and compare metrics")
parser.add_argument("--locustfile", required=True, help="Locust file in scenarios/")
parser.add_argument("--host", required=True, help="Target host URL")
parser.add_argument(
"--users", type=int, default=10, help="Number of simulated users"
)
parser.add_argument("--rate", type=int, default=1, help="Hatch rate per second")
parser.add_argument("--time", default="1m", help="Test duration (e.g. 30s, 1m)")
parser.add_argument(
"--metrics-dir", default="baselines", help="Directory with CSV baselines"
)
parser.add_argument("--version", default="current", help="Test version")
args = parser.parse_args()
metrics_dir = Path(args.metrics_dir)
if not metrics_dir.is_dir():
sys.exit(f"Metrics directory not found: {metrics_dir}")
metrics_data: dict[str, pd.DataFrame] = {}
for csv_file in sorted(metrics_dir.glob("*.csv")):
metrics_data[csv_file.stem] = load_percentiles(csv_file)
current_prefix = Path(args.version)
current_csv = run_locust(
locust_file=args.locustfile,
host=args.host,
users=args.users,
hatch_rate=args.rate,
run_time=args.time,
csv_prefix=current_prefix,
)
metrics_data[args.version] = load_percentiles(current_csv)
for endpoint in sorted(
set.intersection(*(set(df.index) for df in metrics_data.values()))
):
parts = [endpoint]
for label, df in metrics_data.items():
s = df.loc[endpoint]
parts.append(f"{label}: p50 {s.p50}, p75 {s.p75}, p90 {s.p90}, p95 {s.p95}")
print(" | ".join(parts))
plot_multi_comparison(metrics_data)
if __name__ == "__main__":
main()

View File

@@ -1,2 +0,0 @@
locust==2.34.1
matplotlib==3.10.1

View File

@@ -1,202 +0,0 @@
from locust import events, task
from utils.config import (
FINDINGS_UI_SORT_VALUES,
L_PROVIDER_NAME,
M_PROVIDER_NAME,
S_PROVIDER_NAME,
TARGET_INSERTED_AT,
)
from utils.helpers import (
APIUserBase,
get_api_token,
get_auth_headers,
get_next_resource_filter,
get_resource_filters_pairs,
get_scan_id_from_provider_name,
get_sort_value,
)
GLOBAL = {
"token": None,
"scan_ids": {},
"resource_filters": None,
"large_resource_filters": None,
}
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
GLOBAL["token"] = get_api_token(environment.host)
GLOBAL["scan_ids"]["small"] = get_scan_id_from_provider_name(
environment.host, GLOBAL["token"], S_PROVIDER_NAME
)
GLOBAL["scan_ids"]["medium"] = get_scan_id_from_provider_name(
environment.host, GLOBAL["token"], M_PROVIDER_NAME
)
GLOBAL["scan_ids"]["large"] = get_scan_id_from_provider_name(
environment.host, GLOBAL["token"], L_PROVIDER_NAME
)
GLOBAL["resource_filters"] = get_resource_filters_pairs(
environment.host, GLOBAL["token"]
)
GLOBAL["large_resource_filters"] = get_resource_filters_pairs(
environment.host, GLOBAL["token"], GLOBAL["scan_ids"]["large"]
)
class APIUser(APIUserBase):
def on_start(self):
self.token = GLOBAL["token"]
self.s_scan_id = GLOBAL["scan_ids"]["small"]
self.m_scan_id = GLOBAL["scan_ids"]["medium"]
self.l_scan_id = GLOBAL["scan_ids"]["large"]
self.available_resource_filters = GLOBAL["resource_filters"]
self.available_resource_filters_large_scan = GLOBAL["large_resource_filters"]
@task
def findings_default(self):
name = "/findings"
page_number = self._next_page(name)
endpoint = (
f"/findings?page[number]={page_number}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[inserted_at]={TARGET_INSERTED_AT}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(3)
def findings_default_include(self):
name = "/findings?include"
page = self._next_page(name)
endpoint = (
f"/findings?page[number]={page}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[inserted_at]={TARGET_INSERTED_AT}"
f"&include=scan.provider,resources"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(3)
def findings_metadata(self):
endpoint = f"/findings/metadata?" f"filter[inserted_at]={TARGET_INSERTED_AT}"
self.client.get(
endpoint, headers=get_auth_headers(self.token), name="/findings/metadata"
)
@task
def findings_scan_small(self):
name = "/findings?filter[scan_id] - 50k"
page_number = self._next_page(name)
endpoint = (
f"/findings?page[number]={page_number}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[scan]={self.s_scan_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def findings_metadata_scan_small(self):
endpoint = f"/findings/metadata?" f"&filter[scan]={self.s_scan_id}"
self.client.get(
endpoint,
headers=get_auth_headers(self.token),
name="/findings/metadata?filter[scan_id] - 50k",
)
@task(2)
def findings_scan_medium(self):
name = "/findings?filter[scan_id] - 250k"
page_number = self._next_page(name)
endpoint = (
f"/findings?page[number]={page_number}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[scan]={self.m_scan_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def findings_metadata_scan_medium(self):
endpoint = f"/findings/metadata?" f"&filter[scan]={self.m_scan_id}"
self.client.get(
endpoint,
headers=get_auth_headers(self.token),
name="/findings/metadata?filter[scan_id] - 250k",
)
@task
def findings_scan_large(self):
name = "/findings?filter[scan_id] - 500k"
page_number = self._next_page(name)
endpoint = (
f"/findings?page[number]={page_number}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[scan]={self.l_scan_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def findings_scan_large_include(self):
name = "/findings?filter[scan_id]&include - 500k"
page_number = self._next_page(name)
endpoint = (
f"/findings?page[number]={page_number}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[scan]={self.l_scan_id}"
f"&include=scan.provider,resources"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def findings_metadata_scan_large(self):
endpoint = f"/findings/metadata?" f"&filter[scan]={self.l_scan_id}"
self.client.get(
endpoint,
headers=get_auth_headers(self.token),
name="/findings/metadata?filter[scan_id] - 500k",
)
@task(2)
def findings_resource_filter(self):
name = "/findings?filter[resource_filter]&include"
filter_name, filter_value = get_next_resource_filter(
self.available_resource_filters
)
endpoint = (
f"/findings?filter[{filter_name}]={filter_value}"
f"&filter[inserted_at]={TARGET_INSERTED_AT}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&include=scan.provider,resources"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(3)
def findings_metadata_resource_filter(self):
name = "/findings/metadata?filter[resource_filter]"
filter_name, filter_value = get_next_resource_filter(
self.available_resource_filters
)
endpoint = (
f"/findings?filter[{filter_name}]={filter_value}"
f"&filter[inserted_at]={TARGET_INSERTED_AT}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def findings_resource_filter_large_scan_include(self):
name = "/findings?filter[resource_filter][scan]&include - 500k"
filter_name, filter_value = get_next_resource_filter(
self.available_resource_filters
)
endpoint = (
f"/findings?filter[{filter_name}]={filter_value}"
f"&{get_sort_value(FINDINGS_UI_SORT_VALUES)}"
f"&filter[scan]={self.l_scan_id}"
f"&include=scan.provider,resources"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)

Some files were not shown because too many files have changed in this diff Show More