Compare commits

..

5 Commits

Author SHA1 Message Date
HugoPBrito
2ba1a5990a feat: add docstrings 2025-02-12 16:43:22 +01:00
HugoPBrito
2b7dead1f8 feat: rename and adapt logic 2025-02-12 16:37:50 +01:00
HugoPBrito
4aa6e220b0 feat: add service test 2025-02-11 15:48:36 +01:00
HugoPBrito
3f57d0436f feat: add tests 2025-02-10 17:05:50 +01:00
HugoPBrito
1a7f5e7c9a feat: add check logic and metadata 2025-02-10 17:05:13 +01:00
1412 changed files with 23455 additions and 156773 deletions

45
.env
View File

@@ -3,8 +3,8 @@
# For production, it is recommended to use a secure method to store these variables and change the default secret keys.
#### Prowler UI Configuration ####
PROWLER_UI_VERSION="stable"
AUTH_URL=http://localhost:3000
PROWLER_UI_VERSION="latest"
SITE_URL=http://localhost:3000
API_BASE_URL=http://prowler-api:8080/api/v1
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
AUTH_TRUST_HOST=true
@@ -30,30 +30,6 @@ VALKEY_HOST=valkey
VALKEY_PORT=6379
VALKEY_DB=0
# API scan settings
# The path to the directory where scan output should be stored
DJANGO_TMP_OUTPUT_DIRECTORY="/tmp/prowler_api_output"
# The maximum number of findings to process in a single batch
DJANGO_FINDINGS_BATCH_SIZE=1000
# The AWS access key to be used when uploading scan output to an S3 bucket
# If left empty, default AWS credentials resolution behavior will be used
DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID=""
# The AWS secret key to be used when uploading scan output to an S3 bucket
DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY=""
# An optional AWS session token
DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN=""
# The AWS region where your S3 bucket is located (e.g., "us-east-1")
DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION=""
# The name of the S3 bucket where scan output should be stored
DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET=""
# Django settings
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1,prowler-api
DJANGO_BIND_ADDRESS=0.0.0.0
@@ -116,20 +92,3 @@ jQIDAQAB
# openssl rand -base64 32
DJANGO_SECRETS_ENCRYPTION_KEY="oE/ltOhp/n1TdbHjVmzcjDPLcLA41CVI/4Rk+UB5ESc="
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
# Sentry settings
SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.5.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
SOCIAL_GOOGLE_OAUTH_CLIENT_ID=""
SOCIAL_GOOGLE_OAUTH_CLIENT_SECRET=""
SOCIAL_GITHUB_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/github"
SOCIAL_GITHUB_OAUTH_CLIENT_ID=""
SOCIAL_GITHUB_OAUTH_CLIENT_SECRET=""

View File

@@ -16,17 +16,16 @@ updates:
- "dependencies"
- "pip"
# Dependabot updates are temporarily disabled - 2025/03/19
# - package-ecosystem: "pip"
# directory: "/api"
# schedule:
# interval: "daily"
# open-pull-requests-limit: 10
# target-branch: master
# labels:
# - "dependencies"
# - "pip"
# - "component/api"
- package-ecosystem: "pip"
directory: "/api"
schedule:
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "pip"
- "component/api"
- package-ecosystem: "github-actions"
directory: "/"
@@ -38,17 +37,16 @@ updates:
- "dependencies"
- "github_actions"
# Dependabot updates are temporarily disabled - 2025/03/19
# - package-ecosystem: "npm"
# directory: "/ui"
# schedule:
# interval: "daily"
# open-pull-requests-limit: 10
# target-branch: master
# labels:
# - "dependencies"
# - "npm"
# - "component/ui"
- package-ecosystem: "npm"
directory: "/ui"
schedule:
interval: "daily"
open-pull-requests-limit: 10
target-branch: master
labels:
- "dependencies"
- "npm"
- "component/ui"
- package-ecosystem: "docker"
directory: "/"
@@ -94,18 +92,17 @@ updates:
- "docker"
- "v4"
# Dependabot updates are temporarily disabled - 2025/03/19
# v3
# - package-ecosystem: "pip"
# directory: "/"
# schedule:
# interval: "monthly"
# open-pull-requests-limit: 10
# target-branch: v3
# labels:
# - "dependencies"
# - "pip"
# - "v3"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 10
target-branch: v3
labels:
- "dependencies"
- "pip"
- "v3"
- package-ecosystem: "github-actions"
directory: "/"

10
.github/labeler.yml vendored
View File

@@ -92,13 +92,3 @@ component/api:
component/ui:
- changed-files:
- any-glob-to-any-file: "ui/**"
compliance:
- changed-files:
- any-glob-to-any-file: "prowler/compliance/**"
- any-glob-to-any-file: "prowler/lib/outputs/compliance/**"
- any-glob-to-any-file: "tests/lib/outputs/compliance/**"
review-django-migrations:
- changed-files:
- any-glob-to-any-file: "api/src/backend/api/migrations/**"

View File

@@ -61,40 +61,33 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set short git commit SHA
id: vars
run: |
shortSha=$(git rev-parse --short ${{ github.sha }})
echo "SHORT_SHA=${shortSha}" >> $GITHUB_ENV
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
# Set push: false for testing
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.LATEST_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.SHORT_SHA }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
push: true
@@ -103,12 +96,3 @@ jobs:
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.STABLE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Trigger deployment
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: prowler-api-deploy
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ env.SHORT_SHA }}"}'

View File

@@ -44,16 +44,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/api-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -6,7 +6,6 @@ on:
- "master"
- "v5.*"
paths:
- ".github/workflows/api-pull-request.yml"
- "api/**"
pull_request:
branches:
@@ -15,6 +14,7 @@ on:
paths:
- "api/**"
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: 5432
@@ -26,8 +26,7 @@ env:
VALKEY_HOST: localhost
VALKEY_PORT: 6379
VALKEY_DB: 0
API_WORKING_DIR: ./api
IMAGE_NAME: prowler-api
jobs:
test:
@@ -71,11 +70,11 @@ jobs:
--health-retries 5
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Test if changes are in not ignored paths
id: are-non-ignored-files-changed
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
uses: tj-actions/changed-files@v45
with:
files: api/**
files_ignore: |
@@ -90,11 +89,11 @@ jobs:
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
python -m pip install --upgrade pip
pipx install poetry==2.1.1
pipx install poetry==1.8.5
- name: Set up Python ${{ matrix.python-version }}
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
@@ -103,7 +102,7 @@ jobs:
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry install --no-root
poetry install
poetry run pip list
VERSION=$(curl --silent "https://api.github.com/repos/hadolint/hadolint/releases/latest" | \
grep '"tag_name":' | \
@@ -167,23 +166,8 @@ jobs:
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574 # v5.4.0
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: api
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
with:
context: ${{ env.API_WORKING_DIR }}
push: false
tags: ${{ env.IMAGE_NAME }}:latest
outputs: type=docker
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -23,7 +23,7 @@ jobs:
steps:
- name: Check labels
id: preview_label_check
uses: agilepathway/label-checker@c3d16ad512e7cea5961df85ff2486bb774caf3c5 # v1.6.65
uses: docker://agilepathway/pull-request-label-checker:v1.6.55
with:
allow_failure: true
prefix_mode: true
@@ -33,7 +33,7 @@ jobs:
- name: Backport Action
if: steps.preview_label_check.outputs.label_check == 'success'
uses: sorenlouv/backport-github-action@ad888e978060bc1b2798690dd9d03c4036560947 # v9.5.1
uses: sorenlouv/backport-github-action@v9.5.1
with:
github_token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
auto_backport_label_prefix: ${{ env.BACKPORT_LABEL_PREFIX }}

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Leave PR comment with the Prowler Documentation URI
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
uses: peter-evans/create-or-update-comment@v4
with:
issue-number: ${{ env.PR_NUMBER }}
body: |

View File

@@ -1,23 +0,0 @@
name: Prowler - Conventional Commit
on:
pull_request:
types:
- "opened"
- "edited"
- "synchronize"
branches:
- "master"
- "v3"
- "v4.*"
- "v5.*"
jobs:
conventional-commit-check:
runs-on: ubuntu-latest
steps:
- name: conventional-commit-check
id: conventional-commit-check
uses: agenthunt/conventional-commit-checker-action@9e552d650d0e205553ec7792d447929fc78e012b # v2.0.0
with:
pr-title-regex: '^([^\s(]+)(?:\(([^)]+)\))?: (.+)'

View File

@@ -7,11 +7,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@34339eaf08bf5c2a27dbd969812127721f3743ed # v3.88.22
uses: trufflesecurity/trufflehog@v3.88.5
with:
path: ./
base: ${{ github.event.repository.default_branch }}

View File

@@ -14,4 +14,4 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
- uses: actions/labeler@v5

View File

@@ -59,10 +59,10 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -108,13 +108,13 @@ jobs:
esac
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to Public ECR
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
registry: public.ecr.aws
username: ${{ secrets.PUBLIC_ECR_AWS_ACCESS_KEY_ID }}
@@ -123,11 +123,11 @@ jobs:
AWS_REGION: ${{ env.AWS_REGION }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
if: github.event_name == 'push'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
push: true
tags: |
@@ -140,7 +140,7 @@ jobs:
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
# Use local context to get changes
# https://github.com/docker/build-push-action#path-context

View File

@@ -50,16 +50,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/sdk-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -21,11 +21,11 @@ jobs:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Test if changes are in not ignored paths
id: are-non-ignored-files-changed
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
uses: tj-actions/changed-files@v45
with:
files: ./**
files_ignore: |
@@ -39,18 +39,16 @@ jobs:
.backportrc.json
.env
docker-compose*
examples/**
.gitignore
- name: Install poetry
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
python -m pip install --upgrade pip
pipx install poetry==2.1.1
pipx install poetry==1.8.5
- name: Set up Python ${{ matrix.python-version }}
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
@@ -58,7 +56,7 @@ jobs:
- name: Install dependencies
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry install --no-root
poetry install
poetry run pip list
VERSION=$(curl --silent "https://api.github.com/repos/hadolint/hadolint/releases/latest" | \
grep '"tag_name":' | \
@@ -113,7 +111,7 @@ jobs:
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574 # v5.4.0
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:

View File

@@ -64,14 +64,14 @@ jobs:
;;
esac
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
- name: Install dependencies
run: |
pipx install poetry==2.1.1
pipx install poetry==1.8.5
- name: Setup Python
uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: ${{ env.CACHE }}

View File

@@ -23,12 +23,12 @@ jobs:
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v4
with:
ref: ${{ env.GITHUB_BRANCH }}
- name: setup python
uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
uses: actions/setup-python@v5
with:
python-version: 3.9 #install the python needed
@@ -38,7 +38,7 @@ jobs:
pip install boto3
- name: Configure AWS Credentials -- DEV
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: ${{ env.AWS_REGION_DEV }}
role-to-assume: ${{ secrets.DEV_IAM_ROLE_ARN }}
@@ -50,13 +50,12 @@ jobs:
# Create pull request
- name: Create Pull Request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
uses: peter-evans/create-pull-request@v7
with:
author: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
commit-message: "feat(regions_update): Update regions for AWS services"
branch: "aws-services-regions-updated-${{ github.sha }}"
labels: "status/waiting-for-revision, severity/low, provider/aws"
labels: "status/waiting-for-revision, severity/low, provider/aws, backport-to-v3"
title: "chore(regions_update): Changes in regions for AWS services"
body: |
### Description

View File

@@ -61,58 +61,38 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set short git commit SHA
id: vars
run: |
shortSha=$(git rev-parse --short ${{ github.sha }})
echo "SHORT_SHA=${shortSha}" >> $GITHUB_ENV
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ env.SHORT_SHA }}
# Set push: false for testing
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.LATEST_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.SHORT_SHA }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
uses: docker/build-push-action@v6
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${{ env.RELEASE_TAG }}
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.RELEASE_TAG }}
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.STABLE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Trigger deployment
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
with:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
repository: ${{ secrets.CLOUD_DISPATCH }}
event-type: prowler-ui-deploy
client-payload: '{"sha": "${{ github.sha }}", "short_sha": "${{ env.SHORT_SHA }}"}'

View File

@@ -44,16 +44,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/ui-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # v3.28.13
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -6,7 +6,6 @@ on:
- "master"
- "v5.*"
paths:
- ".github/workflows/ui-pull-request.yml"
- "ui/**"
pull_request:
branches:
@@ -14,9 +13,6 @@ on:
- "v5.*"
paths:
- 'ui/**'
env:
UI_WORKING_DIR: ./ui
IMAGE_NAME: prowler-ui
jobs:
test-and-coverage:
@@ -27,11 +23,11 @@ jobs:
node-version: [20.x]
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Node.js ${{ matrix.node-version }}
uses: actions/setup-node@cdca7365b2dadb8aad0a33bc7601856ffabcc48e # v4.3.0
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: Install dependencies
@@ -43,20 +39,3 @@ jobs:
- name: Build the application
working-directory: ./ui
run: npm run build
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
with:
context: ${{ env.UI_WORKING_DIR }}
# Always build using `prod` target
target: prod
push: false
tags: ${{ env.IMAGE_NAME }}:latest
outputs: type=docker
build-args: |
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=pk_test_51LwpXXXX

3
.gitignore vendored
View File

@@ -31,7 +31,7 @@ tags
*.DS_Store
# Prowler output
/output
output/
# Prowler found secrets
secrets-*/
@@ -50,7 +50,6 @@ junit-reports/
# .env
ui/.env*
api/.env*
.env.local
# Coverage
.coverage*

View File

@@ -59,7 +59,7 @@ repos:
args: ["--ignore=E266,W503,E203,E501,W605"]
- repo: https://github.com/python-poetry/poetry
rev: 2.1.1
rev: 1.8.0
hooks:
- id: poetry-check
name: API - poetry-check
@@ -68,7 +68,7 @@ repos:
- id: poetry-lock
name: API - poetry-lock
args: ["--directory=./api"]
args: ["--no-update", "--directory=./api"]
pass_filenames: false
- id: poetry-check
@@ -78,7 +78,7 @@ repos:
- id: poetry-lock
name: SDK - poetry-lock
args: ["--directory=./"]
args: ["--no-update", "--directory=./"]
pass_filenames: false

View File

@@ -1,10 +1,10 @@
FROM python:3.12.9-alpine3.20
FROM python:3.12.8-alpine3.20
LABEL maintainer="https://github.com/prowler-cloud/prowler"
# Update system dependencies and install essential tools
#hadolint ignore=DL3018
RUN apk --no-cache upgrade && apk --no-cache add curl git gcc python3-dev musl-dev linux-headers
RUN apk --no-cache upgrade && apk --no-cache add curl git
# Create non-root user
RUN mkdir -p /home/prowler && \
@@ -18,25 +18,21 @@ WORKDIR /home/prowler
COPY prowler/ /home/prowler/prowler/
COPY dashboard/ /home/prowler/dashboard/
COPY pyproject.toml /home/prowler
COPY README.md /home/prowler/
COPY README.md /home/prowler
# Install Python dependencies
ENV HOME='/home/prowler'
ENV PATH="${HOME}/.local/bin:${PATH}"
#hadolint ignore=DL3013
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
# By default poetry does not compile Python source files to bytecode during installation.
# This speeds up the installation process, but the first execution may take a little more
# time because Python then compiles source files to bytecode automatically. If you want to
# compile source files to bytecode during installation, you can use the --compile option
RUN poetry install --compile && \
rm -rf ~/.cache/pip
ENV PATH="$HOME/.local/bin:$PATH"
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir .
# Remove deprecated dash dependencies
RUN pip uninstall dash-html-components -y && \
pip uninstall dash-core-components -y
# Remove Prowler directory and build files
USER 0
RUN rm -rf /home/prowler/prowler /home/prowler/pyproject.toml /home/prowler/README.md /home/prowler/build /home/prowler/prowler.egg-info
USER prowler
ENTRYPOINT ["poetry", "run", "prowler"]
ENTRYPOINT ["prowler"]

View File

@@ -71,11 +71,10 @@ It contains hundreds of controls covering CIS, NIST 800, NIST CSF, CISA, RBI, Fe
| Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) |
|---|---|---|---|---|
| AWS | 564 | 82 | 33 | 10 |
| GCP | 78 | 13 | 6 | 3 |
| Azure | 140 | 18 | 7 | 3 |
| Kubernetes | 83 | 7 | 4 | 7 |
| Microsoft365 | 5 | 2 | 1 | 0 |
| AWS | 564 | 82 | 30 | 10 |
| GCP | 77 | 13 | 4 | 3 |
| Azure | 140 | 18 | 5 | 3 |
| Kubernetes | 83 | 7 | 2 | 7 |
> You can list the checks, services, compliance frameworks and categories with `prowler <provider> --list-checks`, `prowler <provider> --list-services`, `prowler <provider> --list-compliance` and `prowler <provider> --list-categories`.
@@ -109,7 +108,7 @@ docker compose up -d
**Requirements**
* `git` installed.
* `poetry` v2 installed: [poetry installation](https://python-poetry.org/docs/#installation).
* `poetry` installed: [poetry installation](https://python-poetry.org/docs/#installation).
* `npm` installed: [npm installation](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm).
* `Docker Compose` installed: https://docs.docker.com/compose/install/.
@@ -119,7 +118,7 @@ docker compose up -d
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
docker compose up postgres valkey -d
@@ -127,11 +126,6 @@ cd src/backend
python manage.py migrate --database admin
gunicorn -c config/guniconf.py config.wsgi:application
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
> Now, you can access the API documentation at http://localhost:8080/api/v1/docs.
@@ -141,7 +135,7 @@ gunicorn -c config/guniconf.py config.wsgi:application
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
cd src/backend
@@ -154,7 +148,7 @@ python -m celery -A config.celery worker -l info -E
git clone https://github.com/prowler-cloud/prowler
cd prowler/api
poetry install
eval $(poetry env activate)
poetry shell
set -a
source .env
cd src/backend
@@ -175,7 +169,7 @@ npm start
## Prowler CLI
### Pip package
Prowler CLI is available as a project in [PyPI](https://pypi.org/project/prowler-cloud/), thus can be installed using pip with Python > 3.9.1, < 3.13:
Prowler CLI is available as a project in [PyPI](https://pypi.org/project/prowler-cloud/), thus can be installed using pip with Python >= 3.9, < 3.13:
```console
pip install prowler
@@ -205,21 +199,15 @@ The container images are available here:
### From GitHub
Python > 3.9.1, < 3.13 is required with pip and poetry:
Python >= 3.9, < 3.13 is required with pip and poetry:
``` console
git clone https://github.com/prowler-cloud/prowler
cd prowler
eval $(poetry env activate)
poetry shell
poetry install
python prowler-cli.py -v
python prowler.py -v
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
> If you want to clone Prowler from Windows, use `git config core.longpaths true` to allow long file paths.
# 📐✏️ High level architecture

View File

@@ -23,7 +23,6 @@ DJANGO_SECRETS_ENCRYPTION_KEY=""
DJANGO_MANAGE_DB_PARTITIONS=[True|False]
DJANGO_CELERY_DEADLOCK_ATTEMPTS=5
DJANGO_BROKER_VISIBILITY_TIMEOUT=86400
DJANGO_SENTRY_DSN=
# PostgreSQL settings
# If running django and celery on host, use 'localhost', else use 'postgres-db'
@@ -40,19 +39,3 @@ POSTGRES_DB=prowler_db
VALKEY_HOST=[localhost|valkey]
VALKEY_PORT=6379
VALKEY_DB=0
# Sentry settings
SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
# Social login credentials
DJANGO_GOOGLE_OAUTH_CLIENT_ID=""
DJANGO_GOOGLE_OAUTH_CLIENT_SECRET=""
DJANGO_GOOGLE_OAUTH_CALLBACK_URL=""
DJANGO_GITHUB_OAUTH_CLIENT_ID=""
DJANGO_GITHUB_OAUTH_CLIENT_SECRET=""
DJANGO_GITHUB_OAUTH_CALLBACK_URL=""
# Deletion Task Batch Size
DJANGO_DELETION_BATCH_SIZE=5000

View File

@@ -4,61 +4,17 @@ All notable changes to the **Prowler API** are documented in this file.
---
## [v1.6.0] (Prowler UNRELEASED)
### Added
- Support for developing new integrations [(#7167)](https://github.com/prowler-cloud/prowler/pull/7167).
- HTTP Security Headers [(#7289)](https://github.com/prowler-cloud/prowler/pull/7289).
- New endpoint to get the compliance overviews metadata [(#7333)](https://github.com/prowler-cloud/prowler/pull/7333).
- Support for muted findings [(#7378)](https://github.com/prowler-cloud/prowler/pull/7378).
- Added missing fields to API findings and resources [(#7318)](https://github.com/prowler-cloud/prowler/pull/7318).
---
## [v1.5.3] (Prowler v5.4.3)
### Fixed
- Added handling for duplicated scheduled scans [(#7401)](https://github.com/prowler-cloud/prowler/pull/7401).
- Added environment variable to configure the deletion task batch size [(#7423)](https://github.com/prowler-cloud/prowler/pull/7423).
---
## [v1.5.2] (Prowler v5.4.2)
### Changed
- Refactored deletion logic and implemented retry mechanism for deletion tasks [(#7349)](https://github.com/prowler-cloud/prowler/pull/7349).
---
## [v1.5.1] (Prowler v5.4.1)
### Fixed
- Added a handled response in case local files are missing [(#7183)](https://github.com/prowler-cloud/prowler/pull/7183).
- Fixed a race condition when deleting export files after the S3 upload [(#7172)](https://github.com/prowler-cloud/prowler/pull/7172).
- Handled exception when a provider has no secret in test connection [(#7283)](https://github.com/prowler-cloud/prowler/pull/7283).
## [Unreleased]
---
## [v1.5.0] (Prowler v5.4.0)
### Added
- Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906).
- API scan report system: all scans launched from the API now generate a compressed file with the report in OCSF, CSV and HTML formats [(#6878)](https://github.com/prowler-cloud/prowler/pull/6878).
- Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874).
### Changed
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).
---
## [v1.4.0] (Prowler v5.3.0)
## [v1.4.0] (Prowler v5.3.0) - 2025-02-10
### Changed
- Daily scheduled scan instances are now created beforehand with `SCHEDULED` state [(#6700)](https://github.com/prowler-cloud/prowler/pull/6700).
- Findings endpoints now require at least one date filter [(#6800)](https://github.com/prowler-cloud/prowler/pull/6800).
- Findings metadata endpoint received a performance improvement [(#6863)](https://github.com/prowler-cloud/prowler/pull/6863).
- Increased the allowed length of the provider UID for Kubernetes providers [(#6869)](https://github.com/prowler-cloud/prowler/pull/6869).
- Increase the allowed length of the provider UID for Kubernetes providers [(#6869)](https://github.com/prowler-cloud/prowler/pull/6869).
---

View File

@@ -21,8 +21,7 @@ COPY src/backend/ ./backend/
ENV PATH="/home/prowler/.local/bin:$PATH"
# Add `--no-root` to avoid installing the current project as a package
RUN poetry install --no-root && \
RUN poetry install && \
rm -rf ~/.cache/pip
COPY docker-entrypoint.sh ./docker-entrypoint.sh

View File

@@ -269,66 +269,3 @@ poetry shell
cd src/backend
pytest
```
# Custom commands
Django provides a way to create custom commands that can be run from the command line.
> These commands can be found in: ```prowler/api/src/backend/api/management/commands```
To run a custom command, you need to be in the `prowler/api/src/backend` directory and run:
```console
poetry shell
python manage.py <command_name>
```
## Generate dummy data
```console
python manage.py findings --tenant <TENANT_ID> --findings <NUM_FINDINGS> --resources <NUM_RESOURCES> --batch <TRANSACTION_BATCH_SIZE> --alias <ALIAS>
```
For a given tenant, this command creates a provider, a scan, and a set of findings and resources that are all linked to one another.
> Scan progress and state are updated in real time.
> - 0-33%: Create resources.
> - 33-66%: Create findings.
> - 66-100%: Create resource-finding mapping.
>
> The last step is required to access the findings details, since the UI needs that to print all the information.
### Example
```console
~/backend $ poetry run python manage.py findings --tenant fffb1893-3fc7-4623-a5d9-fae47da1c528 --findings 25000 --resources 1000 --batch 5000 --alias test-script
Starting data population
Tenant: fffb1893-3fc7-4623-a5d9-fae47da1c528
Alias: test-script
Resources: 1000
Findings: 25000
Batch size: 5000
Creating resources...
100%|███████████████████████| 1/1 [00:00<00:00, 7.72it/s]
Resources created successfully.
Creating findings...
100%|███████████████████████| 5/5 [00:05<00:00, 1.09s/it]
Findings created successfully.
Creating resource-finding mappings...
100%|███████████████████████| 5/5 [00:02<00:00, 1.81it/s]
Resource-finding mappings created successfully.
Successfully populated test data.
```

View File

@@ -28,7 +28,7 @@ start_prod_server() {
start_worker() {
echo "Starting the worker..."
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion -E --max-tasks-per-child 1
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans -E --max-tasks-per-child 1
}
start_worker_beat() {

1135
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -2,43 +2,37 @@
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
[project]
authors = [{name = "Prowler Engineering", email = "engineering@prowler.com"}]
dependencies = [
"celery[pytest] (>=5.4.0,<6.0.0)",
"dj-rest-auth[with_social,jwt] (==7.0.1)",
"django==5.1.7",
"django-allauth==65.4.1",
"django-celery-beat (>=2.7.0,<3.0.0)",
"django-celery-results (>=2.5.1,<3.0.0)",
"django-cors-headers==4.4.0",
"django-environ==0.11.2",
"django-filter==24.3",
"django-guid==3.5.0",
"django-postgres-extra (>=2.0.8,<3.0.0)",
"djangorestframework==3.15.2",
"djangorestframework-jsonapi==7.0.2",
"djangorestframework-simplejwt (>=5.3.1,<6.0.0)",
"drf-nested-routers (>=0.94.1,<1.0.0)",
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
"uuid6==2024.7.10"
]
[tool.poetry]
authors = ["Prowler Team"]
description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.6.0"
version = "1.4.0"
[project.scripts]
celery = "src.backend.config.settings.celery"
[tool.poetry.dependencies]
celery = {extras = ["pytest"], version = "^5.4.0"}
django = "5.1.5"
django-celery-beat = "^2.7.0"
django-celery-results = "^2.5.1"
django-cors-headers = "4.4.0"
django-environ = "0.11.2"
django-filter = "24.3"
django-guid = "3.5.0"
django-postgres-extra = "^2.0.8"
djangorestframework = "3.15.2"
djangorestframework-jsonapi = "7.0.2"
djangorestframework-simplejwt = "^5.3.1"
drf-nested-routers = "^0.94.1"
drf-spectacular = "0.27.2"
drf-spectacular-jsonapi = "0.5.1"
gunicorn = "23.0.0"
prowler = {git = "https://github.com/prowler-cloud/prowler.git", branch = "master"}
psycopg2-binary = "2.9.9"
pytest-celery = {extras = ["redis"], version = "^1.0.1"}
# Needed for prowler compatibility
python = ">=3.11,<3.13"
uuid6 = "2024.7.10"
[tool.poetry.group.dev.dependencies]
bandit = "1.7.9"
@@ -56,5 +50,7 @@ pytest-randomly = "3.15.0"
pytest-xdist = "3.6.1"
ruff = "0.5.0"
safety = "3.2.9"
tqdm = "4.67.1"
vulture = "2.14"
[tool.poetry.scripts]
celery = "src.backend.config.settings.celery"

View File

@@ -1,61 +0,0 @@
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import rls_transaction
from api.models import Membership, Role, Tenant, User, UserRoleRelationship
class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
    """
    Social-account adapter that links social logins to existing users by e-mail
    and provisions a default tenant, owner membership and "admin" role for users
    created through a social login.
    """

    @staticmethod
    def get_user_by_email(email: str):
        """Return the ``User`` whose e-mail equals ``email``, or ``None`` when no such user exists."""
        try:
            matched_user = User.objects.get(email=email)
        except User.DoesNotExist:
            matched_user = None
        return matched_user

    def pre_social_login(self, request, sociallogin):
        """Connect the incoming social login to an existing user that has the same e-mail address."""
        # NOTE(review): the address is read from the provider payload as-is;
        # confirm the enabled providers only return verified e-mails before
        # relying on it for account linking.
        email = sociallogin.account.extra_data.get("email")
        if not email:
            return

        existing_user = self.get_user_by_email(email)
        if not existing_user:
            return

        sociallogin.connect(request, existing_user)

    def save_user(self, request, sociallogin, form=None):
        """
        Called after the user data is fully populated from the provider
        and is about to be saved to the DB for the first time.

        Besides saving the user, this creates a tenant named after the local part
        of the user's e-mail, an OWNER membership, and an "admin" role with every
        permission flag enabled, all through the admin database alias.
        """
        admin_db = MainRouter.admin_db

        with transaction.atomic(using=admin_db):
            user = super().save_user(request, sociallogin, form)
            user.save(using=admin_db)

            # Prefer the display name reported by the social provider, if any.
            display_name = sociallogin.account.extra_data.get("name")
            if display_name:
                user.name = display_name
                user.save(using=admin_db)

            tenant = Tenant.objects.using(admin_db).create(
                name=f"{user.email.split('@')[0]} default tenant"
            )

            # The rows below are tenant-scoped, so they are written inside the
            # new tenant's RLS context.
            with rls_transaction(str(tenant.id)):
                Membership.objects.using(admin_db).create(
                    user=user, tenant=tenant, role=Membership.RoleChoices.OWNER
                )
                admin_role = Role.objects.using(admin_db).create(
                    name="admin",
                    tenant_id=tenant.id,
                    manage_users=True,
                    manage_account=True,
                    manage_billing=True,
                    manage_providers=True,
                    manage_integrations=True,
                    manage_scans=True,
                    unlimited_visibility=True,
                )
                UserRoleRelationship.objects.using(admin_db).create(
                    user=user,
                    role=admin_role,
                    tenant_id=tenant.id,
                )

        return user

View File

@@ -1,29 +1,22 @@
ALLOWED_APPS = ("django", "socialaccount", "account", "authtoken", "silk")
class MainRouter:
default_db = "default"
admin_db = "admin"
def db_for_read(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if model_table_name.startswith("django_") or any(
model_table_name.startswith(f"{app}_") for app in ALLOWED_APPS
if model_table_name.startswith("django_") or model_table_name.startswith(
"silk_"
):
return self.admin_db
return None
def db_for_write(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if any(model_table_name.startswith(f"{app}_") for app in ALLOWED_APPS):
if model_table_name.startswith("django_") or model_table_name.startswith(
"silk_"
):
return self.admin_db
return None
def allow_migrate(self, db, app_label, model_name=None, **hints): # noqa: F841
return db == self.admin_db
def allow_relation(self, obj1, obj2, **hints): # noqa: F841
# Allow relations if both objects are in either "default" or "admin" db connectors
if {obj1._state.db, obj2._state.db} <= {self.default_db, self.admin_db}:
return True
return None

View File

@@ -6,7 +6,6 @@ from datetime import datetime, timedelta, timezone
from django.conf import settings
from django.contrib.auth.models import BaseUserManager
from django.db import connection, models, transaction
from django_celery_beat.models import PeriodicTask
from psycopg2 import connect as psycopg2_connect
from psycopg2.extensions import AsIs, new_type, register_adapter, register_type
from rest_framework_json_api.serializers import ValidationError
@@ -106,12 +105,11 @@ def generate_random_token(length: int = 14, symbols: str | None = None) -> str:
return "".join(secrets.choice(symbols or _symbols) for _ in range(length))
def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_SIZE):
def batch_delete(queryset, batch_size=5000):
"""
Deletes objects in batches and returns the total number of deletions and a summary.
Args:
tenant_id (str): Tenant ID the queryset belongs to.
queryset (QuerySet): The queryset of objects to delete.
batch_size (int): The number of objects to delete in each batch.
@@ -122,16 +120,15 @@ def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_
deletion_summary = {}
while True:
with rls_transaction(tenant_id, POSTGRES_TENANT_VAR):
# Get a batch of IDs to delete
batch_ids = set(
queryset.values_list("id", flat=True).order_by("id")[:batch_size]
)
if not batch_ids:
# No more objects to delete
break
# Get a batch of IDs to delete
batch_ids = set(
queryset.values_list("id", flat=True).order_by("id")[:batch_size]
)
if not batch_ids:
# No more objects to delete
break
deleted_count, deleted_info = queryset.filter(id__in=batch_ids).delete()
deleted_count, deleted_info = queryset.filter(id__in=batch_ids).delete()
total_deleted += deleted_count
for model_label, count in deleted_info.items():
@@ -140,18 +137,6 @@ def batch_delete(tenant_id, queryset, batch_size=settings.DJANGO_DELETION_BATCH_
return total_deleted, deletion_summary
def delete_related_daily_task(provider_id: str):
"""
Deletes the periodic task associated with a specific provider.
Args:
provider_id (str): The unique identifier for the provider
whose related periodic task should be deleted.
"""
task_name = f"scan-perform-scheduled-{provider_id}"
PeriodicTask.objects.filter(name=task_name).delete()
# Postgres Enums
@@ -333,15 +318,3 @@ class InvitationStateEnum(EnumType):
class InvitationStateEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("invitation_state", *args, **kwargs)
# Postgres enum definition for Integration type
class IntegrationTypeEnum(EnumType):
enum_type_name = "integration_type"
class IntegrationTypeEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("integration_type", *args, **kwargs)

View File

@@ -7,7 +7,7 @@ from rest_framework_json_api.serializers import ValidationError
from api.db_utils import POSTGRES_TENANT_VAR, SET_CONFIG_QUERY
def set_tenant(func=None, *, keep_tenant=False):
def set_tenant(func):
"""
Decorator to set the tenant context for a Celery task based on the provided tenant_id.
@@ -40,29 +40,20 @@ def set_tenant(func=None, *, keep_tenant=False):
# The tenant context will be set before the task logic executes.
"""
def decorator(func):
@wraps(func)
@transaction.atomic
def wrapper(*args, **kwargs):
try:
if not keep_tenant:
tenant_id = kwargs.pop("tenant_id")
else:
tenant_id = kwargs["tenant_id"]
except KeyError:
raise KeyError("This task requires the tenant_id")
try:
uuid.UUID(tenant_id)
except ValueError:
raise ValidationError("Tenant ID must be a valid UUID")
with connection.cursor() as cursor:
cursor.execute(SET_CONFIG_QUERY, [POSTGRES_TENANT_VAR, tenant_id])
@wraps(func)
@transaction.atomic
def wrapper(*args, **kwargs):
try:
tenant_id = kwargs.pop("tenant_id")
except KeyError:
raise KeyError("This task requires the tenant_id")
try:
uuid.UUID(tenant_id)
except ValueError:
raise ValidationError("Tenant ID must be a valid UUID")
with connection.cursor() as cursor:
cursor.execute(SET_CONFIG_QUERY, [POSTGRES_TENANT_VAR, tenant_id])
return func(*args, **kwargs)
return func(*args, **kwargs)
return wrapper
if func is None:
return decorator
else:
return decorator(func)
return wrapper

View File

@@ -24,7 +24,6 @@ from api.db_utils import (
from api.models import (
ComplianceOverview,
Finding,
Integration,
Invitation,
Membership,
PermissionChoices,
@@ -287,9 +286,6 @@ class FindingFilter(FilterSet):
status = ChoiceFilter(choices=StatusChoices.choices)
severity = ChoiceFilter(choices=SeverityChoices)
impact = ChoiceFilter(choices=SeverityChoices)
muted = BooleanFilter(
help_text="If this filter is not provided, muted and non-muted findings will be returned."
)
resources = UUIDInFilter(field_name="resource__id", lookup_expr="in")
@@ -451,7 +447,9 @@ class FindingFilter(FilterSet):
)
return (
queryset.filter(id__gte=start).filter(id__lt=end).filter(scan_id=value_uuid)
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan__id=value_uuid)
)
def filter_scan_id_in(self, queryset, name, value):
@@ -476,32 +474,31 @@ class FindingFilter(FilterSet):
]
)
if start == end:
return queryset.filter(id__gte=start).filter(scan_id__in=uuid_list)
return queryset.filter(id__gte=start).filter(scan__id__in=uuid_list)
else:
return (
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan_id__in=uuid_list)
.filter(scan__id__in=uuid_list)
)
def filter_inserted_at(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__gte=start, id__lt=end)
return queryset.filter(id__gte=start).filter(inserted_at__date=value)
def filter_inserted_at_gte(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__gte=start)
return queryset.filter(id__gte=start).filter(inserted_at__gte=value)
def filter_inserted_at_lte(self, queryset, name, value):
datetime_value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(value))
return queryset.filter(id__lt=end)
return queryset.filter(id__lte=end).filter(inserted_at__lte=value)
def filter_resource_tag(self, queryset, name, value):
overall_query = Q()
@@ -617,6 +614,12 @@ class ScanSummaryFilter(FilterSet):
field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
)
region = CharFilter(field_name="region")
muted_findings = BooleanFilter(method="filter_muted_findings")
def filter_muted_findings(self, queryset, name, value):
if not value:
return queryset.exclude(muted__gt=0)
return queryset
class Meta:
model = ScanSummary
@@ -627,6 +630,8 @@ class ScanSummaryFilter(FilterSet):
class ServiceOverviewFilter(ScanSummaryFilter):
muted_findings = None
def is_valid(self):
# Check if at least one of the inserted_at filters is present
inserted_at_filters = [
@@ -644,19 +649,3 @@ class ServiceOverviewFilter(ScanSummaryFilter):
}
)
return super().is_valid()
class IntegrationFilter(FilterSet):
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
integration_type = ChoiceFilter(choices=Integration.IntegrationChoices.choices)
integration_type__in = ChoiceInFilter(
choices=Integration.IntegrationChoices.choices,
field_name="integration_type",
lookup_expr="in",
)
class Meta:
model = Integration
fields = {
"inserted_at": ["date", "gte", "lte"],
}

View File

@@ -122,22 +122,6 @@
"scanner_args": {}
}
},
{
"model": "api.provider",
"pk": "7791914f-d646-4fe2-b2ed-73f2c6499a36",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"inserted_at": "2024-10-18T10:45:26.352Z",
"updated_at": "2024-10-18T11:16:23.533Z",
"provider": "kubernetes",
"uid": "gke_lucky-coast-419309_us-central1_autopilot-cluster-2",
"alias": "k8s_testing_2",
"connected": true,
"connection_last_checked_at": "2024-10-18T11:16:23.503Z",
"metadata": {},
"scanner_args": {}
}
},
{
"model": "api.providersecret",
"pk": "11491b47-75ae-4f71-ad8d-3e630a72182e",

View File

@@ -11,7 +11,9 @@
"unique_resource_count": 1,
"duration": 5,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-01T17:25:27.050Z",
"started_at": "2024-09-01T17:25:27.050Z",
@@ -31,7 +33,9 @@
"unique_resource_count": 1,
"duration": 20,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-02T17:24:27.050Z",
"started_at": "2024-09-02T17:24:27.050Z",
@@ -51,7 +55,9 @@
"unique_resource_count": 10,
"duration": 10,
"scanner_args": {
"checks_to_execute": ["cloudsql_instance_automated_backups"]
"checks_to_execute": [
"cloudsql_instance_automated_backups"
]
},
"inserted_at": "2024-09-02T19:26:27.050Z",
"started_at": "2024-09-02T19:26:27.050Z",
@@ -71,7 +77,9 @@
"unique_resource_count": 1,
"duration": 35,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"inserted_at": "2024-09-02T19:27:27.050Z",
"started_at": "2024-09-02T19:27:27.050Z",
@@ -89,7 +97,9 @@
"name": "test scheduled aws scan",
"state": "available",
"scanner_args": {
"checks_to_execute": ["cloudformation_stack_outputs_find_secrets"]
"checks_to_execute": [
"cloudformation_stack_outputs_find_secrets"
]
},
"scheduled_at": "2030-09-02T19:20:27.050Z",
"inserted_at": "2024-09-02T19:24:27.050Z",
@@ -168,7 +178,9 @@
"unique_resource_count": 19,
"progress": 100,
"scanner_args": {
"checks_to_execute": ["accessanalyzer_enabled"]
"checks_to_execute": [
"accessanalyzer_enabled"
]
},
"duration": 7,
"scheduled_at": null,
@@ -178,56 +190,6 @@
"completed_at": "2024-10-18T10:46:05.127Z"
}
},
{
"model": "api.scan",
"pk": "6dd8925f-a52d-48de-a546-d2d90db30ab1",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"name": "real scan azure",
"provider": "1b59e032-3eb6-4694-93a5-df84cd9b3ce2",
"trigger": "manual",
"state": "completed",
"unique_resource_count": 20,
"progress": 100,
"scanner_args": {
"checks_to_execute": [
"accessanalyzer_enabled",
"account_security_contact_information_is_registered"
]
},
"duration": 4,
"scheduled_at": null,
"inserted_at": "2024-10-18T11:16:21.358Z",
"updated_at": "2024-10-18T11:16:26.060Z",
"started_at": "2024-10-18T11:16:21.593Z",
"completed_at": "2024-10-18T11:16:26.060Z"
}
},
{
"model": "api.scan",
"pk": "4ca7ce89-3236-41a8-a369-8937bc152af5",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"name": "real scan k8s",
"provider": "7791914f-d646-4fe2-b2ed-73f2c6499a36",
"trigger": "manual",
"state": "completed",
"unique_resource_count": 20,
"progress": 100,
"scanner_args": {
"checks_to_execute": [
"accessanalyzer_enabled",
"account_security_contact_information_is_registered"
]
},
"duration": 4,
"scheduled_at": null,
"inserted_at": "2024-10-18T11:16:21.358Z",
"updated_at": "2024-10-18T11:16:26.060Z",
"started_at": "2024-10-18T11:16:21.593Z",
"completed_at": "2024-10-18T11:16:26.060Z"
}
},
{
"model": "api.scan",
"pk": "01929f57-c0ee-7553-be0b-cbde006fb6f7",

View File

@@ -1,237 +0,0 @@
import random
from datetime import datetime, timezone
from math import ceil
from uuid import uuid4
from django.core.management.base import BaseCommand
from tqdm import tqdm
from api.db_utils import rls_transaction
from api.models import (
Finding,
Provider,
Resource,
ResourceFindingMapping,
Scan,
StatusChoices,
)
from prowler.lib.check.models import CheckMetadata
class Command(BaseCommand):
    """
    Management command that fills a tenant with synthetic scan data.

    It gets or creates an AWS provider with a random 12-digit UID, creates a
    manual scan, and then bulk-inserts the requested number of resources,
    findings and resource-finding mappings in batches, updating the scan
    progress (0 -> 33 -> 66 -> 100) along the way.
    """

    help = "Populates the database with test data for performance testing."

    def add_arguments(self, parser):
        """Register the command-line options accepted by the command."""
        parser.add_argument(
            "--tenant",
            type=str,
            required=True,
            help="Tenant id for which the data will be populated.",
        )
        parser.add_argument(
            "--resources",
            type=int,
            required=True,
            help="The number of resources to create.",
        )
        parser.add_argument(
            "--findings",
            type=int,
            required=True,
            help="The number of findings to create.",
        )
        parser.add_argument(
            "--batch", type=int, required=True, help="The batch size for bulk creation."
        )
        parser.add_argument(
            "--alias",
            type=str,
            required=False,
            help="Optional alias for the provider and scan",
        )

    def handle(self, *args, **options):
        """
        Create the provider, scan, resources, findings and mappings.

        Any exception raised while populating is reported on stdout and the scan
        is stored with state "failed"; otherwise it is stored as "completed".
        The final success message is only printed when population succeeded.
        """
        tenant_id = options["tenant"]
        num_resources = options["resources"]
        num_findings = options["findings"]
        batch_size = options["batch"]
        alias = options["alias"] or "Testing"
        # Token shared by every generated UID so that separate runs do not collide.
        uid_token = str(uuid4())

        self.stdout.write(self.style.NOTICE("Starting data population"))
        self.stdout.write(self.style.NOTICE(f"\tTenant: {tenant_id}"))
        self.stdout.write(self.style.NOTICE(f"\tAlias: {alias}"))
        self.stdout.write(self.style.NOTICE(f"\tResources: {num_resources}"))
        self.stdout.write(self.style.NOTICE(f"\tFindings: {num_findings}"))
        self.stdout.write(self.style.NOTICE(f"\tBatch size: {batch_size}\n\n"))

        # Resource metadata
        possible_regions = [
            "us-east-1",
            "us-east-2",
            "us-west-1",
            "us-west-2",
            "ca-central-1",
            "eu-central-1",
            "eu-west-1",
            "eu-west-2",
            "eu-west-3",
            "ap-southeast-1",
            "ap-southeast-2",
            "ap-northeast-1",
            "ap-northeast-2",
            "ap-south-1",
            "sa-east-1",
        ]
        # Collect the distinct service names and resource types from the AWS
        # check metadata; dict.fromkeys de-duplicates while keeping first-seen order.
        bulk_check_metadata = CheckMetadata.get_bulk(provider="aws")
        possible_services = list(
            dict.fromkeys(
                check_metadata.ServiceName
                for check_metadata in bulk_check_metadata.values()
            )
        )
        possible_types = list(
            dict.fromkeys(
                check_metadata.ResourceType
                for check_metadata in bulk_check_metadata.values()
                if check_metadata.ResourceType
            )
        )

        with rls_transaction(tenant_id):
            provider, _ = Provider.all_objects.get_or_create(
                tenant_id=tenant_id,
                provider="aws",
                connected=True,
                uid=str(random.randint(100000000000, 999999999999)),
                defaults={
                    "alias": alias,
                },
            )

        with rls_transaction(tenant_id):
            scan = Scan.all_objects.create(
                tenant_id=tenant_id,
                provider=provider,
                name=alias,
                trigger="manual",
                state="executing",
                progress=0,
                started_at=datetime.now(timezone.utc),
            )
        scan_state = "completed"

        try:
            # Create resources
            resources = []

            for i in range(num_resources):
                resources.append(
                    Resource(
                        tenant_id=tenant_id,
                        provider_id=provider.id,
                        uid=f"testing-{uid_token}-{i}",
                        name=f"Testing {uid_token}-{i}",
                        region=random.choice(possible_regions),
                        service=random.choice(possible_services),
                        type=random.choice(possible_types),
                    )
                )

            num_batches = ceil(len(resources) / batch_size)
            self.stdout.write(self.style.WARNING("Creating resources..."))
            for i in tqdm(range(0, len(resources), batch_size), total=num_batches):
                with rls_transaction(tenant_id):
                    Resource.all_objects.bulk_create(resources[i : i + batch_size])
            self.stdout.write(self.style.SUCCESS("Resources created successfully.\n\n"))

            with rls_transaction(tenant_id):
                scan.progress = 33
                scan.save()

            # Create Findings
            findings = []
            possible_deltas = ["new", "changed", None]
            possible_severities = ["critical", "high", "medium", "low"]
            # Index into `resources` chosen for each finding, in finding order.
            findings_resources_mapping = []

            for i in range(num_findings):
                severity = random.choice(possible_severities)
                check_id = random.randint(1, 1000)
                assigned_resource_num = random.randint(0, len(resources) - 1)
                assigned_resource = resources[assigned_resource_num]
                findings_resources_mapping.append(assigned_resource_num)

                findings.append(
                    Finding(
                        tenant_id=tenant_id,
                        scan=scan,
                        uid=f"testing-{uid_token}-{i}",
                        delta=random.choice(possible_deltas),
                        check_id=f"check-{check_id}",
                        status=random.choice(list(StatusChoices)),
                        severity=severity,
                        impact=severity,
                        raw_result={},
                        check_metadata={
                            "checktitle": f"Test title for check {check_id}",
                            "risk": f"Testing risk {uid_token}-{i}",
                            "provider": "aws",
                            "severity": severity,
                            "categories": ["category1", "category2", "category3"],
                            "description": "This is a random description that should not matter for testing purposes.",
                            "servicename": assigned_resource.service,
                            "resourcetype": assigned_resource.type,
                        },
                    )
                )

            num_batches = ceil(len(findings) / batch_size)
            self.stdout.write(self.style.WARNING("Creating findings..."))
            for i in tqdm(range(0, len(findings), batch_size), total=num_batches):
                with rls_transaction(tenant_id):
                    Finding.all_objects.bulk_create(findings[i : i + batch_size])
            self.stdout.write(self.style.SUCCESS("Findings created successfully.\n\n"))

            with rls_transaction(tenant_id):
                scan.progress = 66
                scan.save()

            # Create ResourceFindingMapping
            mappings = []
            for index, f in enumerate(findings):
                mappings.append(
                    ResourceFindingMapping(
                        tenant_id=tenant_id,
                        resource=resources[findings_resources_mapping[index]],
                        finding=f,
                    )
                )

            num_batches = ceil(len(mappings) / batch_size)
            self.stdout.write(
                self.style.WARNING("Creating resource-finding mappings...")
            )
            for i in tqdm(range(0, len(mappings), batch_size), total=num_batches):
                with rls_transaction(tenant_id):
                    ResourceFindingMapping.objects.bulk_create(
                        mappings[i : i + batch_size]
                    )
            self.stdout.write(
                self.style.SUCCESS(
                    "Resource-finding mappings created successfully.\n\n"
                )
            )
        except Exception as e:
            # Best effort: report the failure and still persist the scan below.
            self.stdout.write(self.style.ERROR(f"Failed to populate test data: {e}"))
            scan_state = "failed"
        finally:
            # Always close the scan, whether population succeeded or failed.
            scan.completed_at = datetime.now(timezone.utc)
            scan.duration = int(
                (datetime.now(timezone.utc) - scan.started_at).total_seconds()
            )
            scan.progress = 100
            scan.state = scan_state
            scan.unique_resource_count = num_resources
            with rls_transaction(tenant_id):
                scan.save()

        # Only claim success when no step failed; the failure path has already
        # printed its own error message.
        if scan_state == "completed":
            self.stdout.write(self.style.NOTICE("Successfully populated test data."))

View File

@@ -1,109 +0,0 @@
from functools import partial
from django.db import connection, migrations
def create_index_on_partitions(
    apps, schema_editor, parent_table: str, index_name: str, index_details: str
):
    """
    Create an index on every partition of ``parent_table``.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor of the database being migrated; it is used
            both to list the partitions and to run the ``CREATE INDEX`` statements.
        parent_table (str): Name of the partitioned parent table.
        index_name (str): Suffix of the index name; the final name is
            ``<partition with dots replaced by underscores>_<index_name>``.
        index_details (str): SQL placed after ``ON <partition>``, e.g. the column
            list and an optional ``where`` clause.
    """
    # List the partitions through the connection the migration is running on,
    # not the global default connection, so that the lookup and the index
    # creation always target the same database alias.
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]

    # Iterate over partitions and create index concurrently.
    # Note: PostgreSQL does not allow CONCURRENTLY inside a transaction,
    # so we need atomic = False for this migration.
    for partition in partitions:
        sql = (
            f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {partition.replace('.', '_')}_{index_name} ON {partition} "
            f"{index_details};"
        )
        schema_editor.execute(sql)
def drop_index_on_partitions(apps, schema_editor, parent_table: str, index_name: str):
    """
    Drop the per-partition index ``<partition>_<index_name>`` from every
    partition of ``parent_table``.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor used to list the partitions and to run the
            ``DROP INDEX`` statements.
        parent_table (str): Name of the partitioned parent table.
        index_name (str): Suffix of the index name to drop on each partition.
    """
    child_tables = []
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        for row in cursor.fetchall():
            child_tables.append(row[0])

    # Drop each partition's index concurrently; dots in the partition name are
    # replaced by underscores to form the index name.
    for child_table in child_tables:
        flattened_name = child_table.replace(".", "_")
        schema_editor.execute(
            f"DROP INDEX CONCURRENTLY IF EXISTS {flattened_name}_{index_name};"
        )
def _findings_partition_index(index_name: str, index_details: str):
    """Build the RunPython operation that creates (and, reversed, drops) one index on every `findings` partition."""
    return migrations.RunPython(
        partial(
            create_index_on_partitions,
            parent_table="findings",
            index_name=index_name,
            index_details=index_details,
        ),
        reverse_code=partial(
            drop_index_on_partitions,
            parent_table="findings",
            index_name=index_name,
        ),
    )


class Migration(migrations.Migration):
    """Create four performance indexes on each partition of the `findings` table."""

    # CREATE/DROP INDEX CONCURRENTLY cannot run inside a transaction.
    atomic = False

    dependencies = [
        ("api", "0009_increase_provider_uid_maximum_length"),
    ]

    # One operation per (index name suffix, index definition), in this order.
    operations = [
        _findings_partition_index(suffix, definition)
        for suffix, definition in (
            ("findings_tenant_and_id_idx", "(tenant_id, id)"),
            ("find_tenant_scan_idx", "(tenant_id, scan_id)"),
            ("find_tenant_scan_id_idx", "(tenant_id, scan_id, id)"),
            ("find_delta_new_idx", "(tenant_id, id) where delta = 'new'"),
        )
    ]

View File

@@ -1,49 +0,0 @@
from django.db import migrations, models
# Declares indexes on the `finding`, `resourcetagmapping` and `resource` models.
# The four `finding` index names are the same suffixes that migration 0010
# uses for its per-partition indexes.
class Migration(migrations.Migration):
dependencies = [
("api", "0010_findings_performance_indexes_partitions"),
]
operations = [
# finding: (tenant_id, id)
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"
),
),
# finding: (tenant_id, scan_id)
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"
),
),
# finding: (tenant_id, scan_id, id)
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
),
# finding: partial index on (tenant_id, id), restricted to rows with delta = "new"
migrations.AddIndex(
model_name="finding",
index=models.Index(
condition=models.Q(("delta", "new")),
fields=["tenant_id", "id"],
name="find_delta_new_idx",
),
),
# resourcetagmapping: (tenant_id, resource_id)
migrations.AddIndex(
model_name="resourcetagmapping",
index=models.Index(
fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
),
),
# resource: (tenant_id, service, region, type)
migrations.AddIndex(
model_name="resource",
index=models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
),
]

View File

@@ -1,15 +0,0 @@
from django.db import migrations, models
# Adds the optional `output_location` column to the `scan` model.
class Migration(migrations.Migration):
dependencies = [
("api", "0011_findings_performance_indexes_parent"),
]
operations = [
# Nullable/blank string of at most 200 characters.
migrations.AddField(
model_name="scan",
name="output_location",
field=models.CharField(blank=True, max_length=200, null=True),
),
]

View File

@@ -1,35 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-03 15:46
from functools import partial
from django.db import migrations
from api.db_utils import IntegrationTypeEnum, PostgresEnumMigration, register_enum
from api.models import Integration
# Helper describing the "integration_type" Postgres enum; its values are the
# first element of each entry in Integration.IntegrationChoices.choices.
IntegrationTypeEnumMigration = PostgresEnumMigration(
enum_name="integration_type",
enum_values=tuple(
integration_type[0]
for integration_type in Integration.IntegrationChoices.choices
),
)
# Creates the "integration_type" enum type and registers IntegrationTypeEnum.
class Migration(migrations.Migration):
# Run outside a single wrapping transaction.
atomic = False
dependencies = [
("api", "0012_scan_report_output"),
]
operations = [
# Forward: create the enum type; reverse: drop it.
migrations.RunPython(
IntegrationTypeEnumMigration.create_enum_type,
reverse_code=IntegrationTypeEnumMigration.drop_enum_type,
),
# Forward: register the enum class; reverse is a no-op.
migrations.RunPython(
partial(register_enum, enum_class=IntegrationTypeEnum),
reverse_code=migrations.RunPython.noop,
),
]

View File

@@ -1,131 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-03 15:46
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
from api.rls import RowLevelSecurityConstraint
# Creates the `Integration` model and its through model
# `IntegrationProviderRelationship`, adds a row-level-security constraint on
# `tenant_id` to each, and links integrations to providers (many-to-many).
class Migration(migrations.Migration):
dependencies = [
("api", "0013_integrations_enum"),
]
operations = [
# Integration model, stored in the "integrations" table.
migrations.CreateModel(
name="Integration",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
("enabled", models.BooleanField(default=False)),
("connected", models.BooleanField(blank=True, null=True)),
(
"connection_last_checked_at",
models.DateTimeField(blank=True, null=True),
),
(
"integration_type",
api.db_utils.IntegrationTypeEnumField(
choices=[
("amazon_s3", "Amazon S3"),
("saml", "SAML"),
("aws_security_hub", "AWS Security Hub"),
("jira", "JIRA"),
("slack", "Slack"),
]
),
),
("configuration", models.JSONField(default=dict)),
# Binary field exposed as `_credentials`, stored in the "credentials" column.
("_credentials", models.BinaryField(db_column="credentials")),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={"db_table": "integrations", "abstract": False},
),
# RLS constraint on tenant_id covering SELECT/INSERT/UPDATE/DELETE.
migrations.AddConstraint(
model_name="integration",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_integration",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
# Through model joining integrations and providers, stored in
# "integration_provider_mappings"; each (integration, provider) pair is unique.
migrations.CreateModel(
name="IntegrationProviderRelationship",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
(
"integration",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="api.integration",
),
),
(
"provider",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.provider"
),
),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "integration_provider_mappings",
"constraints": [
models.UniqueConstraint(
fields=("integration_id", "provider_id"),
name="unique_integration_provider_rel",
),
],
},
),
# RLS constraint on tenant_id for the through model.
migrations.AddConstraint(
model_name="IntegrationProviderRelationship",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_integrationproviderrelationship",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
# Many-to-many from Integration to Provider through the model above;
# reverse accessor on the provider side is `integrations`.
migrations.AddField(
model_name="integration",
name="providers",
field=models.ManyToManyField(
blank=True,
related_name="integrations",
through="api.IntegrationProviderRelationship",
to="api.provider",
),
),
]

View File

@@ -1,26 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-25 11:29
from django.db import migrations, models
import api.db_utils
class Migration(migrations.Migration):
    """Add a ``muted`` flag to findings and redeclare the ``status`` field choices."""

    # Applied after the 0014_integrations migration of the same app.
    dependencies = [
        ("api", "0014_integrations"),
    ]

    operations = [
        # New boolean column on the finding model, defaulting to False.
        migrations.AddField(
            model_name="finding",
            name="muted",
            field=models.BooleanField(default=False),
        ),
        # Status choices declared here are FAIL, PASS and MANUAL only.
        migrations.AlterField(
            model_name="finding",
            name="status",
            field=api.db_utils.StatusEnumField(
                choices=[("FAIL", "Fail"), ("PASS", "Pass"), ("MANUAL", "Manual")]
            ),
        ),
    ]

View File

@@ -1,32 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-31 10:46
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add ``compliance`` to findings and three optional text columns to resources."""

    # Applied after the 0015_finding_muted migration of the same app.
    dependencies = [
        ("api", "0015_finding_muted"),
    ]

    operations = [
        # Nullable JSON column on findings; new rows default to an empty dict.
        migrations.AddField(
            model_name="finding",
            name="compliance",
            field=models.JSONField(blank=True, default=dict, null=True),
        ),
        # The three resource columns below are all optional free-text fields.
        migrations.AddField(
            model_name="resource",
            name="details",
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="resource",
            name="metadata",
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="resource",
            name="partition",
            field=models.TextField(blank=True, null=True),
        ),
    ]

View File

@@ -21,7 +21,6 @@ from uuid6 import uuid7
from api.db_utils import (
CustomUserManager,
FindingDeltaEnumField,
IntegrationTypeEnumField,
InvitationStateEnumField,
MemberRoleEnumField,
ProviderEnumField,
@@ -59,6 +58,7 @@ class StatusChoices(models.TextChoices):
FAIL = "FAIL", _("Fail")
PASS = "PASS", _("Pass")
MANUAL = "MANUAL", _("Manual")
MUTED = "MUTED", _("Muted")
class StateChoices(models.TextChoices):
@@ -414,7 +414,6 @@ class Scan(RowLevelSecurityProtectedModel):
scheduler_task = models.ForeignKey(
PeriodicTask, on_delete=models.CASCADE, null=True, blank=True
)
output_location = models.CharField(blank=True, null=True, max_length=200)
# TODO: mutelist foreign key
class Meta(RowLevelSecurityProtectedModel.Meta):
@@ -518,11 +517,6 @@ class Resource(RowLevelSecurityProtectedModel):
editable=False,
)
metadata = models.TextField(blank=True, null=True)
details = models.TextField(blank=True, null=True)
partition = models.TextField(blank=True, null=True)
# Relationships
tags = models.ManyToManyField(
ResourceTag,
verbose_name="Tags associated with the resource, by provider",
@@ -558,10 +552,6 @@ class Resource(RowLevelSecurityProtectedModel):
fields=["uid", "region", "service", "name"],
name="resource_uid_reg_serv_name_idx",
),
models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
]
@@ -609,12 +599,6 @@ class ResourceTagMapping(RowLevelSecurityProtectedModel):
),
]
indexes = [
models.Index(
fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
),
]
class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
"""
@@ -660,8 +644,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
tags = models.JSONField(default=dict, null=True, blank=True)
check_id = models.CharField(max_length=100, blank=False, null=False)
check_metadata = models.JSONField(default=dict, null=False)
muted = models.BooleanField(default=False, null=False)
compliance = models.JSONField(default=dict, null=True, blank=True)
# Relationships
scan = models.ForeignKey(to=Scan, related_name="findings", on_delete=models.CASCADE)
@@ -715,17 +697,7 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
],
name="findings_filter_idx",
),
models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
models.Index(
fields=["tenant_id", "id"],
condition=Q(delta="new"),
name="find_delta_new_idx",
),
]
class JSONAPIMeta:
@@ -1145,80 +1117,3 @@ class ScanSummary(RowLevelSecurityProtectedModel):
class JSONAPIMeta:
resource_name = "scan-summaries"
class Integration(RowLevelSecurityProtectedModel):
    """
    Integration record protected by row-level security on ``tenant_id``.

    Credentials are never stored in clear text: the ``credentials`` property
    serialises the value to JSON and encrypts it with ``fernet`` before writing
    it to the binary ``credentials`` column, and reverses that on read.
    """

    class IntegrationChoices(models.TextChoices):
        S3 = "amazon_s3", _("Amazon S3")
        SAML = "saml", _("SAML")
        AWS_SECURITY_HUB = "aws_security_hub", _("AWS Security Hub")
        JIRA = "jira", _("JIRA")
        SLACK = "slack", _("Slack")

    id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
    inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
    updated_at = models.DateTimeField(auto_now=True, editable=False)
    enabled = models.BooleanField(default=False)
    # Nullable: None is distinct from an explicit True/False connection result.
    connected = models.BooleanField(null=True, blank=True)
    connection_last_checked_at = models.DateTimeField(null=True, blank=True)
    integration_type = IntegrationTypeEnumField(choices=IntegrationChoices.choices)
    configuration = models.JSONField(default=dict)
    # Encrypted payload; use the ``credentials`` property instead of this field.
    _credentials = models.BinaryField(db_column="credentials")

    providers = models.ManyToManyField(
        Provider,
        related_name="integrations",
        through="IntegrationProviderRelationship",
        blank=True,
    )

    class Meta(RowLevelSecurityProtectedModel.Meta):
        db_table = "integrations"

        constraints = [
            RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_%(class)s",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ]

    class JSONAPIMeta:
        resource_name = "integrations"

    @property
    def credentials(self):
        """Return the decrypted credentials, parsed from their JSON encoding."""
        token = self._credentials
        # The stored value may be a memoryview or a str; normalise it to bytes.
        if isinstance(token, memoryview):
            token = token.tobytes()
        elif isinstance(token, str):
            token = token.encode()
        return json.loads(fernet.decrypt(token).decode())

    @credentials.setter
    def credentials(self, value):
        """Serialise ``value`` to JSON, encrypt it and store the resulting bytes."""
        self._credentials = fernet.encrypt(json.dumps(value).encode())
class IntegrationProviderRelationship(RowLevelSecurityProtectedModel):
    """Through table linking an ``Integration`` to a ``Provider``."""

    id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
    # Deleting either side of the link removes the link row as well.
    integration = models.ForeignKey(Integration, on_delete=models.CASCADE)
    provider = models.ForeignKey(Provider, on_delete=models.CASCADE)
    inserted_at = models.DateTimeField(auto_now_add=True)

    # NOTE(review): unlike Integration.Meta, this Meta does not subclass
    # RowLevelSecurityProtectedModel.Meta - confirm this is intentional.
    class Meta:
        db_table = "integration_provider_mappings"
        constraints = [
            # A given integration can be linked to a given provider only once.
            models.UniqueConstraint(
                fields=["integration_id", "provider_id"],
                name="unique_integration_provider_rel",
            ),
            RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_%(class)s",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ]

View File

@@ -2,7 +2,8 @@ from typing import Any
from uuid import uuid4
from django.core.exceptions import ValidationError
from django.db import DEFAULT_DB_ALIAS, models
from django.db import DEFAULT_DB_ALIAS
from django.db import models
from django.db.backends.ddl_references import Statement, Table
from api.db_utils import DB_USER, POSTGRES_TENANT_VAR
@@ -58,11 +59,11 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
drop_sql_query = """
ALTER TABLE %(table_name)s NO FORCE ROW LEVEL SECURITY;
ALTER TABLE %(table_name)s DISABLE ROW LEVEL SECURITY;
REVOKE ALL ON TABLE %(table_name)s FROM %(db_user)s;
REVOKE ALL ON TABLE %(table_name) TO %(db_user)s;
"""
drop_policy_sql_query = """
DROP POLICY IF EXISTS %(db_user)s_%(raw_table_name)s_{statement} ON %(table_name)s;
DROP POLICY IF EXISTS %(db_user)s_%(table_name)s_{statement} on %(table_name)s;
"""
def __init__(
@@ -87,7 +88,9 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
f"{grant_queries}{self.grant_sql_query.format(statement=statement)}"
)
full_create_sql_query = f"{self.rls_sql_query}{policy_queries}{grant_queries}"
full_create_sql_query = (
f"{self.rls_sql_query}" f"{policy_queries}" f"{grant_queries}"
)
table_name = model._meta.db_table
if self.partition_name:
@@ -104,20 +107,16 @@ class RowLevelSecurityConstraint(models.BaseConstraint):
def remove_sql(self, model: Any, schema_editor: Any) -> Any:
field_column = schema_editor.quote_name(self.target_field)
raw_table_name = model._meta.db_table
table_name = raw_table_name
if self.partition_name:
raw_table_name = f"{raw_table_name}_{self.partition_name}"
table_name = raw_table_name
full_drop_sql_query = (
f"{self.drop_sql_query}"
f"{''.join([self.drop_policy_sql_query.format(statement=statement) for statement in self.statements])}"
f"{''.join([self.drop_policy_sql_query.format(statement) for statement in self.statements])}"
)
table_name = model._meta.db_table
if self.partition_name:
table_name = f"{table_name}_{self.partition_name}"
return Statement(
full_drop_sql_query,
table_name=Table(table_name, schema_editor.quote_name),
raw_table_name=raw_table_name,
field_column=field_column,
db_user=DB_USER,
partition_name=self.partition_name,

View File

@@ -1,12 +1,12 @@
from celery import states
from celery.signals import before_task_publish
from config.celery import celery_app
from django.db.models.signals import post_delete
from django.dispatch import receiver
from django_celery_beat.models import PeriodicTask
from django_celery_results.backends.database import DatabaseBackend
from api.db_utils import delete_related_daily_task
from api.models import Provider
from config.celery import celery_app
def create_task_result_on_publish(sender=None, headers=None, **kwargs): # noqa: F841
@@ -31,4 +31,5 @@ before_task_publish.connect(
@receiver(post_delete, sender=Provider)
def delete_provider_scan_task(sender, instance, **kwargs): # noqa: F841
# Delete the associated periodic task when the provider is deleted
delete_related_daily_task(instance.id)
task_name = f"scan-perform-scheduled-{instance.id}"
PeriodicTask.objects.filter(name=task_name).delete()

File diff suppressed because it is too large Load Diff

View File

@@ -3,8 +3,6 @@ from conftest import TEST_PASSWORD, get_api_tokens, get_authorization_header
from django.urls import reverse
from rest_framework.test import APIClient
from api.models import Membership, User
@pytest.mark.django_db
def test_basic_authentication():
@@ -179,122 +177,3 @@ def test_user_me_when_inviting_users(create_test_user, tenants_fixture, roles_fi
user2_me = client.get(reverse("user-me"), headers=user2_headers)
assert user2_me.status_code == 200
assert user2_me.json()["data"]["attributes"]["email"] == user2_email
@pytest.mark.django_db
class TestTokenSwitchTenant:
    """End-to-end checks for the ``token-switch`` endpoint."""

    def test_switch_tenant_with_valid_token(self, tenants_fixture, providers_fixture):
        """A user added to a second tenant can switch to it and list its providers."""
        api_client = APIClient()
        email = "test_email@prowler.com"
        password = "test_password"

        # Users can be created without any kind of authentication
        signup_payload = {
            "data": {
                "type": "users",
                "attributes": {
                    "name": "test",
                    "email": email,
                    "password": password,
                },
            }
        }
        signup_response = api_client.post(
            reverse("user-list"), data=signup_payload, format="vnd.api+json"
        )
        assert signup_response.status_code == 201

        # Make the new user a member of an additional, pre-existing tenant
        target_tenant_id = tenants_fixture[0].id
        new_user = User.objects.get(email=email)
        Membership.objects.create(user=new_user, tenant_id=target_tenant_id)

        # Authenticate as the new user
        access_token, _ = get_api_tokens(api_client, email, password)
        headers = get_authorization_header(access_token)

        me_response = api_client.get(reverse("user-me"), headers=headers)
        assert me_response.status_code == 200
        # The user now has two memberships
        memberships = me_response.json()["data"]["relationships"]["memberships"]
        assert memberships["meta"]["count"] == 2

        providers_before = api_client.get(reverse("provider-list"), headers=headers)
        assert providers_before.status_code == 200
        # The tenant bound to the first token has no providers
        assert not providers_before.json()["data"]

        switch_payload = {
            "data": {
                "type": "tokens-switch-tenant",
                "attributes": {"tenant_id": target_tenant_id},
            }
        }
        switch_response = api_client.post(
            reverse("token-switch"), data=switch_payload, headers=headers
        )
        assert switch_response.status_code == 200

        switched_token = switch_response.json()["data"]["attributes"]["access"]
        switched_headers = get_authorization_header(switched_token)

        providers_after = api_client.get(
            reverse("provider-list"), headers=switched_headers
        )
        assert providers_after.status_code == 200
        # The token for the switched tenant exposes that tenant's providers
        assert providers_after.json()["data"]

    def test_switch_tenant_with_invalid_token(self, create_test_user, tenants_fixture):
        """Malformed tenant ids and tenants the user does not belong to yield 400."""
        api_client = APIClient()
        access_token, _ = get_api_tokens(
            api_client, create_test_user.email, TEST_PASSWORD
        )
        headers = get_authorization_header(access_token)

        def switch_to(tenant_id):
            # Issue the switch request for the given tenant id
            payload = {
                "data": {
                    "type": "tokens-switch-tenant",
                    "attributes": {"tenant_id": tenant_id},
                }
            }
            return api_client.post(
                reverse("token-switch"), data=payload, headers=headers
            )

        # Value that is not a UUID at all
        malformed_response = switch_to("invalid_tenant_id")
        assert malformed_response.status_code == 400
        malformed_error = malformed_response.json()["errors"][0]
        assert malformed_error["code"] == "invalid"
        assert malformed_error["detail"] == "Must be a valid UUID."

        # Well-formed UUID of the last tenant in the fixture
        foreign_response = switch_to(tenants_fixture[-1].id)
        assert foreign_response.status_code == 400
        foreign_error = foreign_response.json()["errors"][0]
        assert foreign_error["code"] == "invalid"
        assert foreign_error["detail"] == (
            "Tenant does not exist or user is not a member."
        )

View File

@@ -131,10 +131,9 @@ class TestBatchDelete:
return provider_count
@pytest.mark.django_db
def test_batch_delete(self, tenants_fixture, create_test_providers):
tenant_id = str(tenants_fixture[0].id)
def test_batch_delete(self, create_test_providers):
_, summary = batch_delete(
tenant_id, Provider.objects.all(), batch_size=create_test_providers // 2
Provider.objects.all(), batch_size=create_test_providers // 2
)
assert Provider.objects.all().count() == 0
assert summary == {"api.Provider": create_test_providers}

View File

@@ -1,19 +1,7 @@
from unittest.mock import ANY, Mock, patch
import pytest
from django.urls import reverse
from rest_framework import status
from api.models import (
Membership,
ProviderGroup,
ProviderGroupMembership,
Role,
RoleProviderGroupRelationship,
User,
UserRoleRelationship,
)
from api.v1.serializers import TokenSerializer
from unittest.mock import patch, ANY, Mock
@pytest.mark.django_db
@@ -316,96 +304,3 @@ class TestProviderViewSet:
reverse("provider-connection", kwargs={"pk": provider.id})
)
assert response.status_code == status.HTTP_403_FORBIDDEN
@pytest.mark.django_db
class TestLimitedVisibility:
    """RBAC checks for a user whose role has ``unlimited_visibility=False``."""

    TEST_EMAIL = "rbac@rbac.com"
    TEST_PASSWORD = "thisisapassword123"

    @pytest.fixture
    def limited_admin_user(
        self, django_db_setup, django_db_blocker, tenants_fixture, providers_fixture
    ):
        """Create an owner with every manage_* permission but visibility on one provider."""
        with django_db_blocker.unblock():
            first_tenant = tenants_fixture[0]
            visible_provider = providers_fixture[0]

            rbac_user = User.objects.create_user(
                name="testing",
                email=self.TEST_EMAIL,
                password=self.TEST_PASSWORD,
            )
            Membership.objects.create(
                user=rbac_user,
                tenant=first_tenant,
                role=Membership.RoleChoices.OWNER,
            )

            # Every management permission is granted; only visibility is restricted
            manage_permissions = {
                "manage_users": True,
                "manage_account": True,
                "manage_billing": True,
                "manage_providers": True,
                "manage_integrations": True,
                "manage_scans": True,
            }
            limited_role = Role.objects.create(
                name="limited_visibility",
                tenant=first_tenant,
                unlimited_visibility=False,
                **manage_permissions,
            )
            UserRoleRelationship.objects.create(
                user=rbac_user,
                role=limited_role,
                tenant=first_tenant,
            )

            # The role reaches the provider through a provider group
            group = ProviderGroup.objects.create(
                name="limited_visibility_group",
                tenant=first_tenant,
            )
            ProviderGroupMembership.objects.create(
                tenant=first_tenant,
                provider=visible_provider,
                provider_group=group,
            )
            RoleProviderGroupRelationship.objects.create(
                tenant=first_tenant, role=limited_role, provider_group=group
            )

            return rbac_user

    @pytest.fixture
    def authenticated_client_rbac_limited(
        self, limited_admin_user, tenants_fixture, client
    ):
        """Return ``client`` carrying a bearer token issued for the limited user."""
        client.user = limited_admin_user
        token_request = {
            "type": "tokens",
            "email": self.TEST_EMAIL,
            "password": self.TEST_PASSWORD,
            "tenant_id": tenants_fixture[0].id,
        }
        token_serializer = TokenSerializer(data=token_request)
        token_serializer.is_valid(raise_exception=True)
        bearer = token_serializer.validated_data["access"]
        client.defaults["HTTP_AUTHORIZATION"] = f"Bearer {bearer}"
        return client

    def test_integrations(
        self, authenticated_client_rbac_limited, integrations_fixture, providers_fixture
    ):
        """The integration detail reports one provider although two are linked."""
        # The second integration is linked to provider 1 and provider 2;
        # the limited user cannot see provider 2
        target_integration = integrations_fixture[1]
        detail_url = reverse("integration-detail", kwargs={"pk": target_integration.id})

        response = authenticated_client_rbac_limited.get(detail_url)

        assert response.status_code == status.HTTP_200_OK
        assert target_integration.providers.count() == 2
        providers_meta = response.json()["data"]["relationships"]["providers"]["meta"]
        assert providers_meta["count"] == 1

View File

@@ -1,24 +1,25 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from unittest.mock import patch, MagicMock
import pytest
from rest_framework.exceptions import NotFound, ValidationError
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation, Provider
from api.utils import (
get_prowler_provider_kwargs,
initialize_prowler_provider,
merge_dicts,
prowler_provider_connection_test,
return_prowler_provider,
validate_invitation,
)
from prowler.providers.aws.aws_provider import AwsProvider
from prowler.providers.azure.azure_provider import AzureProvider
from prowler.providers.gcp.gcp_provider import GcpProvider
from prowler.providers.kubernetes.kubernetes_provider import KubernetesProvider
from rest_framework.exceptions import ValidationError, NotFound
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation
from api.models import Provider
from api.utils import (
merge_dicts,
return_prowler_provider,
initialize_prowler_provider,
prowler_provider_connection_test,
get_prowler_provider_kwargs,
)
from api.utils import validate_invitation
class TestMergeDicts:
@@ -143,18 +144,6 @@ class TestProwlerProviderConnectionTest:
key="value", provider_id="1234567890", raise_on_exception=False
)
@pytest.mark.django_db
@patch("api.utils.return_prowler_provider")
def test_prowler_provider_connection_test_without_secret(
    self, mock_return_prowler_provider, providers_fixture
):
    """The connection test on the first fixture provider reports a missing secret."""
    mock_return_prowler_provider.return_value = MagicMock()
    provider = providers_fixture[0]

    result = prowler_provider_connection_test(provider)

    assert result.is_connected is False
    reported_error = result.error
    assert isinstance(reported_error, Provider.secret.RelatedObjectDoesNotExist)
    assert str(reported_error) == "Provider has no secret."
class TestGetProwlerProviderKwargs:
@pytest.mark.parametrize(
@@ -285,10 +274,9 @@ class TestValidateInvitation:
expired_time = datetime.now(timezone.utc) - timedelta(days=1)
invitation.expires_at = expired_time
with (
patch("api.utils.Invitation.objects.using") as mock_using,
patch("api.utils.datetime") as mock_datetime,
):
with patch("api.utils.Invitation.objects.using") as mock_using, patch(
"api.utils.datetime"
) as mock_datetime:
mock_db = mock_using.return_value
mock_db.get.return_value = invitation
mock_datetime.now.return_value = datetime.now(timezone.utc)

View File

@@ -1,21 +1,15 @@
import glob
import io
import json
import os
from datetime import datetime, timedelta, timezone
from unittest.mock import ANY, Mock, patch
import jwt
import pytest
from botocore.exceptions import NoCredentialsError
from conftest import API_JSON_CONTENT_TYPE, TEST_PASSWORD, TEST_USER
from django.conf import settings
from django.urls import reverse
from rest_framework import status
from api.models import (
ComplianceOverview,
Integration,
Invitation,
Membership,
Provider,
@@ -26,7 +20,6 @@ from api.models import (
RoleProviderGroupRelationship,
Scan,
StateChoices,
Task,
User,
UserRoleRelationship,
)
@@ -41,14 +34,6 @@ def today_after_n_days(n_days: int) -> str:
)
class TestViewSet:
    """Checks that apply to every API response."""

    def test_security_headers(self, client):
        """The root URL response carries the expected security headers."""
        expected_headers = {
            "X-Content-Type-Options": "nosniff",
            "X-Frame-Options": "DENY",
            "Referrer-Policy": "strict-origin-when-cross-origin",
        }
        response = client.get("/")
        for header_name, header_value in expected_headers.items():
            assert response.headers[header_name] == header_value
@pytest.mark.django_db
class TestUserViewSet:
def test_users_list(self, authenticated_client, create_test_user):
@@ -2094,9 +2079,9 @@ class TestScanViewSet:
("started_at.gte", "2024-01-01", 3),
("started_at.lte", "2024-01-01", 0),
("trigger", Scan.TriggerChoices.MANUAL, 1),
("state", StateChoices.AVAILABLE, 1),
("state", StateChoices.AVAILABLE, 2),
("state", StateChoices.FAILED, 1),
("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 2),
("state.in", f"{StateChoices.FAILED},{StateChoices.AVAILABLE}", 3),
("trigger", Scan.TriggerChoices.MANUAL, 1),
]
),
@@ -2171,181 +2156,6 @@ class TestScanViewSet:
response = authenticated_client.get(reverse("scan-list"), {"sort": "invalid"})
assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_report_executing(self, authenticated_client, scans_fixture):
    """
    A scan in EXECUTING state answers the report request with HTTP 202 and a
    Content-Location header containing the id of its task.
    """
    running_scan = scans_fixture[0]
    running_scan.state = StateChoices.EXECUTING
    running_scan.save()

    scan_task = Task.objects.create(tenant_id=running_scan.tenant_id)
    serialized_task = {"id": str(scan_task.id), "state": StateChoices.EXECUTING}
    running_scan.task = scan_task
    running_scan.save()

    # Stand-in whose only attribute is the canned ``data`` payload
    fake_serializer = type("DummySerializer", (), {"data": serialized_task})
    report_url = reverse("scan-report", kwargs={"pk": running_scan.id})
    with patch("api.v1.views.TaskSerializer", return_value=fake_serializer):
        response = authenticated_client.get(report_url)

    assert response.status_code == status.HTTP_202_ACCEPTED
    assert "Content-Location" in response
    assert serialized_task["id"] in response["Content-Location"]
def test_report_celery_task_executing(self, authenticated_client, scans_fixture):
    """
    A completed scan whose looked-up task is reported as executing answers the
    report request with HTTP 202 and that task's id in Content-Location.
    """
    finished_scan = scans_fixture[0]
    finished_scan.state = StateChoices.COMPLETED
    finished_scan.output_location = "dummy"
    finished_scan.save()

    pending_task = Task.objects.create(tenant_id=finished_scan.tenant_id)
    pending_task.id = "dummy-task-id"
    serialized_task = {"id": pending_task.id, "state": StateChoices.EXECUTING}

    # Stand-in whose only attribute is the canned ``data`` payload
    fake_serializer = type("DummySerializer", (), {"data": serialized_task})
    report_url = reverse("scan-report", kwargs={"pk": finished_scan.id})
    with (
        patch("api.v1.views.Task.objects.get", return_value=pending_task),
        patch("api.v1.views.TaskSerializer", return_value=fake_serializer),
    ):
        response = authenticated_client.get(report_url)

    assert response.status_code == status.HTTP_202_ACCEPTED
    assert "Content-Location" in response
    assert serialized_task["id"] in response["Content-Location"]
def test_report_no_output_location(self, authenticated_client, scans_fixture):
    """A completed scan with an empty output_location yields a 404 report response."""
    empty_scan = scans_fixture[0]
    empty_scan.state = StateChoices.COMPLETED
    empty_scan.output_location = ""
    empty_scan.save()

    report_url = reverse("scan-report", kwargs={"pk": empty_scan.id})
    response = authenticated_client.get(report_url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
    error_detail = response.json()["errors"]["detail"]
    assert error_detail == "The scan has no reports."
def test_report_s3_no_credentials(
    self, authenticated_client, scans_fixture, monkeypatch
):
    """
    With an S3 output_location, a NoCredentialsError raised by get_s3_client()
    results in HTTP 403 and the credentials error message.
    """
    bucket, key = "test-bucket", "report.zip"
    s3_scan = scans_fixture[0]
    s3_scan.output_location = f"s3://{bucket}/{key}"
    s3_scan.state = StateChoices.COMPLETED
    s3_scan.save()

    def raise_no_credentials():
        raise NoCredentialsError()

    monkeypatch.setattr("api.v1.views.get_s3_client", raise_no_credentials)

    report_url = reverse("scan-report", kwargs={"pk": s3_scan.id})
    response = authenticated_client.get(report_url)

    assert response.status_code == status.HTTP_403_FORBIDDEN
    error_detail = response.json()["errors"]["detail"]
    assert error_detail == "There is a problem with credentials."
def test_report_s3_success(self, authenticated_client, scans_fixture, monkeypatch):
    """
    With an S3 output_location and a working S3 client, the report endpoint
    returns the object body as an attachment with HTTP 200.
    """
    bucket = "test-bucket"
    key = "report.zip"
    payload = b"s3 zip content"

    s3_scan = scans_fixture[0]
    s3_scan.output_location = f"s3://{bucket}/{key}"
    s3_scan.state = StateChoices.COMPLETED
    s3_scan.save()

    # ``env.str(...)`` in the view always resolves to the test bucket
    fake_env = type("env", (), {"str": lambda self, key: bucket})()
    monkeypatch.setattr("api.v1.views.env", fake_env)

    class FakeS3Client:
        def get_object(self, Bucket, Key):
            # The view must request exactly the bucket/key from output_location
            assert Bucket == bucket
            assert Key == key
            return {"Body": io.BytesIO(payload)}

    monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())

    report_url = reverse("scan-report", kwargs={"pk": s3_scan.id})
    response = authenticated_client.get(report_url)

    assert response.status_code == 200
    filename = os.path.basename("report.zip")
    disposition = response.get("Content-Disposition")
    assert disposition.startswith('attachment; filename="')
    assert f'filename="{filename}"' in disposition
    assert response.content == payload
def test_report_s3_success_no_local_files(
    self, authenticated_client, scans_fixture, monkeypatch
):
    """
    Despite the name, this covers a local output_location: when glob.glob finds
    no matching file the endpoint answers 404 with "The scan has no reports."
    """
    local_scan = scans_fixture[0]
    local_scan.output_location = "/tmp/nonexistent_report_pattern.zip"
    local_scan.state = StateChoices.COMPLETED
    local_scan.save()

    # Simulate an empty directory listing for any pattern
    monkeypatch.setattr("api.v1.views.glob.glob", lambda pattern: [])

    report_url = reverse("scan-report", kwargs={"pk": local_scan.id})
    response = authenticated_client.get(report_url)

    assert response.status_code == 404
    error_detail = response.json()["errors"]["detail"]
    assert error_detail == "The scan has no reports."
def test_report_local_file(
    self, authenticated_client, scans_fixture, tmp_path, monkeypatch
):
    """
    With a local output_location, the report endpoint returns the file's bytes
    as an attachment named after the file.
    """
    expected_bytes = b"local zip file content"
    report_file = tmp_path / "report.zip"
    report_file.write_bytes(expected_bytes)
    report_path = str(report_file)

    local_scan = scans_fixture[0]
    local_scan.output_location = report_path
    local_scan.state = StateChoices.COMPLETED
    local_scan.save()

    def fake_glob(pattern):
        # Only the exact report path matches; every other pattern is empty
        return [report_path] if pattern == report_path else []

    monkeypatch.setattr(glob, "glob", fake_glob)

    report_url = reverse("scan-report", kwargs={"pk": local_scan.id})
    response = authenticated_client.get(report_url)

    assert response.status_code == 200
    assert response.content == expected_bytes
    disposition = response.get("Content-Disposition")
    assert disposition.startswith('attachment; filename="')
    assert f'filename="{report_file.name}"' in disposition
@pytest.mark.django_db
class TestTaskViewSet:
@@ -2625,7 +2435,7 @@ class TestFindingViewSet:
[
("resources", ["resources"]),
("scan", ["scans"]),
("resources,scan.provider", ["resources", "scans", "providers"]),
("resources.provider,scan", ["resources", "scans", "providers"]),
],
)
def test_findings_list_include(
@@ -2681,8 +2491,8 @@ class TestFindingViewSet:
("search", "orange juice", 1),
# full text search on resource
("search", "ec2", 2),
# full text search on finding tags (disabled for now)
# ("search", "value2", 2),
# full text search on finding tags
("search", "value2", 2),
# Temporary disabled until we implement tag filtering in the UI
# ("resource_tag_key", "key", 2),
# ("resource_tag_key__in", "key,key2", 2),
@@ -2693,8 +2503,6 @@ class TestFindingViewSet:
# ("resource_tags", "key:value", 2),
# ("resource_tags", "not:exists", 0),
# ("resource_tags", "not:exists,key:value", 2),
("muted", True, 1),
("muted", False, 1),
]
),
)
@@ -4511,33 +4319,6 @@ class TestComplianceOverviewViewSet:
assert len(response.json()["data"]) == 1
assert response.json()["data"][0]["id"] == str(compliance_overview1.id)
def test_compliance_overview_metadata(
    self, authenticated_client, compliance_overviews_fixture
):
    """The metadata endpoint returns the distinct regions of the stored overviews."""
    scan_id = str(compliance_overviews_fixture[0].scan_id)
    response = authenticated_client.get(
        reverse("complianceoverview-metadata"),
        {"filter[scan_id]": scan_id},
    )
    body = response.json()

    distinct_regions = ComplianceOverview.objects.all().values_list(
        "region", flat=True
    ).distinct("region")
    expected_regions = set(distinct_regions)

    assert response.status_code == status.HTTP_200_OK
    resource = body["data"]
    assert resource["type"] == "compliance-overviews-metadata"
    assert resource["id"] is None
    assert set(resource["attributes"]["regions"]) == expected_regions
def test_compliance_overview_metadata_missing_scan_id(self, authenticated_client):
    """Omitting filter[scan_id] on the metadata endpoint is rejected as a 400."""
    # Request the compliance overview metadata without providing filter[scan_id]
    response = authenticated_client.get(reverse("complianceoverview-metadata"))

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    first_error = response.json()["errors"][0]
    assert first_error["source"]["pointer"] == "filter[scan_id]"
    assert first_error["code"] == "required"
@pytest.mark.django_db
class TestOverviewViewSet:
@@ -4629,415 +4410,3 @@ class TestScheduleViewSet:
reverse("schedule-daily"), data=json_payload, format="json"
)
assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.django_db
class TestIntegrationViewSet:
def test_integrations_list(self, authenticated_client, integrations_fixture):
    """Listing integrations returns one entry per fixture integration."""
    list_url = reverse("integration-list")
    response = authenticated_client.get(list_url)

    assert response.status_code == status.HTTP_200_OK
    returned_items = response.json()["data"]
    assert len(returned_items) == len(integrations_fixture)
def test_integrations_retrieve(self, authenticated_client, integrations_fixture):
    """Retrieving an integration returns its id and stored configuration."""
    first_integration = integrations_fixture[0]
    detail_url = reverse("integration-detail", kwargs={"pk": first_integration.id})

    response = authenticated_client.get(detail_url)

    assert response.status_code == status.HTTP_200_OK
    resource = response.json()["data"]
    assert resource["id"] == str(first_integration.id)
    assert resource["attributes"]["configuration"] == first_integration.configuration
def test_integrations_invalid_retrieve(self, authenticated_client):
    """Requesting an integration id that does not exist yields a 404."""
    unknown_id = "f498b103-c760-4785-9a3e-e23fafbb7b02"
    detail_url = reverse("integration-detail", kwargs={"pk": unknown_id})

    response = authenticated_client.get(detail_url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.parametrize(
    "include_values, expected_resources",
    [
        ("providers", ["providers"]),
    ],
)
def test_integrations_list_include(
    self,
    include_values,
    expected_resources,
    authenticated_client,
    integrations_fixture,
):
    """The ``include`` parameter adds the requested resource types to ``included``."""
    response = authenticated_client.get(
        reverse("integration-list"), {"include": include_values}
    )
    body = response.json()

    assert response.status_code == status.HTTP_200_OK
    assert len(body["data"]) == len(integrations_fixture)
    assert "included" in body

    included_types = [item.get("type") for item in body["included"]]
    for expected_type in expected_resources:
        assert (
            expected_type in included_types
        ), f"Expected type '{expected_type}' not found in included data"
@pytest.mark.parametrize(
    "integration_type, configuration, credentials",
    [
        # Amazon S3 - AWS credentials
        (
            Integration.IntegrationChoices.S3,
            {
                "bucket_name": "bucket-name",
                "output_directory": "output-directory",
            },
            {
                "role_arn": "arn:aws",
                "external_id": "external-id",
            },
        ),
        # Amazon S3 - No credentials (AWS self-hosted)
        (
            Integration.IntegrationChoices.S3,
            {
                "bucket_name": "bucket-name",
                "output_directory": "output-directory",
            },
            {},
        ),
    ],
)
def test_integrations_create_valid(
    self,
    authenticated_client,
    providers_fixture,
    integration_type,
    configuration,
    credentials,
):
    """
    Creating an integration with a valid payload returns 201, persists the
    configuration, type and provider link, and never echoes the credentials.
    """
    provider = Provider.objects.first()
    data = {
        "data": {
            "type": "integrations",
            "attributes": {
                "integration_type": integration_type,
                "configuration": configuration,
                "credentials": credentials,
            },
            "relationships": {
                "providers": {
                    "data": [{"type": "providers", "id": str(provider.id)}]
                }
            },
        }
    }
    response = authenticated_client.post(
        reverse("integration-list"),
        data=json.dumps(data),
        content_type="application/vnd.api+json",
    )
    assert response.status_code == status.HTTP_201_CREATED
    assert Integration.objects.count() == 1
    integration = Integration.objects.first()
    assert integration.configuration == data["data"]["attributes"]["configuration"]
    assert (
        integration.integration_type
        == data["data"]["attributes"]["integration_type"]
    )
    # Credentials are write-only and must not appear in the response
    assert "credentials" not in response.json()["data"]["attributes"]
    # Check the persisted relationship itself; comparing the provider id with
    # the request payload built from it could never fail.
    assert list(integration.providers.values_list("id", flat=True)) == [provider.id]
def test_integrations_create_valid_relationships(
    self,
    authenticated_client,
    providers_fixture,
):
    """Creating an integration with two provider references links both providers."""
    first_provider, second_provider, *_ = providers_fixture
    provider_refs = [
        {"type": "providers", "id": str(linked.id)}
        for linked in (first_provider, second_provider)
    ]
    attributes = {
        "integration_type": Integration.IntegrationChoices.S3,
        "configuration": {
            "bucket_name": "bucket-name",
            "output_directory": "output-directory",
        },
        "credentials": {
            "role_arn": "arn:aws",
            "external_id": "external-id",
        },
    }
    payload = {
        "data": {
            "type": "integrations",
            "attributes": attributes,
            "relationships": {"providers": {"data": provider_refs}},
        }
    }

    response = authenticated_client.post(
        reverse("integration-list"),
        data=json.dumps(payload),
        content_type="application/vnd.api+json",
    )

    assert response.status_code == status.HTTP_201_CREATED
    created = Integration.objects.first()
    assert created.providers.count() == 2
@pytest.mark.parametrize(
    "attributes, error_code, error_pointer",
    [
        # Unknown integration type.
        (
            {
                "integration_type": "whatever",
                "configuration": {
                    "bucket_name": "bucket-name",
                    "output_directory": "output-directory",
                },
                "credentials": {
                    "role_arn": "arn:aws",
                    "external_id": "external-id",
                },
            },
            "invalid_choice",
            "integration_type",
        ),
        # Empty configuration: required S3 keys are missing.
        (
            {
                "integration_type": "amazon_s3",
                "configuration": {},
                "credentials": {
                    "role_arn": "arn:aws",
                    "external_id": "external-id",
                },
            },
            "required",
            "bucket_name",
        ),
        # Unknown configuration key: reported as a non-field error ("/data").
        (
            {
                "integration_type": "amazon_s3",
                "configuration": {
                    "bucket_name": "bucket_name",
                    "output_directory": "output_directory",
                    "invalid_key": "invalid_value",
                },
                "credentials": {
                    "role_arn": "arn:aws",
                    "external_id": "external-id",
                },
            },
            "invalid",
            None,
        ),
        # Unknown credentials key.
        # NOTE(review): the write serializer raises this failure keyed by
        # "credentials", so the pointer is expected to be the attribute path
        # rather than "/data". The previous expectation (None) was never
        # actually checked; confirm against a test run.
        (
            {
                "integration_type": "amazon_s3",
                "configuration": {
                    "bucket_name": "bucket_name",
                    "output_directory": "output_directory",
                },
                "credentials": {"invalid_key": "invalid_key"},
            },
            "invalid",
            "credentials",
        ),
    ],
)
def test_integrations_invalid_create(
    self,
    authenticated_client,
    attributes,
    error_code,
    error_pointer,
):
    """POST invalid integration attributes and check the first reported error.

    ``error_pointer`` is the attribute name the error must point at, or
    ``None`` when the error is expected at the document level ("/data").
    """
    data = {
        "data": {
            "type": "integrations",
            "attributes": attributes,
        }
    }
    response = authenticated_client.post(
        reverse("integration-list"),
        data=json.dumps(data),
        content_type="application/vnd.api+json",
    )
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    first_error = response.json()["errors"][0]
    assert first_error["code"] == error_code
    # Build the expected pointer first. Writing `a == b if cond else "/data"`
    # inside the assert parses as `(a == b) if cond else "/data"`, which is
    # always truthy when cond is falsy and so checks nothing.
    expected_pointer = (
        f"/data/attributes/{error_pointer}" if error_pointer else "/data"
    )
    assert first_error["source"]["pointer"] == expected_pointer
def test_integrations_partial_update(
    self, authenticated_client, integrations_fixture
):
    """PATCH new credentials and configuration and verify they are persisted."""
    integration, *_ = integrations_fixture
    # integration_type is `amazon_s3`
    new_configuration = {
        "bucket_name": "new_bucket_name",
        "output_directory": "new_output_directory",
    }
    new_credentials = {"aws_access_key_id": "new_value"}
    payload = {
        "data": {
            "type": "integrations",
            "id": str(integration.id),
            "attributes": {
                "credentials": new_credentials,
                "configuration": new_configuration,
            },
        }
    }
    detail_url = reverse("integration-detail", kwargs={"pk": integration.id})

    response = authenticated_client.patch(
        detail_url,
        data=json.dumps(payload),
        content_type="application/vnd.api+json",
    )

    assert response.status_code == status.HTTP_200_OK
    integration.refresh_from_db()
    assert integration.credentials["aws_access_key_id"] == "new_value"
    stored_configuration = integration.configuration
    assert stored_configuration["bucket_name"] == "new_bucket_name"
    assert stored_configuration["output_directory"] == "new_output_directory"
def test_integrations_partial_update_relationships(
    self, authenticated_client, integrations_fixture
):
    """PATCH with an empty providers relationship removes every provider link."""
    integration, *_ = integrations_fixture
    attributes = {
        "credentials": {
            "aws_access_key_id": "new_value",
        },
        # integration_type is `amazon_s3`
        "configuration": {
            "bucket_name": "new_bucket_name",
            "output_directory": "new_output_directory",
        },
    }
    payload = {
        "data": {
            "type": "integrations",
            "id": str(integration.id),
            "attributes": attributes,
            "relationships": {"providers": {"data": []}},
        }
    }

    # Precondition: there is at least one link to remove.
    assert integration.providers.count() > 0

    response = authenticated_client.patch(
        reverse("integration-detail", kwargs={"pk": integration.id}),
        data=json.dumps(payload),
        content_type="application/vnd.api+json",
    )

    assert response.status_code == status.HTTP_200_OK
    integration.refresh_from_db()
    assert integration.providers.count() == 0
def test_integrations_partial_update_invalid_content_type(
    self, authenticated_client, integrations_fixture
):
    """PATCH without the JSON:API content type is answered with 415."""
    target, *_ = integrations_fixture
    detail_url = reverse("integration-detail", kwargs={"pk": target.id})
    response = authenticated_client.patch(detail_url, data={})
    assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE
def test_integrations_partial_update_invalid_content(
    self, authenticated_client, integrations_fixture
):
    """PATCH with an unknown attribute is answered with 400."""
    target, *_ = integrations_fixture
    payload = {
        "data": {
            "type": "integrations",
            "id": str(target.id),
            "attributes": {"invalid_config": "value"},
        }
    }
    detail_url = reverse("integration-detail", kwargs={"pk": target.id})
    response = authenticated_client.patch(
        detail_url,
        data=json.dumps(payload),
        content_type="application/vnd.api+json",
    )
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_integrations_delete(
    self,
    authenticated_client,
    integrations_fixture,
):
    """DELETE on an existing integration is answered with 204."""
    target, *_ = integrations_fixture
    detail_url = reverse("integration-detail", kwargs={"pk": target.id})
    response = authenticated_client.delete(detail_url)
    assert response.status_code == status.HTTP_204_NO_CONTENT
def test_integrations_delete_invalid(self, authenticated_client):
    """DELETE on an ID with no matching integration is answered with 404."""
    missing_pk = "e67d0283-440f-48d1-b5f8-38d0763474f4"
    detail_url = reverse("integration-detail", kwargs={"pk": missing_pk})
    response = authenticated_client.delete(detail_url)
    assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.parametrize(
    "filter_name, filter_value, expected_count",
    [
        ("inserted_at", TODAY, 2),
        ("inserted_at.gte", "2024-01-01", 2),
        ("inserted_at.lte", "2024-01-01", 0),
        ("integration_type", Integration.IntegrationChoices.S3, 2),
        ("integration_type", Integration.IntegrationChoices.SLACK, 0),
        (
            "integration_type__in",
            f"{Integration.IntegrationChoices.S3},{Integration.IntegrationChoices.SLACK}",
            2,
        ),
    ],
)
def test_integrations_filters(
    self,
    authenticated_client,
    integrations_fixture,
    filter_name,
    filter_value,
    expected_count,
):
    """Each supported list filter returns the expected number of integrations."""
    query = {f"filter[{filter_name}]": filter_value}
    response = authenticated_client.get(reverse("integration-list"), query)
    assert response.status_code == status.HTTP_200_OK
    returned = response.json()["data"]
    assert len(returned) == expected_count
@pytest.mark.parametrize("filter_name", ["invalid"])
def test_integrations_filters_invalid(self, authenticated_client, filter_name):
    """Listing with an unsupported filter name is answered with 400."""
    query = {f"filter[{filter_name}]": "whatever"}
    response = authenticated_client.get(reverse("integration-list"), query)
    assert response.status_code == status.HTTP_400_BAD_REQUEST

View File

@@ -1,25 +1,15 @@
from datetime import datetime, timezone
from allauth.socialaccount.providers.oauth2.client import OAuth2Client
from rest_framework.exceptions import NotFound, ValidationError
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation, Provider
from prowler.providers.aws.aws_provider import AwsProvider
from prowler.providers.azure.azure_provider import AzureProvider
from prowler.providers.common.models import Connection
from prowler.providers.gcp.gcp_provider import GcpProvider
from prowler.providers.kubernetes.kubernetes_provider import KubernetesProvider
from rest_framework.exceptions import ValidationError, NotFound
class CustomOAuth2Client(OAuth2Client):
    # OAuth2 client that drops a "scope_delimiter" keyword argument before
    # delegating to the parent constructor.

    def __init__(self, client_id, secret, *args, **kwargs):
        # Remove any duplicate "scope_delimiter" from kwargs
        # Bug present in dj-rest-auth after version v7.0.1
        # https://github.com/iMerica/dj-rest-auth/issues/673
        if "scope_delimiter" in kwargs:
            del kwargs["scope_delimiter"]
        super().__init__(client_id, secret, *args, **kwargs)
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Provider, Invitation
def merge_dicts(default_dict: dict, replacement_dict: dict) -> dict:
@@ -130,10 +120,7 @@ def prowler_provider_connection_test(provider: Provider) -> Connection:
Connection: A connection object representing the result of the connection test for the specified provider.
"""
prowler_provider = return_prowler_provider(provider)
try:
prowler_provider_kwargs = provider.secret.secret
except Provider.secret.RelatedObjectDoesNotExist as secret_error:
return Connection(is_connected=False, error=secret_error)
prowler_provider_kwargs = provider.secret.secret
return prowler_provider.test_connection(
**prowler_provider_kwargs, provider_id=provider.uid, raise_on_exception=False
)

View File

@@ -106,7 +106,7 @@ def uuid7_end(uuid_obj: UUID, offset_months: int = 1) -> UUID:
Args:
uuid_obj: A UUIDv7 object.
offset_months: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
offset_days: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
partitions are not being used, if so the value will be the one set at FINDINGS_TABLE_PARTITION_MONTHS.
Returns:

View File

@@ -1,122 +0,0 @@
from drf_spectacular.utils import extend_schema_field
from rest_framework_json_api import serializers
from rest_framework_json_api.serializers import ValidationError
class BaseValidateSerializer(serializers.Serializer):
    # Base serializer that rejects input keys that are not declared fields.

    def validate(self, data):
        # Without raw input there is nothing to compare against.
        if not hasattr(self, "initial_data"):
            return data

        # "id" and "type" are allowed in the payload without being fields.
        provided_keys = set(self.initial_data.keys()) - {"id", "type"}
        declared_keys = set(self.fields.keys())
        unknown_keys = provided_keys - declared_keys
        if unknown_keys:
            raise ValidationError(f"Invalid fields: {unknown_keys}")
        return data
# Integrations
# Validates the `configuration` payload of an Amazon S3 integration.
# Both fields are required character fields.
class S3ConfigSerializer(BaseValidateSerializer):
    bucket_name = serializers.CharField()
    output_directory = serializers.CharField()

    class Meta:
        resource_name = "integrations"
# Validates an AWS `credentials` payload. Every field is optional, so an
# empty mapping is accepted.
class AWSCredentialSerializer(BaseValidateSerializer):
    role_arn = serializers.CharField(required=False)
    external_id = serializers.CharField(required=False)
    role_session_name = serializers.CharField(required=False)
    # Bounded to 900..43200, the same range advertised in the credential
    # field's schema.
    session_duration = serializers.IntegerField(
        required=False, min_value=900, max_value=43200
    )
    aws_access_key_id = serializers.CharField(required=False)
    aws_secret_access_key = serializers.CharField(required=False)
    aws_session_token = serializers.CharField(required=False)

    class Meta:
        resource_name = "integrations"
# JSON field for integration credentials. The decorator only attaches the
# documented schema (a `oneOf` with a single AWS variant); the class adds no
# behavior on top of JSONField.
@extend_schema_field(
    {
        "oneOf": [
            {
                "type": "object",
                "title": "AWS Credentials",
                "properties": {
                    "role_arn": {
                        "type": "string",
                        "description": "The Amazon Resource Name (ARN) of the role to assume. Required for AWS role "
                        "assumption.",
                    },
                    "external_id": {
                        "type": "string",
                        "description": "An identifier to enhance security for role assumption.",
                    },
                    "aws_access_key_id": {
                        "type": "string",
                        "description": "The AWS access key ID. Only required if the environment lacks pre-configured "
                        "AWS credentials.",
                    },
                    "aws_secret_access_key": {
                        "type": "string",
                        "description": "The AWS secret access key. Required if 'aws_access_key_id' is provided or if "
                        "no AWS credentials are pre-configured.",
                    },
                    "aws_session_token": {
                        "type": "string",
                        "description": "The session token for temporary credentials, if applicable.",
                    },
                    "session_duration": {
                        "type": "integer",
                        "minimum": 900,
                        "maximum": 43200,
                        "default": 3600,
                        "description": "The duration (in seconds) for the role session.",
                    },
                    "role_session_name": {
                        "type": "string",
                        "description": "An identifier for the role session, useful for tracking sessions in AWS logs. "
                        "The regex used to validate this parameter is a string of characters consisting of "
                        "upper- and lower-case alphanumeric characters with no spaces. You can also include "
                        "underscores or any of the following characters: =,.@-\n\n"
                        "Examples:\n"
                        "- MySession123\n"
                        "- User_Session-1\n"
                        "- Test.Session@2",
                        "pattern": "^[a-zA-Z0-9=,.@_-]+$",
                    },
                },
            },
        ]
    }
)
class IntegrationCredentialField(serializers.JSONField):
    pass
# JSON field for integration configuration. The decorator only attaches the
# documented schema (a `oneOf` with a single Amazon S3 variant); the class
# adds no behavior on top of JSONField.
@extend_schema_field(
    {
        "oneOf": [
            {
                "type": "object",
                "title": "Amazon S3",
                "properties": {
                    "bucket_name": {
                        "type": "string",
                        "description": "The name of the S3 bucket where files will be stored.",
                    },
                    "output_directory": {
                        "type": "string",
                        "description": "The directory path within the bucket where files will be saved.",
                    },
                },
                "required": ["bucket_name", "output_directory"],
            },
        ]
    }
)
class IntegrationConfigField(serializers.JSONField):
    pass

View File

@@ -16,8 +16,6 @@ from rest_framework_simplejwt.tokens import RefreshToken
from api.models import (
ComplianceOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
InvitationRoleRelationship,
Membership,
@@ -36,75 +34,11 @@ from api.models import (
UserRoleRelationship,
)
from api.rls import Tenant
from api.v1.serializer_utils.integrations import (
AWSCredentialSerializer,
IntegrationConfigField,
IntegrationCredentialField,
S3ConfigSerializer,
)
# Tokens
def generate_tokens(user: User, tenant_id: str) -> dict:
    """Issue a refresh/access token pair for ``user`` scoped to ``tenant_id``.

    Args:
        user: The user the tokens are issued for.
        tenant_id: Tenant identifier stored in the ``tenant_id`` claim.

    Returns:
        A dict with the string-encoded ``access`` and ``refresh`` tokens.

    Raises:
        ValidationError: If the refresh token cannot be created.
    """
    try:
        refresh_token = RefreshToken.for_user(user)
    except InvalidKeyError:
        # Handle invalid key error
        raise ValidationError(
            {
                "detail": "Token generation failed due to invalid key configuration. Provide valid "
                "DJANGO_TOKEN_SIGNING_KEY and DJANGO_TOKEN_VERIFYING_KEY in the environment."
            }
        )
    except Exception as error:
        raise ValidationError({"detail": str(error)})

    # Post-process the refresh token before deriving the access token from it.
    refresh_token["tenant_id"] = tenant_id
    # simplejwt does not provide a way to set the nbf (not before) claim, so
    # copy it from the iat (issued at) claim.
    refresh_token.payload["nbf"] = refresh_token["iat"]

    access_token = refresh_token.access_token

    if settings.SIMPLE_JWT["UPDATE_LAST_LOGIN"]:
        update_last_login(None, user)

    return {"access": str(access_token), "refresh": str(refresh_token)}
class BaseTokenSerializer(TokenObtainPairSerializer):
    # Shared credential and tenant resolution for the token serializers.

    def custom_validate(self, attrs, social: bool = False):
        # Resolve the user and tenant from `attrs` and return a token pair.
        #
        # attrs:  validated input; reads "email", "password" and the
        #         optional "tenant_id".
        # social: when True the user is looked up by email only and the
        #         password is not checked.
        #
        # Raises ValidationError for unknown users, failed authentication,
        # a tenant the user does not belong to, or a user with no memberships.
        email = attrs.get("email")
        password = attrs.get("password")
        tenant_id = str(attrs.get("tenant_id", ""))

        # Authenticate user
        if social:
            # filter().first() returns None for an unknown email, so the
            # "Invalid credentials" check below handles it instead of an
            # uncaught User.DoesNotExist.
            user = User.objects.filter(email=email).first()
        else:
            user = authenticate(username=email, password=password)
        if user is None:
            raise ValidationError("Invalid credentials")

        if tenant_id:
            if not user.is_member_of_tenant(tenant_id):
                raise ValidationError("Tenant does not exist or user is not a member.")
        else:
            # No tenant requested: use the membership with the earliest join date.
            first_membership = user.memberships.order_by("date_joined").first()
            if first_membership is None:
                raise ValidationError("User has no memberships.")
            tenant_id = str(first_membership.tenant_id)

        return generate_tokens(user, tenant_id)
class TokenSerializer(BaseTokenSerializer):
class TokenSerializer(TokenObtainPairSerializer):
email = serializers.EmailField(write_only=True)
password = serializers.CharField(write_only=True)
tenant_id = serializers.UUIDField(
@@ -122,25 +56,53 @@ class TokenSerializer(BaseTokenSerializer):
resource_name = "tokens"
def validate(self, attrs):
return super().custom_validate(attrs)
email = attrs.get("email")
password = attrs.get("password")
tenant_id = str(attrs.get("tenant_id", ""))
# Authenticate user
user = authenticate(username=email, password=password)
if user is None:
raise ValidationError("Invalid credentials")
class TokenSocialLoginSerializer(BaseTokenSerializer):
email = serializers.EmailField(write_only=True)
if tenant_id:
if not user.is_member_of_tenant(tenant_id):
raise ValidationError("Tenant does not exist or user is not a member.")
else:
first_membership = user.memberships.order_by("date_joined").first()
if first_membership is None:
raise ValidationError("User has no memberships.")
tenant_id = str(first_membership.tenant_id)
# Output tokens
refresh = serializers.CharField(read_only=True)
access = serializers.CharField(read_only=True)
# Generate tokens
try:
refresh = RefreshToken.for_user(user)
except InvalidKeyError:
# Handle invalid key error
raise ValidationError(
{
"detail": "Token generation failed due to invalid key configuration. Provide valid "
"DJANGO_TOKEN_SIGNING_KEY and DJANGO_TOKEN_VERIFYING_KEY in the environment."
}
)
except Exception as e:
raise ValidationError({"detail": str(e)})
class JSONAPIMeta:
resource_name = "tokens"
# Post-process the tokens
# Set the tenant_id
refresh["tenant_id"] = tenant_id
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.fields.pop("password", None)
# Set the nbf (not before) claim to the iat (issued at) claim. At this moment, simplejwt does not provide a
# way to set the nbf claim
refresh.payload["nbf"] = refresh["iat"]
def validate(self, attrs):
return super().custom_validate(attrs, social=True)
# Get the access token
access = refresh.access_token
if settings.SIMPLE_JWT["UPDATE_LAST_LOGIN"]:
update_last_login(None, user)
return {"access": str(access), "refresh": str(refresh)}
# TODO: Check if we can change the parent class to TokenRefreshSerializer from rest_framework_simplejwt.serializers
@@ -178,30 +140,6 @@ class TokenRefreshSerializer(serializers.Serializer):
raise ValidationError({"refresh": "Invalid or expired token"})
class TokenSwitchTenantSerializer(serializers.Serializer):
    # Issues a new token pair for another tenant of the authenticated user.
    tenant_id = serializers.UUIDField(
        write_only=True, help_text="The tenant ID for which to request a new token."
    )
    access = serializers.CharField(read_only=True)
    refresh = serializers.CharField(read_only=True)

    class JSONAPIMeta:
        resource_name = "tokens-switch-tenant"

    def validate(self, attrs):
        # The requesting user comes from the request in the serializer context.
        current_user = self.context["request"].user
        if not current_user.is_authenticated:
            raise ValidationError("Invalid or expired token.")

        target_tenant = str(attrs.get("tenant_id"))
        if current_user.is_member_of_tenant(target_tenant):
            return generate_tokens(current_user, target_tenant)
        raise ValidationError("Tenant does not exist or user is not a member.")
# Base
@@ -753,43 +691,6 @@ class ProviderSerializer(RLSSerializer):
}
class ProviderIncludeSerializer(RLSSerializer):
    """
    Serializer for the Provider model.
    """

    provider = ProviderEnumSerializerField()
    # Computed by get_connection() below.
    connection = serializers.SerializerMethodField(read_only=True)

    class Meta:
        model = Provider
        fields = [
            "id",
            "inserted_at",
            "updated_at",
            "provider",
            "uid",
            "alias",
            "connection",
            # "scanner_args",
        ]

    @extend_schema_field(
        {
            "type": "object",
            "properties": {
                "connected": {"type": "boolean"},
                "last_checked_at": {"type": "string", "format": "date-time"},
            },
        }
    )
    def get_connection(self, obj):
        # Expose the provider's connection state under the keys documented
        # in the schema above.
        return {
            "connected": obj.connected,
            "last_checked_at": obj.connection_last_checked_at,
        }
class ProviderCreateSerializer(RLSSerializer, BaseWriteSerializer):
class Meta:
model = Provider
@@ -851,39 +752,6 @@ class ScanSerializer(RLSSerializer):
"url",
]
included_serializers = {
"provider": "api.v1.serializers.ProviderIncludeSerializer",
}
# Serializer for Scan resources; FindingSerializer registers it as the
# included serializer for its `scan` relationship.
class ScanIncludeSerializer(RLSSerializer):
    trigger = serializers.ChoiceField(
        choices=Scan.TriggerChoices.choices, read_only=True
    )
    state = StateEnumSerializerField(read_only=True)

    class Meta:
        model = Scan
        fields = [
            "id",
            "name",
            "trigger",
            "state",
            "unique_resource_count",
            "progress",
            # "scanner_args",
            "duration",
            "inserted_at",
            "started_at",
            "completed_at",
            "scheduled_at",
            "provider",
        ]

        # Serializer used when the `provider` relationship is included.
        included_serializers = {
            "provider": "api.v1.serializers.ProviderIncludeSerializer",
        }
class ScanCreateSerializer(RLSSerializer, BaseWriteSerializer):
class Meta:
@@ -951,14 +819,6 @@ class ScanTaskSerializer(RLSSerializer):
]
# Minimal serializer for the "scan-reports" resource; `id` is read from the
# source object's `scan` attribute.
class ScanReportSerializer(serializers.Serializer):
    id = serializers.CharField(source="scan")

    class Meta:
        resource_name = "scan-reports"
        fields = ["id"]
class ResourceTagSerializer(RLSSerializer):
"""
Serializer for the ResourceTag model
@@ -1024,51 +884,6 @@ class ResourceSerializer(RLSSerializer):
return fields
class ResourceIncludeSerializer(RLSSerializer):
    """
    Serializer for the Resource model.
    """

    tags = serializers.SerializerMethodField()
    type_ = serializers.CharField(read_only=True)

    class Meta:
        model = Resource
        fields = [
            "id",
            "inserted_at",
            "updated_at",
            "uid",
            "name",
            "region",
            "service",
            "type_",
            "tags",
        ]
        extra_kwargs = {
            read_only_field: {"read_only": True}
            for read_only_field in ("id", "inserted_at", "updated_at")
        }

    @extend_schema_field(
        {
            "type": "object",
            "description": "Tags associated with the resource",
            "example": {"env": "prod", "owner": "johndoe"},
        }
    )
    def get_tags(self, obj):
        # Tags are resolved for the tenant taken from the serializer context.
        tenant_id = self.context.get("tenant_id")
        return obj.get_tags(tenant_id)

    def get_fields(self):
        """Expose the ``type_`` field under the public name ``type``."""
        field_map = super().get_fields()
        # Pop and re-insert: the renamed field ends up last in the mapping.
        field_map["type"] = field_map.pop("type_")
        return field_map
class FindingSerializer(RLSSerializer):
"""
Serializer for the Finding model.
@@ -1091,7 +906,6 @@ class FindingSerializer(RLSSerializer):
"inserted_at",
"updated_at",
"first_seen_at",
"muted",
"url",
# Relationships
"scan",
@@ -1099,8 +913,8 @@ class FindingSerializer(RLSSerializer):
]
included_serializers = {
"scan": ScanIncludeSerializer,
"resources": ResourceIncludeSerializer,
"scan": ScanSerializer,
"resources": ResourceSerializer,
}
@@ -1619,8 +1433,8 @@ class RoleSerializer(RLSSerializer, BaseWriteSerializer):
"manage_account",
# Disable for the first release
# "manage_billing",
# "manage_integrations",
# /Disable for the first release
"manage_integrations",
"manage_providers",
"manage_scans",
"permission_state",
@@ -1902,13 +1716,6 @@ class ComplianceOverviewFullSerializer(ComplianceOverviewSerializer):
return obj.requirements
# Serializer for the "compliance-overviews-metadata" resource: a list of
# region names that may be empty.
class ComplianceOverviewMetadataSerializer(serializers.Serializer):
    regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)

    class Meta:
        resource_name = "compliance-overviews-metadata"
# Overviews
@@ -2033,201 +1840,3 @@ class ScheduleDailyCreateSerializer(serializers.Serializer):
if unknown_keys:
raise ValidationError(f"Invalid fields: {unknown_keys}")
return data
# Integrations
class BaseWriteIntegrationSerializer(BaseWriteSerializer):
    # Shared validation for the integration create/update serializers.

    @staticmethod
    def validate_integration_data(
        integration_type: str,
        providers: list[Provider],  # noqa
        configuration: dict,
        credentials: dict,
    ):
        # Validate `configuration` and `credentials` with the serializers
        # that belong to `integration_type`. `providers` is currently unused.
        #
        # Raises a ValidationError for an unsupported type, an invalid
        # configuration, or credentials that no candidate serializer accepts.
        if integration_type != Integration.IntegrationChoices.S3:
            raise serializers.ValidationError(
                {
                    "integration_type": f"Integration type not supported yet: {integration_type}"
                }
            )

        config_serializer = S3ConfigSerializer
        credentials_serializers = [AWSCredentialSerializer]
        # TODO: This will be required for AWS Security Hub
        # if providers and not all(
        #     provider.provider == Provider.ProviderChoices.AWS
        #     for provider in providers
        # ):
        #     raise serializers.ValidationError(
        #         {"providers": "All providers must be AWS for the S3 integration."}
        #     )

        config_serializer(data=configuration).is_valid(raise_exception=True)

        # The credentials are valid as soon as one candidate accepts them.
        credentials_accepted = False
        for candidate in credentials_serializers:
            try:
                candidate(data=credentials).is_valid(raise_exception=True)
            except ValidationError:
                continue
            credentials_accepted = True
            break

        if not credentials_accepted:
            raise ValidationError(
                {"credentials": "Invalid credentials for the integration type."}
            )
class IntegrationSerializer(RLSSerializer):
    """
    Serializer for the Integration model.
    """

    providers = serializers.ResourceRelatedField(
        queryset=Provider.objects.all(), many=True
    )

    class Meta:
        model = Integration
        fields = [
            "id",
            "inserted_at",
            "updated_at",
            "enabled",
            "connected",
            "connection_last_checked_at",
            "integration_type",
            "configuration",
            "providers",
            "url",
        ]

        included_serializers = {
            "providers": "api.v1.serializers.ProviderIncludeSerializer",
        }

    def to_representation(self, instance):
        # When the context carries a non-empty "allowed_providers", keep only
        # the provider references whose id is in that collection.
        representation = super().to_representation(instance)
        allowed_providers = self.context.get("allowed_providers")
        if not allowed_providers:
            return representation

        visible_ids = {str(allowed.id) for allowed in allowed_providers}
        representation["providers"] = list(
            filter(
                lambda reference: reference["id"] in visible_ids,
                representation["providers"],
            )
        )
        return representation
class IntegrationCreateSerializer(BaseWriteIntegrationSerializer):
    # Write serializer for creating an integration with optional provider links.
    credentials = IntegrationCredentialField(write_only=True)
    configuration = IntegrationConfigField()
    providers = serializers.ResourceRelatedField(
        queryset=Provider.objects.all(), many=True, required=False
    )

    class Meta:
        model = Integration
        fields = [
            "inserted_at",
            "updated_at",
            "enabled",
            "connected",
            "connection_last_checked_at",
            "integration_type",
            "configuration",
            "credentials",
            "providers",
        ]
        extra_kwargs = {
            read_only_field: {"read_only": True}
            for read_only_field in (
                "inserted_at",
                "updated_at",
                "connected",
                "enabled",
                "connection_last_checked_at",
            )
        }

    def validate(self, attrs):
        # Read the integration-specific values before the generic checks run,
        # then validate them against the integration type.
        integration_args = (
            attrs.get("integration_type"),
            attrs.get("providers"),
            attrs.get("configuration"),
            attrs.get("credentials"),
        )

        validated_attrs = super().validate(attrs)
        self.validate_integration_data(*integration_args)
        return validated_attrs

    def create(self, validated_data):
        tenant_id = self.context.get("tenant_id")

        # Providers are linked through the relationship model, so they are
        # taken out before the integration row is created.
        linked_providers = validated_data.pop("providers", [])
        integration = Integration.objects.create(tenant_id=tenant_id, **validated_data)

        relationships = []
        for linked in linked_providers:
            relationships.append(
                IntegrationProviderRelationship(
                    integration=integration,
                    provider=linked,
                    tenant_id=tenant_id,
                )
            )
        IntegrationProviderRelationship.objects.bulk_create(relationships)

        return integration
class IntegrationUpdateSerializer(BaseWriteIntegrationSerializer):
    # Write serializer for partially updating an integration; the
    # integration type is read-only here.
    credentials = IntegrationCredentialField(write_only=True, required=False)
    configuration = IntegrationConfigField(required=False)
    providers = serializers.ResourceRelatedField(
        queryset=Provider.objects.all(), many=True, required=False
    )

    class Meta:
        model = Integration
        fields = [
            "inserted_at",
            "updated_at",
            "enabled",
            "connected",
            "connection_last_checked_at",
            "integration_type",
            "configuration",
            "credentials",
            "providers",
        ]
        extra_kwargs = {
            read_only_field: {"read_only": True}
            for read_only_field in (
                "inserted_at",
                "updated_at",
                "connected",
                "connection_last_checked_at",
                "integration_type",
            )
        }

    def validate(self, attrs):
        # Missing (or falsy) configuration/credentials fall back to the
        # values already stored on the instance.
        current = self.instance
        integration_args = (
            current.integration_type,
            attrs.get("providers"),
            attrs.get("configuration") or current.configuration,
            attrs.get("credentials") or current.credentials,
        )

        validated_attrs = super().validate(attrs)
        self.validate_integration_data(*integration_args)
        return validated_attrs

    def update(self, instance, validated_data):
        tenant_id = self.context.get("tenant_id")
        requested_providers = validated_data.get("providers")

        # A providers list (even an empty one) replaces the existing links;
        # None leaves them untouched.
        if requested_providers is not None:
            instance.providers.clear()
            replacement_links = []
            for linked in requested_providers:
                replacement_links.append(
                    IntegrationProviderRelationship(
                        integration=instance, provider=linked, tenant_id=tenant_id
                    )
                )
            IntegrationProviderRelationship.objects.bulk_create(replacement_links)

        return super().update(instance, validated_data)

View File

@@ -6,11 +6,7 @@ from api.v1.views import (
ComplianceOverviewViewSet,
CustomTokenObtainView,
CustomTokenRefreshView,
CustomTokenSwitchTenantView,
FindingViewSet,
GithubSocialLoginView,
GoogleSocialLoginView,
IntegrationViewSet,
InvitationAcceptViewSet,
InvitationViewSet,
MembershipViewSet,
@@ -48,7 +44,6 @@ router.register(
)
router.register(r"overviews", OverviewViewSet, basename="overview")
router.register(r"schedules", ScheduleViewSet, basename="schedule")
router.register(r"integrations", IntegrationViewSet, basename="integration")
tenants_router = routers.NestedSimpleRouter(router, r"tenants", lookup="tenant")
tenants_router.register(
@@ -61,7 +56,6 @@ users_router.register(r"memberships", MembershipViewSet, basename="user-membersh
urlpatterns = [
path("tokens", CustomTokenObtainView.as_view(), name="token-obtain"),
path("tokens/refresh", CustomTokenRefreshView.as_view(), name="token-refresh"),
path("tokens/switch", CustomTokenSwitchTenantView.as_view(), name="token-switch"),
path(
"providers/secrets",
ProviderSecretViewSet.as_view({"get": "list", "post": "create"}),
@@ -112,8 +106,6 @@ urlpatterns = [
),
name="provider_group-providers-relationship",
),
path("tokens/google", GoogleSocialLoginView.as_view(), name="token-google"),
path("tokens/github", GithubSocialLoginView.as_view(), name="token-github"),
path("", include(router.urls)),
path("", include(tenants_router.urls)),
path("", include(users_router.urls)),

View File

@@ -1,24 +1,10 @@
import glob
import os
import sentry_sdk
from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter
from allauth.socialaccount.providers.google.views import GoogleOAuth2Adapter
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.result import AsyncResult
from config.env import env
from config.settings.social_login import (
GITHUB_OAUTH_CALLBACK_URL,
GOOGLE_OAUTH_CALLBACK_URL,
)
from dj_rest_auth.registration.views import SocialLoginView
from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models import Count, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce
from django.http import HttpResponse
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_control
@@ -45,21 +31,19 @@ from rest_framework.permissions import SAFE_METHODS
from rest_framework_json_api.views import RelationshipView, Response
from rest_framework_simplejwt.exceptions import InvalidToken, TokenError
from tasks.beat import schedule_provider_scan
from tasks.jobs.export import get_s3_client
from tasks.tasks import (
check_provider_connection_task,
delete_provider_task,
delete_tenant_task,
perform_scan_summary_task,
perform_scan_task,
)
from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset
from api.db_router import MainRouter
from api.db_utils import delete_related_daily_task
from api.filters import (
ComplianceOverviewFilter,
FindingFilter,
IntegrationFilter,
InvitationFilter,
MembershipFilter,
ProviderFilter,
@@ -77,7 +61,6 @@ from api.filters import (
from api.models import (
ComplianceOverview,
Finding,
Integration,
Invitation,
Membership,
Provider,
@@ -85,7 +68,6 @@ from api.models import (
ProviderGroupMembership,
ProviderSecret,
Resource,
ResourceFindingMapping,
Role,
RoleProviderGroupRelationship,
Scan,
@@ -99,17 +81,14 @@ from api.models import (
from api.pagination import ComplianceOverviewPagination
from api.rbac.permissions import Permissions, get_providers, get_role
from api.rls import Tenant
from api.utils import CustomOAuth2Client, validate_invitation
from api.utils import validate_invitation
from api.uuid_utils import datetime_to_uuid7
from api.v1.serializers import (
ComplianceOverviewFullSerializer,
ComplianceOverviewMetadataSerializer,
ComplianceOverviewSerializer,
FindingDynamicFilterSerializer,
FindingMetadataSerializer,
FindingSerializer,
IntegrationCreateSerializer,
IntegrationSerializer,
IntegrationUpdateSerializer,
InvitationAcceptSerializer,
InvitationCreateSerializer,
InvitationSerializer,
@@ -135,7 +114,6 @@ from api.v1.serializers import (
RoleSerializer,
RoleUpdateSerializer,
ScanCreateSerializer,
ScanReportSerializer,
ScanSerializer,
ScanUpdateSerializer,
ScheduleDailyCreateSerializer,
@@ -143,8 +121,6 @@ from api.v1.serializers import (
TenantSerializer,
TokenRefreshSerializer,
TokenSerializer,
TokenSocialLoginSerializer,
TokenSwitchTenantSerializer,
UserCreateSerializer,
UserRoleRelationshipSerializer,
UserSerializer,
@@ -211,43 +187,13 @@ class CustomTokenRefreshView(GenericAPIView):
)
@extend_schema(
    tags=["Token"],
    summary="Switch tenant using a valid tenant ID",
    description="Switch tenant by providing a valid tenant ID. The authenticated user must belong to the tenant.",
)
class CustomTokenSwitchTenantView(GenericAPIView):
    permission_classes = [permissions.IsAuthenticated]
    resource_name = "tokens-switch-tenant"
    serializer_class = TokenSwitchTenantSerializer
    http_method_names = ["post"]

    def post(self, request):
        # The serializer needs the request to identify the current user.
        serializer = TokenSwitchTenantSerializer(
            data=request.data, context={"request": request}
        )

        try:
            serializer.is_valid(raise_exception=True)
        except TokenError as token_error:
            raise InvalidToken(token_error.args[0])

        payload = {
            "type": "tokens-switch-tenant",
            "attributes": serializer.validated_data,
        }
        return Response(data=payload, status=status.HTTP_200_OK)
@extend_schema(exclude=True)
class SchemaView(SpectacularAPIView):
serializer_class = None
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.6.0"
spectacular_settings.VERSION = "1.4.0"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -303,67 +249,10 @@ class SchemaView(SpectacularAPIView):
"description": "Endpoints for task management, allowing retrieval of task status and "
"revoking tasks that have not started.",
},
{
"name": "Integration",
"description": "Endpoints for managing third-party integrations, including registration, configuration,"
" retrieval, and deletion of integrations such as S3, JIRA, or other services.",
},
]
return super().get(request, *args, **kwargs)
@extend_schema(exclude=True)
class GoogleSocialLoginView(SocialLoginView):
    # Social login through Google. For an authenticated user the parent response
    # is replaced by a "google-social-tokens" payload built from
    # TokenSocialLoginSerializer; otherwise the parent response is returned as is.
    callback_url = GOOGLE_OAUTH_CALLBACK_URL
    client_class = CustomOAuth2Client
    adapter_class = GoogleOAuth2Adapter

    def get_response(self):
        # The parent implementation always runs first, even when its result is discarded.
        parent_response = super().get_response()

        if not (self.user and self.user.is_authenticated):
            return parent_response

        token_serializer = TokenSocialLoginSerializer(data={"email": self.user.email})
        try:
            token_serializer.is_valid(raise_exception=True)
        except TokenError as token_error:
            raise InvalidToken(token_error.args[0])

        body = {
            "type": "google-social-tokens",
            "attributes": token_serializer.validated_data,
        }
        return Response(data=body, status=status.HTTP_200_OK)
@extend_schema(exclude=True)
class GithubSocialLoginView(SocialLoginView):
    # Social login through GitHub. For an authenticated user the parent response
    # is replaced by a "github-social-tokens" payload built from
    # TokenSocialLoginSerializer; otherwise the parent response is returned as is.
    callback_url = GITHUB_OAUTH_CALLBACK_URL
    client_class = CustomOAuth2Client
    adapter_class = GitHubOAuth2Adapter

    def get_response(self):
        # The parent implementation always runs first, even when its result is discarded.
        parent_response = super().get_response()

        if not (self.user and self.user.is_authenticated):
            return parent_response

        token_serializer = TokenSocialLoginSerializer(data={"email": self.user.email})
        try:
            token_serializer.is_valid(raise_exception=True)
        except TokenError as token_error:
            raise InvalidToken(token_error.args[0])

        body = {
            "type": "github-social-tokens",
            "attributes": token_serializer.validated_data,
        }
        return Response(data=body, status=status.HTTP_200_OK)
@extend_schema_view(
list=extend_schema(
tags=["User"],
@@ -1089,7 +978,6 @@ class ProviderViewSet(BaseRLSViewSet):
provider = get_object_or_404(Provider, pk=pk)
provider.is_deleted = True
provider.save()
delete_related_daily_task(str(provider.id))
with transaction.atomic():
task = delete_provider_task.delay(
@@ -1137,18 +1025,6 @@ class ProviderViewSet(BaseRLSViewSet):
request=ScanCreateSerializer,
responses={202: OpenApiResponse(response=TaskSerializer)},
),
report=extend_schema(
tags=["Scan"],
summary="Download ZIP report",
description="Returns a ZIP file containing the requested report",
request=ScanReportSerializer,
responses={
200: OpenApiResponse(description="Report obtained successfully"),
202: OpenApiResponse(description="The task is in progress"),
403: OpenApiResponse(description="There is a problem with credentials"),
404: OpenApiResponse(description="The scan has no reports"),
},
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
@@ -1197,10 +1073,6 @@ class ScanViewSet(BaseRLSViewSet):
return ScanCreateSerializer
elif self.action == "partial_update":
return ScanUpdateSerializer
elif self.action == "report":
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
return ScanReportSerializer
return super().get_serializer_class()
def partial_update(self, request, *args, **kwargs):
@@ -1218,100 +1090,6 @@ class ScanViewSet(BaseRLSViewSet):
)
return Response(data=read_serializer.data, status=status.HTTP_200_OK)
# Download endpoint for a scan's report archive. Flow, in order:
#   1. scan still EXECUTING            -> 202 with the scan's task and a Content-Location header
#   2. a "scan-report" task EXECUTING  -> 202 with that task and a Content-Location header
#   3. no output_location on the scan  -> 404
#   4. output_location is an s3:// URI -> object fetched from S3 (403 / 404 on failure)
#   5. otherwise                       -> output_location is used as a glob pattern on local disk
# On success the file bytes are returned as an attachment.
@action(detail=True, methods=["get"], url_name="report")
def report(self, request, pk=None):
scan_instance = self.get_object()
if scan_instance.state == StateChoices.EXECUTING:
# If the scan is still running, return the task
prowler_task = Task.objects.get(id=scan_instance.task.id)
# Stored on the instance so the 202 body is rendered with TaskSerializer
# (presumably picked up by get_serializer_class for this action - confirm).
self.response_serializer_class = TaskSerializer
output_serializer = self.get_serializer(prowler_task)
return Response(
data=output_serializer.data,
status=status.HTTP_202_ACCEPTED,
headers={
"Content-Location": reverse(
"task-detail", kwargs={"pk": output_serializer.data["id"]}
)
},
)
try:
# NOTE(review): only Task.DoesNotExist is handled below; if more than one
# "scan-report" task has task_args containing this pk, .get() raises
# MultipleObjectsReturned, which is not caught here - confirm uniqueness.
output_celery_task = Task.objects.get(
task_runner_task__task_name="scan-report",
task_runner_task__task_args__contains=pk,
)
self.response_serializer_class = TaskSerializer
output_serializer = self.get_serializer(output_celery_task)
if output_serializer.data["state"] == StateChoices.EXECUTING:
# If the task is still running, return the task
return Response(
data=output_serializer.data,
status=status.HTTP_202_ACCEPTED,
headers={
"Content-Location": reverse(
"task-detail", kwargs={"pk": output_serializer.data["id"]}
)
},
)
except Task.DoesNotExist:
# If the task does not exist, it means that the task is removed from the database
pass
output_location = scan_instance.output_location
if not output_location:
return Response(
{"detail": "The scan has no reports."},
status=status.HTTP_404_NOT_FOUND,
)
if scan_instance.output_location.startswith("s3://"):
try:
s3_client = get_s3_client()
except (ClientError, NoCredentialsError, ParamValidationError):
return Response(
{"detail": "There is a problem with credentials."},
status=status.HTTP_403_FORBIDDEN,
)
bucket_name = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET")
# The key is obtained by slicing off len("s3://<bucket_name>/") characters; the
# prefix itself is not compared.
# NOTE(review): assumes the stored URI uses the currently configured bucket - confirm.
key = output_location[len(f"s3://{bucket_name}/") :]
try:
s3_object = s3_client.get_object(Bucket=bucket_name, Key=key)
except ClientError as e:
error_code = e.response.get("Error", {}).get("Code")
if error_code == "NoSuchKey":
return Response(
{"detail": "The scan has no reports."},
status=status.HTTP_404_NOT_FOUND,
)
# Every other ClientError code is reported to the caller as a credentials problem.
return Response(
{"detail": "There is a problem with credentials."},
status=status.HTTP_403_FORBIDDEN,
)
# The whole object is read into memory before the response is built.
file_content = s3_object["Body"].read()
filename = os.path.basename(output_location.split("/")[-1])
else:
# Local storage: the first glob match is served; no match is reported to Sentry and answered with 404.
zip_files = glob.glob(output_location)
try:
file_path = zip_files[0]
except IndexError as e:
sentry_sdk.capture_exception(e)
return Response(
{"detail": "The scan has no reports."},
status=status.HTTP_404_NOT_FOUND,
)
with open(file_path, "rb") as f:
file_content = f.read()
filename = os.path.basename(file_path)
response = HttpResponse(
file_content, content_type="application/x-zip-compressed"
)
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response
def create(self, request, *args, **kwargs):
input_serializer = self.get_serializer(data=request.data)
input_serializer.is_valid(raise_exception=True)
@@ -1326,6 +1104,10 @@ class ScanViewSet(BaseRLSViewSet):
# Disabled for now
# checks_to_execute=scan.scanner_args.get("checks_to_execute"),
},
link=perform_scan_summary_task.si(
tenant_id=self.request.tenant_id,
scan_id=str(scan.id),
),
)
scan.task_id = task.id
@@ -1528,10 +1310,17 @@ class ResourceViewSet(BaseRLSViewSet):
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class FindingViewSet(BaseRLSViewSet):
queryset = Finding.all_objects.all()
queryset = Finding.objects.all()
serializer_class = FindingSerializer
filterset_class = FindingFilter
prefetch_for_includes = {
"__all__": [],
"resources": [
Prefetch("resources", queryset=Resource.objects.select_related("findings"))
],
"scan": [Prefetch("scan", queryset=Scan.objects.select_related("findings"))],
}
http_method_names = ["get"]
filterset_class = FindingFilter
ordering = ["-inserted_at"]
ordering_fields = [
"status",
@@ -1540,18 +1329,6 @@ class FindingViewSet(BaseRLSViewSet):
"inserted_at",
"updated_at",
]
prefetch_for_includes = {
"__all__": [],
"resources": [
Prefetch(
"resources",
queryset=Resource.all_objects.prefetch_related("tags", "findings"),
)
],
"scan": [
Prefetch("scan", queryset=Scan.all_objects.select_related("findings"))
],
}
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
# the provider through the provider group)
required_permissions = []
@@ -1565,34 +1342,41 @@ class FindingViewSet(BaseRLSViewSet):
return super().get_serializer_class()
def get_queryset(self):
tenant_id = self.request.tenant_id
user_roles = get_role(self.request.user)
if user_roles.unlimited_visibility:
# User has unlimited visibility, return all findings
queryset = Finding.all_objects.filter(tenant_id=tenant_id)
# User has unlimited visibility, return all scans
queryset = Finding.objects.filter(tenant_id=self.request.tenant_id)
else:
# User lacks permission, filter findings based on provider groups associated with the role
queryset = Finding.all_objects.filter(
# User lacks permission, filter providers based on provider groups associated with the role
queryset = Finding.objects.filter(
scan__provider__in=get_providers(user_roles)
)
search_value = self.request.query_params.get("filter[search]", None)
if search_value:
# Django's ORM will build a LEFT JOIN and OUTER JOIN on any "through" tables, resulting in duplicates
# The duplicates then require a `distinct` query
search_query = SearchQuery(
search_value, config="simple", search_type="plain"
)
resource_match = Resource.all_objects.filter(
text_search=search_query,
id__in=ResourceFindingMapping.objects.filter(
resource_id=OuterRef("pk"),
tenant_id=tenant_id,
).values("resource_id"),
)
queryset = queryset.filter(
Q(text_search=search_query) | Q(Exists(resource_match))
)
Q(impact_extended__contains=search_value)
| Q(status_extended__contains=search_value)
| Q(check_id=search_value)
| Q(check_id__icontains=search_value)
| Q(text_search=search_query)
| Q(resources__uid=search_value)
| Q(resources__name=search_value)
| Q(resources__region=search_value)
| Q(resources__service=search_value)
| Q(resources__type=search_value)
| Q(resources__uid__contains=search_value)
| Q(resources__name__contains=search_value)
| Q(resources__region__contains=search_value)
| Q(resources__service__contains=search_value)
| Q(resources__tags__text_search=search_query)
| Q(resources__text_search=search_query)
).distinct()
return queryset
@@ -1602,22 +1386,10 @@ class FindingViewSet(BaseRLSViewSet):
return queryset
return super().filter_queryset(queryset)
def list(self, request, *args, **kwargs):
    # Lists findings in two steps: the filtered queryset is paginated over ids
    # only, then the rows for the current page are loaded with their scan and
    # resources and put back into the page order.
    base_qs = self.filter_queryset(self.get_queryset())
    paginated_ids = self.paginate_queryset(base_qs.values_list("id", flat=True))

    if paginated_ids is None:
        # No page was produced: serialize the filtered queryset directly.
        serializer = self.get_serializer(base_qs, many=True)
        return Response(serializer.data)

    ids = [*paginated_ids]

    # Position of each id within the page. setdefault keeps the first occurrence,
    # which is what ids.index() returned, while making every lookup O(1) instead
    # of a linear scan per sorted element.
    page_position = {}
    for index, finding_id in enumerate(ids):
        page_position.setdefault(finding_id, index)

    findings = (
        Finding.all_objects.filter(tenant_id=self.request.tenant_id, id__in=ids)
        .select_related("scan")
        .prefetch_related("resources")
    )
    # Re-sort in Python to preserve the ordering of the paginated ids.
    findings = sorted(findings, key=lambda finding: page_position[finding.id])

    serializer = self.get_serializer(findings, many=True)
    return self.get_paginated_response(serializer.data)
def inserted_at_to_uuidv7(self, inserted_at):
if inserted_at is None:
return None
return datetime_to_uuid7(inserted_at)
@action(detail=False, methods=["get"], url_name="findings_services_regions")
def findings_services_regions(self, request):
@@ -2057,21 +1829,6 @@ class RoleProviderGroupRelationshipView(RelationshipView, BaseRLSViewSet):
description="Fetch detailed information about a specific compliance overview by its ID, including detailed "
"requirement information and check's status.",
),
metadata=extend_schema(
tags=["Compliance Overview"],
summary="Retrieve metadata values from compliance overviews",
description="Fetch unique metadata values from a set of compliance overviews. This is useful for dynamic "
"filtering.",
parameters=[
OpenApiParameter(
name="filter[scan_id]",
required=True,
type=OpenApiTypes.UUID,
location=OpenApiParameter.QUERY,
description="Related scan ID.",
),
],
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
@@ -2133,8 +1890,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet):
def get_serializer_class(self):
if self.action == "retrieve":
return ComplianceOverviewFullSerializer
elif self.action == "metadata":
return ComplianceOverviewMetadataSerializer
return super().get_serializer_class()
def list(self, request, *args, **kwargs):
@@ -2151,35 +1906,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet):
)
return super().list(request, *args, **kwargs)
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
    # Returns the distinct, sorted regions of the compliance overviews that belong
    # to the scan given in the mandatory "filter[scan_id]" query parameter.
    scan_id = request.query_params.get("filter[scan_id]")
    if not scan_id:
        missing_parameter = {
            "detail": "This query parameter is required.",
            "status": 400,
            "source": {"pointer": "filter[scan_id]"},
            "code": "required",
        }
        raise ValidationError([missing_parameter])

    region_values = (
        ComplianceOverview.objects.filter(
            tenant_id=self.request.tenant_id, scan_id=scan_id
        )
        .values_list("region", flat=True)
        .order_by("region")
        .distinct()
    )

    # The result is passed through the action serializer as input data and validated.
    serializer = self.get_serializer(data={"regions": list(region_values)})
    serializer.is_valid(raise_exception=True)
    return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema(tags=["Overview"])
@extend_schema_view(
@@ -2500,67 +2226,3 @@ class ScheduleViewSet(BaseRLSViewSet):
)
},
)
@extend_schema_view(
list=extend_schema(
tags=["Integration"],
summary="List all integrations",
description="Retrieve a list of all configured integrations with options for filtering by various criteria.",
),
retrieve=extend_schema(
tags=["Integration"],
summary="Retrieve integration details",
description="Fetch detailed information about a specific integration by its ID.",
),
create=extend_schema(
tags=["Integration"],
summary="Create a new integration",
description="Register a new integration with the system, providing necessary configuration details.",
),
partial_update=extend_schema(
tags=["Integration"],
summary="Partially update an integration",
description="Modify certain fields of an existing integration without affecting other settings.",
),
destroy=extend_schema(
tags=["Integration"],
summary="Delete an integration",
description="Remove an integration from the system by its ID.",
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class IntegrationViewSet(BaseRLSViewSet):
queryset = Integration.objects.all()
serializer_class = IntegrationSerializer
http_method_names = ["get", "post", "patch", "delete"]
filterset_class = IntegrationFilter
ordering = ["integration_type", "-inserted_at"]
# RBAC required permissions
required_permissions = [Permissions.MANAGE_INTEGRATIONS]
allowed_providers = None
def get_queryset(self):
    # Builds the integrations visible to the requesting user and, for users with
    # restricted visibility, records the allowed providers on the view.
    user_roles = get_role(self.request.user)
    if user_roles.unlimited_visibility:
        # User has unlimited visibility, return all integrations of the tenant
        return Integration.objects.filter(tenant_id=self.request.tenant_id)

    # User lacks unlimited visibility: keep only the integrations linked to
    # providers reachable through the provider groups of the role.
    allowed_providers = get_providers(user_roles)
    self.allowed_providers = allowed_providers
    # Filtering across the multi-valued "providers" relation yields one row per
    # matching provider, so an integration linked to several allowed providers
    # would be listed more than once without distinct().
    return Integration.objects.filter(providers__in=allowed_providers).distinct()
def get_serializer_class(self):
    # Write actions use dedicated serializers; everything else falls back to the
    # parent implementation.
    serializers_by_action = {
        "create": IntegrationCreateSerializer,
        "partial_update": IntegrationUpdateSerializer,
    }
    if self.action in serializers_by_action:
        return serializers_by_action[self.action]
    return super().get_serializer_class()
def get_serializer_context(self):
    # Extends the parent context with the allowed providers stored on the view.
    serializer_context = super().get_serializer_context()
    serializer_context.update(allowed_providers=self.allowed_providers)
    return serializer_context

View File

@@ -50,9 +50,9 @@ class RLSTask(Task):
tenant_id = kwargs.get("tenant_id")
with rls_transaction(tenant_id):
APITask.objects.update_or_create(
APITask.objects.create(
id=task_result_instance.task_id,
tenant_id=tenant_id,
defaults={"task_runner_task": task_result_instance},
task_runner_task=task_result_instance,
)
return result

View File

@@ -2,9 +2,10 @@ import json
import logging
from enum import StrEnum
from config.env import env
from django_guid.log_filters import CorrelationId
from config.env import env
class BackendLogger(StrEnum):
GUNICORN = "gunicorn"
@@ -38,9 +39,9 @@ class NDJSONFormatter(logging.Formatter):
"funcName": record.funcName,
"process": record.process,
"thread": record.thread,
"transaction_id": (
record.transaction_id if hasattr(record, "transaction_id") else None
),
"transaction_id": record.transaction_id
if hasattr(record, "transaction_id")
else None,
}
# Add REST API extra fields

View File

@@ -4,8 +4,6 @@ from config.custom_logging import LOGGING # noqa
from config.env import BASE_DIR, env # noqa
from config.settings.celery import * # noqa
from config.settings.partitions import * # noqa
from config.settings.sentry import * # noqa
from config.settings.social_login import * # noqa
SECRET_KEY = env("SECRET_KEY", default="secret")
DEBUG = env.bool("DJANGO_DEBUG", default=False)
@@ -31,13 +29,6 @@ INSTALLED_APPS = [
"django_celery_results",
"django_celery_beat",
"rest_framework_simplejwt.token_blacklist",
"allauth",
"allauth.account",
"allauth.socialaccount",
"allauth.socialaccount.providers.google",
"allauth.socialaccount.providers.github",
"dj_rest_auth.registration",
"rest_framework.authtoken",
]
MIDDLEWARE = [
@@ -51,11 +42,8 @@ MIDDLEWARE = [
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"api.middleware.APILoggingMiddleware",
"allauth.account.middleware.AccountMiddleware",
]
SITE_ID = 1
CORS_ALLOWED_ORIGINS = ["http://localhost", "http://127.0.0.1"]
ROOT_URLCONF = "config.urls"
@@ -219,27 +207,4 @@ CACHE_STALE_WHILE_REVALIDATE = env.int("DJANGO_STALE_WHILE_REVALIDATE", 60)
TESTING = False
FINDINGS_MAX_DAYS_IN_RANGE = env.int("DJANGO_FINDINGS_MAX_DAYS_IN_RANGE", 7)
# API export settings
DJANGO_TMP_OUTPUT_DIRECTORY = env.str(
"DJANGO_TMP_OUTPUT_DIRECTORY", "/tmp/prowler_api_output"
)
DJANGO_FINDINGS_BATCH_SIZE = env.str("DJANGO_FINDINGS_BATCH_SIZE", 1000)
DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID = env.str("DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID", "")
DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY = env.str(
"DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY", ""
)
DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN = env.str("DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN", "")
DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION = env.str("DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION", "")
# HTTP Security Headers
SECURE_CONTENT_TYPE_NOSNIFF = True
X_FRAME_OPTIONS = "DENY"
SECURE_REFERRER_POLICY = "strict-origin-when-cross-origin"
DJANGO_DELETION_BATCH_SIZE = env.int("DJANGO_DELETION_BATCH_SIZE", 5000)

View File

@@ -1,90 +0,0 @@
import sentry_sdk
from config.env import env
IGNORED_EXCEPTIONS = [
# Provider is not connected due to credentials errors
"is not connected",
# Authentication Errors from AWS
"InvalidToken",
"AccessDeniedException",
"AuthorizationErrorException",
"UnrecognizedClientException",
"UnauthorizedOperation",
"AuthFailure",
"InvalidClientTokenId",
"AccessDenied",
"No Shodan API Key", # Shodan Check
"RequestLimitExceeded", # For now we don't want to log the RequestLimitExceeded errors
"ThrottlingException",
"Rate exceeded",
"SubscriptionRequiredException",
"UnknownOperationException",
"OptInRequired",
"ReadTimeout",
"LimitExceeded",
"ConnectTimeoutError",
"ExpiredToken",
"IncompleteSignature",
"RegionDisabledException",
"TooManyRequestsException",
"SignatureDoesNotMatch",
"InvalidParameterValueException",
"InvalidInputException",
"ValidationException",
"AWSSecretAccessKeyInvalidError",
"InvalidAction",
"Pool is closed", # The following comes from urllib3: eu-west-1 -- HTTPClientError[126]: An HTTP Client raised an unhandled exception: AWSHTTPSConnectionPool(host='hostname.s3.eu-west-1.amazonaws.com', port=443): Pool is closed.
# Authentication Errors from GCP
"ClientAuthenticationError",
"AuthorizationFailed",
"Reauthentication is needed",
"Permission denied to get service",
"API has not been used in project",
"HttpError 404 when requesting",
"GCPNoAccesibleProjectsError",
# Authentication Errors from Azure
"ClientAuthenticationError",
"AuthorizationFailed",
"Subscription Not Registered",
"AzureNotValidClientIdError",
"AzureNotValidClientSecretError",
"AzureNotValidTenantIdError",
"AzureTenantIdAndClientSecretNotBelongingToClientIdError",
"AzureTenantIdAndClientIdNotBelongingToClientSecretError",
"AzureClientIdAndClientSecretNotBelongingToTenantIdError",
"AzureHTTPResponseError",
"Error with credentials provided",
# AWS Service is not available in a region
"EndpointConnectionError",
]
def before_send(event, hint):
    """
    Decide whether a Sentry event is sent or discarded.

    An event that carries a log record in ``hint`` is discarded when the record
    is at ERROR level (numeric level 40) and its message contains any of the
    substrings listed in IGNORED_EXCEPTIONS. Every other event is returned
    unchanged.

    Args:
        event: The event payload; returned as is when it must be sent.
        hint: Mapping with extra context. Only its "log_record" entry is read,
            and only when present.

    Returns:
        The event when it must be sent, None when it must be discarded.
    """
    # https://docs.python.org/3/library/logging.html#logrecord-objects
    if "log_record" in hint:
        log_record = hint["log_record"]
        # Handle Error events (level 40) and let the rest through
        if log_record.levelno == 40:
            # LogRecord.msg can be any object (for instance an exception instance),
            # so it is converted to text before the substring checks; a non-string
            # msg would otherwise raise TypeError on the "in" test.
            log_msg = str(log_record.msg)
            if any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
                return None
    return event
sentry_sdk.init(
dsn=env.str("DJANGO_SENTRY_DSN", ""),
# Add data like request headers and IP for users,
# see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info
before_send=before_send,
send_default_pii=True,
_experiments={
# Set continuous_profiling_auto_start to True
# to automatically start the profiler on when
# possible.
"continuous_profiling_auto_start": True,
},
)

View File

@@ -1,53 +0,0 @@
from config.env import env
# Provider Oauth settings
GOOGLE_OAUTH_CLIENT_ID = env("SOCIAL_GOOGLE_OAUTH_CLIENT_ID", default="")
GOOGLE_OAUTH_CLIENT_SECRET = env("SOCIAL_GOOGLE_OAUTH_CLIENT_SECRET", default="")
GOOGLE_OAUTH_CALLBACK_URL = env("SOCIAL_GOOGLE_OAUTH_CALLBACK_URL", default="")
GITHUB_OAUTH_CLIENT_ID = env("SOCIAL_GITHUB_OAUTH_CLIENT_ID", default="")
GITHUB_OAUTH_CLIENT_SECRET = env("SOCIAL_GITHUB_OAUTH_CLIENT_SECRET", default="")
GITHUB_OAUTH_CALLBACK_URL = env("SOCIAL_GITHUB_OAUTH_CALLBACK_URL", default="")
# Allauth settings
ACCOUNT_LOGIN_METHODS = {"email"} # Use Email / Password authentication
ACCOUNT_USERNAME_REQUIRED = False
ACCOUNT_EMAIL_REQUIRED = True
ACCOUNT_EMAIL_VERIFICATION = "none" # Do not require email confirmation
ACCOUNT_USER_MODEL_USERNAME_FIELD = None
# dj-rest-auth settings.
# NOTE(review): dj-rest-auth 3.0+ documents the JWT switch inside REST_AUTH as "USE_JWT";
# "REST_USE_JWT" was the name of the older top-level setting, so this key may have no
# effect inside the dict - confirm against the installed dj-rest-auth version.
REST_AUTH = {
"TOKEN_MODEL": None,
"REST_USE_JWT": True,
}
# django-allauth (social)
# Authenticate if local account with this email address already exists
SOCIALACCOUNT_EMAIL_AUTHENTICATION = True
# Connect local account and social account if local account with that email address already exists
SOCIALACCOUNT_EMAIL_AUTHENTICATION_AUTO_CONNECT = True
SOCIALACCOUNT_ADAPTER = "api.adapters.ProwlerSocialAccountAdapter"
SOCIALACCOUNT_PROVIDERS = {
"google": {
"APP": {
"client_id": GOOGLE_OAUTH_CLIENT_ID,
"secret": GOOGLE_OAUTH_CLIENT_SECRET,
"key": "",
},
"SCOPE": [
"email",
"profile",
],
"AUTH_PARAMS": {
"access_type": "online",
},
},
"github": {
"APP": {
"client_id": GITHUB_OAUTH_CLIENT_ID,
"secret": GITHUB_OAUTH_CLIENT_SECRET,
},
"SCOPE": [
"user",
"read:org",
],
},
}

View File

@@ -15,8 +15,6 @@ from api.db_utils import rls_transaction
from api.models import (
ComplianceOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
Membership,
Provider,
@@ -488,7 +486,7 @@ def scans_fixture(tenants_fixture, providers_fixture):
name="Scan 1",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
state=StateChoices.AVAILABLE,
tenant_id=tenant.id,
started_at="2024-01-02T00:00:00Z",
)
@@ -655,7 +653,6 @@ def findings_fixture(scans_fixture, resources_fixture):
"Description": "test description orange juice",
},
first_seen_at="2024-01-02T00:00:00Z",
muted=True,
)
finding2.add_resources([resource2])
@@ -880,46 +877,6 @@ def scan_summaries_fixture(tenants_fixture, providers_fixture):
)
@pytest.fixture
def integrations_fixture(providers_fixture):
    # Creates two identical "amazon_s3" integrations in the tenant of the first
    # provider: the first is linked to provider1 only, the second to provider1
    # and provider2.
    provider1, provider2, *_ = providers_fixture
    tenant_id = provider1.tenant_id

    def _create_integration():
        return Integration.objects.create(
            tenant_id=tenant_id,
            enabled=True,
            connected=True,
            integration_type="amazon_s3",
            configuration={"key": "value"},
            credentials={"psswd": "1234"},
        )

    def _link(integration, provider):
        IntegrationProviderRelationship.objects.create(
            tenant_id=tenant_id,
            integration=integration,
            provider=provider,
        )

    integration1 = _create_integration()
    _link(integration1, provider1)

    integration2 = _create_integration()
    _link(integration2, provider1)
    _link(integration2, provider2)

    return integration1, integration2
def get_authorization_header(access_token: str) -> dict:
return {"Authorization": f"Bearer {access_token}"}

View File

@@ -1,5 +1,5 @@
from celery.utils.log import get_task_logger
from django.db import DatabaseError
from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import batch_delete, rls_transaction
@@ -8,12 +8,11 @@ from api.models import Finding, Provider, Resource, Scan, ScanSummary, Tenant
logger = get_task_logger(__name__)
def delete_provider(tenant_id: str, pk: str):
def delete_provider(pk: str):
"""
Gracefully deletes an instance of a provider along with its related data.
Args:
tenant_id (str): Tenant ID the resources belong to.
pk (str): The primary key of the Provider instance to delete.
Returns:
@@ -23,31 +22,33 @@ def delete_provider(tenant_id: str, pk: str):
Raises:
Provider.DoesNotExist: If no instance with the provided primary key exists.
"""
with rls_transaction(tenant_id):
instance = Provider.all_objects.get(pk=pk)
deletion_summary = {}
deletion_steps = [
("Scan Summaries", ScanSummary.all_objects.filter(scan__provider=instance)),
("Findings", Finding.all_objects.filter(scan__provider=instance)),
("Resources", Resource.all_objects.filter(provider=instance)),
("Scans", Scan.all_objects.filter(provider=instance)),
]
instance = Provider.all_objects.get(pk=pk)
deletion_summary = {}
for step_name, queryset in deletion_steps:
try:
_, step_summary = batch_delete(tenant_id, queryset)
deletion_summary.update(step_summary)
except DatabaseError as db_error:
logger.error(f"Error deleting {step_name}: {db_error}")
raise
with transaction.atomic():
# Delete Scan Summaries
scan_summaries_qs = ScanSummary.all_objects.filter(scan__provider=instance)
_, scans_summ_summary = batch_delete(scan_summaries_qs)
deletion_summary.update(scans_summ_summary)
try:
with rls_transaction(tenant_id):
_, provider_summary = instance.delete()
# Delete Findings
findings_qs = Finding.all_objects.filter(scan__provider=instance)
_, findings_summary = batch_delete(findings_qs)
deletion_summary.update(findings_summary)
# Delete Resources
resources_qs = Resource.all_objects.filter(provider=instance)
_, resources_summary = batch_delete(resources_qs)
deletion_summary.update(resources_summary)
# Delete Scans
scans_qs = Scan.all_objects.filter(provider=instance)
_, scans_summary = batch_delete(scans_qs)
deletion_summary.update(scans_summary)
provider_deleted_count, provider_summary = instance.delete()
deletion_summary.update(provider_summary)
except DatabaseError as db_error:
logger.error(f"Error deleting Provider: {db_error}")
raise
return deletion_summary
@@ -65,8 +66,9 @@ def delete_tenant(pk: str):
deletion_summary = {}
for provider in Provider.objects.using(MainRouter.admin_db).filter(tenant_id=pk):
summary = delete_provider(pk, provider.id)
deletion_summary.update(summary)
with rls_transaction(pk):
summary = delete_provider(provider.id)
deletion_summary.update(summary)
Tenant.objects.using(MainRouter.admin_db).filter(id=pk).delete()

View File

@@ -1,156 +0,0 @@
import os
import zipfile
import boto3
import config.django.base as base
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.utils.log import get_task_logger
from django.conf import settings
from prowler.config.config import (
csv_file_suffix,
html_file_suffix,
json_ocsf_file_suffix,
output_file_timestamp,
)
from prowler.lib.outputs.csv.csv import CSV
from prowler.lib.outputs.html.html import HTML
from prowler.lib.outputs.ocsf.ocsf import OCSF
logger = get_task_logger(__name__)
# Predefined mapping for output formats and their configurations
OUTPUT_FORMATS_MAPPING = {
"csv": {
"class": CSV,
"suffix": csv_file_suffix,
"kwargs": {},
},
"json-ocsf": {"class": OCSF, "suffix": json_ocsf_file_suffix, "kwargs": {}},
"html": {"class": HTML, "suffix": html_file_suffix, "kwargs": {"stats": {}}},
}
def _compress_output_files(output_directory: str) -> str:
"""
Compress output files from all configured output formats into a ZIP archive.
Args:
output_directory (str): The directory where the output files are located.
The function looks up all known suffixes in OUTPUT_FORMATS_MAPPING
and compresses those files into a single ZIP.
Returns:
str: The full path to the newly created ZIP archive.
"""
zip_path = f"{output_directory}.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
for suffix in [config["suffix"] for config in OUTPUT_FORMATS_MAPPING.values()]:
zipf.write(
f"{output_directory}{suffix}",
f"output/{output_directory.split('/')[-1]}{suffix}",
)
return zip_path
def get_s3_client():
    """
    Build a boto3 S3 client and check it with a ``list_buckets`` call.

    The first attempt uses the access key, secret key, session token and region
    read from the Django settings (DJANGO_OUTPUT_S3_AWS_*). When creating or
    checking that client raises ClientError, NoCredentialsError,
    ParamValidationError or ValueError, a second client is created without
    explicit arguments and checked the same way.

    Returns:
        boto3.client: The first client whose ``list_buckets`` call succeeded.

    Raises:
        Whatever the second attempt raises; it is not handled here.
    """
    try:
        configured_client = boto3.client(
            "s3",
            **{
                "aws_access_key_id": settings.DJANGO_OUTPUT_S3_AWS_ACCESS_KEY_ID,
                "aws_secret_access_key": settings.DJANGO_OUTPUT_S3_AWS_SECRET_ACCESS_KEY,
                "aws_session_token": settings.DJANGO_OUTPUT_S3_AWS_SESSION_TOKEN,
                "region_name": settings.DJANGO_OUTPUT_S3_AWS_DEFAULT_REGION,
            },
        )
        configured_client.list_buckets()
        return configured_client
    except (ClientError, NoCredentialsError, ParamValidationError, ValueError):
        # Fall back to a client created without explicit credentials or region.
        default_client = boto3.client("s3")
        default_client.list_buckets()
        return default_client
def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str:
"""
Upload the specified ZIP file to the configured S3 bucket.

If DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET is empty, nothing is uploaded and the
function returns None. The object key is
"<tenant_id>/<scan_id>/<file name of zip_path>".

Args:
    tenant_id (str): The tenant identifier, used as part of the S3 key prefix.
    zip_path (str): The local file system path to the ZIP file to be uploaded.
    scan_id (str): The scan identifier, used as part of the S3 key prefix.

Returns:
    str: The S3 URI of the uploaded file ("s3://<bucket>/<key>") if successful.
    None: If no bucket is configured, or if creating the client or uploading
        raised ClientError, NoCredentialsError, ParamValidationError or
        ValueError. Those errors are logged and not re-raised.
"""
if not base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET:
return
try:
s3 = get_s3_client()
s3_key = f"{tenant_id}/{scan_id}/{os.path.basename(zip_path)}"
s3.upload_file(
Filename=zip_path,
Bucket=base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET,
Key=s3_key,
)
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{s3_key}"
except (ClientError, NoCredentialsError, ParamValidationError, ValueError) as e:
# Best effort: the failure is only logged, so the caller receives None.
logger.error(f"S3 upload failed: {str(e)}")
def _generate_output_directory(
output_directory, prowler_provider: object, tenant_id: str, scan_id: str
) -> str:
"""
Generate a file system path for the output directory of a prowler scan.
This function constructs the output directory path by combining a base
temporary output directory, the tenant ID, the scan ID, and details about
the prowler provider along with a timestamp. The resulting path is used to
store the output files of a prowler scan.
Note:
This function depends on one external variable:
- `output_file_timestamp`: A timestamp (as a string) used to uniquely identify the output.
Args:
output_directory (str): The base output directory.
prowler_provider (object): An identifier or descriptor for the prowler provider.
Typically, this is a string indicating the provider (e.g., "aws").
tenant_id (str): The unique identifier for the tenant.
scan_id (str): The unique identifier for the scan.
Returns:
str: The constructed file system path for the prowler scan output directory.
Example:
>>> _generate_output_directory("/tmp", "aws", "tenant-1234", "scan-5678")
'/tmp/tenant-1234/aws/scan-5678/prowler-output-2023-02-15T12:34:56'
"""
path = (
f"{output_directory}/{tenant_id}/{scan_id}/prowler-output-"
f"{prowler_provider}-{output_file_timestamp}"
)
os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
return path

View File

@@ -1,4 +1,3 @@
import json
import time
from copy import deepcopy
from datetime import datetime, timezone
@@ -7,7 +6,6 @@ from celery.utils.log import get_task_logger
from config.settings.celery import CELERY_DEADLOCK_ATTEMPTS
from django.db import IntegrityError, OperationalError
from django.db.models import Case, Count, IntegerField, Sum, When
from tasks.utils import CustomEncoder
from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
@@ -193,17 +191,6 @@ def perform_prowler_scan(
if resource_instance.type != finding.resource_type:
resource_instance.type = finding.resource_type
updated_fields.append("type")
if resource_instance.metadata != finding.resource_metadata:
resource_instance.metadata = json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
updated_fields.append("metadata")
if resource_instance.details != finding.resource_details:
resource_instance.details = finding.resource_details
updated_fields.append("details")
if resource_instance.partition != finding.partition:
resource_instance.partition = finding.partition
updated_fields.append("partition")
if updated_fields:
with rls_transaction(tenant_id):
resource_instance.save(update_fields=updated_fields)
@@ -280,8 +267,6 @@ def perform_prowler_scan(
check_id=finding.check_id,
scan=scan_instance,
first_seen_at=last_first_seen_at,
muted=finding.muted,
compliance=finding.compliance,
)
finding_instance.add_resources([resource_instance])
@@ -359,18 +344,9 @@ def perform_prowler_scan(
total_requirements=compliance["total_requirements"],
)
)
try:
with rls_transaction(tenant_id):
ComplianceOverview.objects.bulk_create(
compliance_overview_objects, batch_size=100
)
except Exception as overview_exception:
import sentry_sdk
with rls_transaction(tenant_id):
ComplianceOverview.objects.bulk_create(compliance_overview_objects)
sentry_sdk.capture_exception(overview_exception)
logger.error(
f"Error storing compliance overview for scan {scan_id}: {overview_exception}"
)
if exception is not None:
raise exception
@@ -417,21 +393,21 @@ def aggregate_findings(tenant_id: str, scan_id: str):
).annotate(
fail=Sum(
Case(
When(status="FAIL", muted=False, then=1),
When(status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
_pass=Sum(
Case(
When(status="PASS", muted=False, then=1),
When(status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_count=Sum(
muted=Sum(
Case(
When(muted=True, then=1),
When(status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
@@ -439,63 +415,63 @@ def aggregate_findings(tenant_id: str, scan_id: str):
total=Count("id"),
new=Sum(
Case(
When(delta="new", muted=False, then=1),
When(delta="new", then=1),
default=0,
output_field=IntegerField(),
)
),
changed=Sum(
Case(
When(delta="changed", muted=False, then=1),
When(delta="changed", then=1),
default=0,
output_field=IntegerField(),
)
),
unchanged=Sum(
Case(
When(delta__isnull=True, muted=False, then=1),
When(delta__isnull=True, then=1),
default=0,
output_field=IntegerField(),
)
),
fail_new=Sum(
Case(
When(delta="new", status="FAIL", muted=False, then=1),
When(delta="new", status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
fail_changed=Sum(
Case(
When(delta="changed", status="FAIL", muted=False, then=1),
When(delta="changed", status="FAIL", then=1),
default=0,
output_field=IntegerField(),
)
),
pass_new=Sum(
Case(
When(delta="new", status="PASS", muted=False, then=1),
When(delta="new", status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
pass_changed=Sum(
Case(
When(delta="changed", status="PASS", muted=False, then=1),
When(delta="changed", status="PASS", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_new=Sum(
Case(
When(delta="new", muted=True, then=1),
When(delta="new", status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
),
muted_changed=Sum(
Case(
When(delta="changed", muted=True, then=1),
When(delta="changed", status="MUTED", then=1),
default=0,
output_field=IntegerField(),
)
@@ -513,7 +489,7 @@ def aggregate_findings(tenant_id: str, scan_id: str):
region=agg["resources__region"],
fail=agg["fail"],
_pass=agg["_pass"],
muted=agg["muted_count"],
muted=agg["muted"],
total=agg["total"],
new=agg["new"],
changed=agg["changed"],

View File

@@ -1,31 +1,14 @@
from datetime import datetime, timedelta, timezone
from pathlib import Path
from shutil import rmtree
from celery import chain, shared_task
from celery.utils.log import get_task_logger
from celery import shared_task
from config.celery import RLSTask
from config.django.base import DJANGO_FINDINGS_BATCH_SIZE, DJANGO_TMP_OUTPUT_DIRECTORY
from django_celery_beat.models import PeriodicTask
from tasks.jobs.connection import check_provider_connection
from tasks.jobs.deletion import delete_provider, delete_tenant
from tasks.jobs.export import (
OUTPUT_FORMATS_MAPPING,
_compress_output_files,
_generate_output_directory,
_upload_to_s3,
)
from tasks.jobs.scan import aggregate_findings, perform_prowler_scan
from tasks.utils import batched, get_next_execution_datetime
from tasks.utils import get_next_execution_datetime
from api.db_utils import rls_transaction
from api.decorators import set_tenant
from api.models import Finding, Provider, Scan, ScanSummary, StateChoices
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
from api.models import Scan, StateChoices
@shared_task(base=RLSTask, name="provider-connection-check")
@@ -45,10 +28,9 @@ def check_provider_connection_task(provider_id: str):
return check_provider_connection(provider_id=provider_id)
@shared_task(
base=RLSTask, name="provider-deletion", queue="deletion", autoretry_for=(Exception,)
)
def delete_provider_task(provider_id: str, tenant_id: str):
@shared_task(base=RLSTask, name="provider-deletion")
@set_tenant
def delete_provider_task(provider_id: str):
"""
Task to delete a specific Provider instance.
@@ -56,7 +38,6 @@ def delete_provider_task(provider_id: str, tenant_id: str):
Args:
provider_id (str): The primary key of the `Provider` instance to be deleted.
tenant_id (str): Tenant ID the provider belongs to.
Returns:
tuple: A tuple containing:
@@ -64,7 +45,7 @@ def delete_provider_task(provider_id: str, tenant_id: str):
- A dictionary with the count of deleted instances per model,
including related models if cascading deletes were triggered.
"""
return delete_provider(tenant_id=tenant_id, pk=provider_id)
return delete_provider(pk=provider_id)
@shared_task(base=RLSTask, name="scan-perform", queue="scans")
@@ -87,22 +68,13 @@ def perform_scan_task(
Returns:
dict: The result of the scan execution, typically including the status and results of the performed checks.
"""
result = perform_prowler_scan(
return perform_prowler_scan(
tenant_id=tenant_id,
scan_id=scan_id,
provider_id=provider_id,
checks_to_execute=checks_to_execute,
)
chain(
perform_scan_summary_task.si(tenant_id, scan_id),
generate_outputs.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
return result
@shared_task(base=RLSTask, bind=True, name="scan-perform-scheduled", queue="scans")
def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
@@ -130,43 +102,6 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
executed_scan = Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
task__task_runner_task__task_id=task_id,
).order_by("completed_at")
if (
Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.EXECUTING,
scheduler_task_id=periodic_task_instance.id,
scheduled_at__date=datetime.now(timezone.utc).date(),
).exists()
or executed_scan.exists()
):
# Duplicated task execution due to visibility timeout or scan is already running
logger.warning(f"Duplicated scheduled scan for provider {provider_id}.")
try:
affected_scan = executed_scan.first()
if not affected_scan:
raise ValueError(
"Error retrieving affected scan details after detecting duplicated scheduled "
"scan."
)
# Return the affected scan details to avoid losing data
serializer = ScanTaskSerializer(instance=affected_scan)
except Exception as duplicated_scan_exception:
logger.error(
f"Duplicated scheduled scan for provider {provider_id}. Error retrieving affected scan details: "
f"{str(duplicated_scan_exception)}"
)
raise duplicated_scan_exception
return serializer.data
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
scan_instance, _ = Scan.objects.get_or_create(
tenant_id=tenant_id,
@@ -174,11 +109,7 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
trigger=Scan.TriggerChoices.SCHEDULED,
state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
scheduler_task_id=periodic_task_instance.id,
defaults={
"state": StateChoices.SCHEDULED,
"name": "Daily scheduled scan",
"scheduled_at": next_scan_datetime - timedelta(days=1),
},
defaults={"state": StateChoices.SCHEDULED},
)
scan_instance.task_id = task_id
@@ -204,13 +135,12 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
scheduler_task_id=periodic_task_instance.id,
)
chain(
perform_scan_summary_task.si(tenant_id, scan_instance.id),
generate_outputs.si(
scan_id=str(scan_instance.id), provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
perform_scan_summary_task.apply_async(
kwargs={
"tenant_id": tenant_id,
"scan_id": str(scan_instance.id),
}
)
return result
@@ -219,116 +149,6 @@ def perform_scan_summary_task(tenant_id: str, scan_id: str):
return aggregate_findings(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(name="tenant-deletion", queue="deletion", autoretry_for=(Exception,))
@shared_task(name="tenant-deletion")
def delete_tenant_task(tenant_id: str):
return delete_tenant(pk=tenant_id)
@shared_task(
    base=RLSTask,
    name="scan-report",
    queue="scan-reports",
)
@set_tenant(keep_tenant=True)
def generate_outputs(scan_id: str, provider_id: str, tenant_id: str):
    """
    Process findings in batches and generate output files in multiple formats.

    This function retrieves findings associated with a scan, processes them
    in batches of `DJANGO_FINDINGS_BATCH_SIZE`, and writes each batch to the
    corresponding output files. It reuses output writer instances across
    batches, updates them with each batch of transformed findings, and uses a
    flag to indicate when the final batch is being processed. Finally, the
    output files are compressed and uploaded to S3.

    Args:
        scan_id (str): The scan identifier.
        provider_id (str): The ID of the provider used to generate the outputs.
        tenant_id (str): The tenant identifier.

    Returns:
        dict: `{"upload": True}` when `_upload_to_s3` returned a truthy value,
            `{"upload": False}` otherwise (including when the scan has no
            `ScanSummary` rows and nothing is generated).
    """
    # Check if the scan has findings
    if not ScanSummary.objects.filter(scan_id=scan_id).exists():
        logger.info(f"No findings found for scan {scan_id}")
        return {"upload": False}

    # Fetch the provider once: it is needed both to initialize the prowler
    # provider and to obtain the UID used in the output path.
    provider_instance = Provider.objects.get(id=provider_id)
    prowler_provider = initialize_prowler_provider(provider_instance)
    provider_uid = provider_instance.uid

    # Generate and ensure the output directory exists
    output_directory = _generate_output_directory(
        DJANGO_TMP_OUTPUT_DIRECTORY, provider_uid, tenant_id, scan_id
    )

    # Define auxiliary variables
    output_writers = {}
    scan_summary = FindingOutput._transform_findings_stats(
        ScanSummary.objects.filter(scan_id=scan_id)
    )

    # Retrieve findings queryset
    findings_qs = Finding.all_objects.filter(scan_id=scan_id).order_by("uid")

    # Process findings in batches
    for batch, is_last_batch in batched(
        findings_qs.iterator(), DJANGO_FINDINGS_BATCH_SIZE
    ):
        finding_outputs = [
            FindingOutput.transform_api_finding(finding, prowler_provider)
            for finding in batch
        ]

        # Generate output files
        for mode, config in OUTPUT_FORMATS_MAPPING.items():
            # Copy so the per-mode additions below never touch the shared mapping
            kwargs = dict(config.get("kwargs", {}))
            if mode == "html":
                kwargs["provider"] = prowler_provider
                kwargs["stats"] = scan_summary

            writer_class = config["class"]
            if writer_class in output_writers:
                # Reuse the writer created for an earlier batch
                writer = output_writers[writer_class]
                writer.transform(finding_outputs)
                writer.close_file = is_last_batch
            else:
                writer = writer_class(
                    findings=finding_outputs,
                    file_path=output_directory,
                    file_extension=config["suffix"],
                    from_cli=False,
                )
                writer.close_file = is_last_batch
                output_writers[writer_class] = writer

            # Write the current batch using the writer
            writer.batch_write_data_to_file(**kwargs)

            # TODO: Refactor the output classes to avoid this manual reset
            writer._data = []

    # Compress output files
    output_directory = _compress_output_files(output_directory)

    # Save to configured storage
    uploaded = _upload_to_s3(tenant_id, output_directory, scan_id)

    if uploaded:
        # Remove the local files after upload
        try:
            rmtree(Path(output_directory).parent, ignore_errors=True)
        except FileNotFoundError as e:
            logger.error(f"Error deleting output files: {e}")

        # From here on the recorded location is the value returned by the upload
        output_directory = uploaded
        uploaded = True
    else:
        uploaded = False

    # Update the scan instance with the output path
    Scan.all_objects.filter(id=scan_id).update(output_location=output_directory)

    logger.info(f"Scan output files generated, output location: {output_directory}")

    return {"upload": uploaded}

View File

@@ -9,19 +9,17 @@ from api.models import Provider, Tenant
class TestDeleteProvider:
def test_delete_provider_success(self, providers_fixture):
instance = providers_fixture[0]
tenant_id = str(instance.tenant_id)
result = delete_provider(tenant_id, instance.id)
result = delete_provider(instance.id)
assert result
with pytest.raises(ObjectDoesNotExist):
Provider.objects.get(pk=instance.id)
def test_delete_provider_does_not_exist(self, tenants_fixture):
tenant_id = str(tenants_fixture[0].id)
def test_delete_provider_does_not_exist(self):
non_existent_pk = "babf6796-cfcc-4fd3-9dcf-88d012247645"
with pytest.raises(ObjectDoesNotExist):
delete_provider(tenant_id, non_existent_pk)
delete_provider(non_existent_pk)
@pytest.mark.django_db

View File

@@ -1,4 +1,3 @@
import json
import uuid
from unittest.mock import MagicMock, patch
@@ -8,7 +7,6 @@ from tasks.jobs.scan import (
_store_resources,
perform_prowler_scan,
)
from tasks.utils import CustomEncoder
from api.models import (
Finding,
@@ -109,13 +107,7 @@ class TestPerformScan:
finding.service_name = "service_name"
finding.resource_type = "resource_type"
finding.resource_tags = {"tag1": "value1", "tag2": "value2"}
finding.muted = False
finding.raw = {}
finding.resource_metadata = {"test": "metadata"}
finding.resource_details = {"details": "test"}
finding.partition = "partition"
finding.muted = True
finding.compliance = {"compliance1": "PASS"}
# Mock the ProwlerScan instance
mock_prowler_scan_instance = MagicMock()
@@ -153,8 +145,6 @@ class TestPerformScan:
assert scan_finding.severity == finding.severity
assert scan_finding.check_id == finding.check_id
assert scan_finding.raw_result == finding.raw
assert scan_finding.muted
assert scan_finding.compliance == finding.compliance
assert scan_resource.tenant == tenant
assert scan_resource.uid == finding.resource_uid
@@ -162,11 +152,6 @@ class TestPerformScan:
assert scan_resource.service == finding.service_name
assert scan_resource.type == finding.resource_type
assert scan_resource.name == finding.resource_name
assert scan_resource.metadata == json.dumps(
finding.resource_metadata, cls=CustomEncoder
)
assert scan_resource.details == f"{finding.resource_details}"
assert scan_resource.partition == finding.partition
# Assert that the resource tags have been created and associated
tags = scan_resource.tags.all()

View File

@@ -4,7 +4,7 @@ from unittest.mock import patch
import pytest
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from django_celery_results.models import TaskResult
from tasks.utils import batched, get_next_execution_datetime
from tasks.utils import get_next_execution_datetime
@pytest.mark.django_db
@@ -74,29 +74,3 @@ class TestGetNextExecutionDatetime:
get_next_execution_datetime(
task_id=task_result.task_id, provider_id="nonexistent"
)
class TestBatchedFunction:
    """Pins the ``(batch, is_last_batch)`` pairs produced by ``batched``."""

    def test_empty_iterable(self):
        # No items at all: a single, empty, final batch is still produced.
        assert list(batched([], 3)) == [([], True)]

    def test_exact_batches(self):
        # Item count is a multiple of the batch size: the final batch is empty.
        produced = list(batched([1, 2, 3, 4], 2))
        assert produced == [([1, 2], False), ([3, 4], False), ([], True)]

    def test_inexact_batches(self):
        # The leftover item travels in the final batch.
        produced = list(batched([1, 2, 3, 4, 5], 2))
        assert produced == [([1, 2], False), ([3, 4], False), ([5], True)]

    def test_batch_size_one(self):
        produced = list(batched([1, 2, 3], 1))
        assert produced == [([1], False), ([2], False), ([3], False), ([], True)]

    def test_batch_size_greater_than_length(self):
        # Fewer items than one batch: everything arrives in the final batch.
        produced = list(batched([1, 2, 3], 5))
        assert produced == [([1, 2, 3], True)]

View File

@@ -1,32 +1,9 @@
import json
from datetime import datetime, timedelta, timezone
from enum import Enum
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
class CustomEncoder(json.JSONEncoder):
    """JSON encoder with fallbacks for values the base encoder rejects."""

    def default(self, o):
        """
        Return a JSON-serializable stand-in for ``o``.

        Resolution order: enum members become their ``value``; ``datetime``
        becomes an ISO-8601 string with second precision; ``timedelta``
        becomes its total number of seconds; anything else is offered to the
        base encoder, then falls back to the object's ``__dict__`` and
        finally to ``str(o)``.
        """
        if isinstance(o, Enum):
            return o.value
        if isinstance(o, datetime):
            return o.isoformat(timespec="seconds")
        if isinstance(o, timedelta):
            return o.total_seconds()

        try:
            return super().default(o)
        except TypeError:
            # The base encoder cannot handle it; use the fallbacks below.
            pass

        try:
            return o.__dict__
        except AttributeError:
            return str(o)
def get_next_execution_datetime(task_id: int, provider_id: str) -> datetime:
task_instance = TaskResult.objects.get(task_id=task_id)
try:
@@ -47,27 +24,3 @@ def get_next_execution_datetime(task_id: int, provider_id: str) -> datetime:
)
return current_scheduled_time + timedelta(**{interval.period: interval.every})
def batched(iterable, batch_size):
    """
    Split an iterable into consecutive lists of ``batch_size`` items.

    Every full list is yielded with ``False``. Whatever is left once the
    iterable is exhausted is yielded last with ``True``; that final list is
    empty when the iterable is empty or its length is an exact multiple of
    ``batch_size``.

    Args:
        iterable: An iterable source of items.
        batch_size (int): The number of items per full batch.

    Yields:
        tuple: A pair ``(batch, is_last_batch)`` where ``batch`` is a list of
            items and ``is_last_batch`` is True only for the final pair.
    """
    pending = []
    for element in iterable:
        pending.append(element)
        if len(pending) != batch_size:
            continue
        yield pending, False
        pending = []
    # Always emit a closing pair so consumers see exactly one "last" batch.
    yield pending, True

View File

@@ -89,7 +89,7 @@ for accountId in $ACCOUNTS_IN_ORGS; do
# Run Prowler
echo -e "Assessing AWS Account: $accountId, using Role: $ROLE on $(date)"
# remove -g cislevel for a full report and add other formats if needed
./prowler/prowler-cli.py --role arn:"$PARTITION":iam::"$accountId":role/"$ROLE" --compliance cis_1.5_aws -M html
./prowler/prowler.py --role arn:"$PARTITION":iam::"$accountId":role/"$ROLE" --compliance cis_1.5_aws -M html
echo "Report stored locally at: prowler/output/ directory"
TOTAL_SEC=$((SECONDS - START_TIME))
echo -e "Completed AWS Account: $accountId, using Role: $ROLE on $(date)"

View File

@@ -17,7 +17,7 @@ spec:
image: toniblyx/prowler:latest
imagePullPolicy: Always
command:
- "./prowler-cli.py"
- "./prowler.py"
args: [ "-B", "$(awsS3Bucket)" ]
env:
- name: AWS_ACCESS_KEY_ID

View File

@@ -562,11 +562,8 @@ def get_section_containers_format1(data, section_1, section_2):
direct_internal_items.append(internal_section_container)
# Cut the title if it's too long
tittle_external = section[:70] + " ..." if len(section) > 70 else section
accordion_item = dbc.AccordionItem(
title=f"{tittle_external}", children=direct_internal_items
title=f"{section}", children=direct_internal_items
)
section_container = html.Div(
[

View File

@@ -1,24 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_cis
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Copy the columns listed below out of ``data`` and pass them to
    ``get_section_containers_cis`` together with the requirement-ID and
    section column names.
    """
    columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_DESCRIPTION",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns].copy()
    return get_section_containers_cis(
        selected, "REQUIREMENTS_ID", "REQUIREMENTS_ATTRIBUTES_SECTION"
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_container_iso`` together with the category and
    objective-ID column names.
    """
    columns = [
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_container_iso(
        selected,
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_container_iso`` together with the category and
    objective-ID column names.
    """
    columns = [
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_container_iso(
        selected,
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_container_iso`` together with the category and
    objective-ID column names.
    """
    columns = [
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_container_iso(
        selected,
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_container_iso`` together with the category and
    objective-ID column names.
    """
    columns = [
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_container_iso(
        selected,
        "REQUIREMENTS_ATTRIBUTES_CATEGORY",
        "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
    )

View File

@@ -1,6 +1,6 @@
import warnings
from dashboard.common_methods import get_section_containers_format1
from dashboard.common_methods import get_section_containers_format4
warnings.filterwarnings("ignore")
@@ -9,15 +9,12 @@ def get_table(data):
aux = data[
[
"REQUIREMENTS_ID",
"REQUIREMENTS_ATTRIBUTES_SECTION",
"CHECKID",
"STATUS",
"REGION",
"ACCOUNTID",
"RESOURCEID",
]
].copy()
]
return get_section_containers_format1(
aux, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
)
return get_section_containers_format4(aux, "REQUIREMENTS_ID")

View File

@@ -1,24 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_containers_format3`` together with the section and
    requirement-ID column names.
    """
    columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "REQUIREMENTS_DESCRIPTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_containers_format3(
        selected, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_containers_format3`` together with the section and
    requirement-ID column names.
    """
    columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "REQUIREMENTS_DESCRIPTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_containers_format3(
        selected, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_containers_format3`` together with the section and
    requirement-ID column names.
    """
    columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "REQUIREMENTS_DESCRIPTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_containers_format3(
        selected, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -1,24 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_format3
warnings.filterwarnings("ignore")
def get_table(data):
    """
    Select the columns listed below from ``data`` and pass them to
    ``get_section_containers_format3`` together with the section and
    requirement-ID column names.
    """
    columns = [
        "REQUIREMENTS_ID",
        "REQUIREMENTS_ATTRIBUTES_SECTION",
        "REQUIREMENTS_DESCRIPTION",
        "CHECKID",
        "STATUS",
        "REGION",
        "ACCOUNTID",
        "RESOURCEID",
    ]
    selected = data[columns]
    return get_section_containers_format3(
        selected, "REQUIREMENTS_ATTRIBUTES_SECTION", "REQUIREMENTS_ID"
    )

View File

@@ -76,6 +76,7 @@ def load_csv_files(csv_files):
result = result.replace("_AZURE", " - AZURE")
if "KUBERNETES" in result:
result = result.replace("_KUBERNETES", " - KUBERNETES")
result = result[result.find("CIS_") :]
results.append(result)
unique_results = set(results)

View File

@@ -165,21 +165,9 @@ else:
)
# For the timestamp, remove the two columns and keep only the date
data["TIMESTAMP"] = pd.to_datetime(data["TIMESTAMP"])
# Handle findings from v3 outputs
if "FINDING_UNIQUE_ID" in data.columns:
data.rename(columns={"FINDING_UNIQUE_ID": "FINDING_UID"}, inplace=True)
if "ACCOUNT_ID" in data.columns:
data.rename(columns={"ACCOUNT_ID": "ACCOUNT_UID"}, inplace=True)
if "ASSESSMENT_START_TIME" in data.columns:
data.rename(columns={"ASSESSMENT_START_TIME": "TIMESTAMP"}, inplace=True)
if "RESOURCE_ID" in data.columns:
data.rename(columns={"RESOURCE_ID": "RESOURCE_UID"}, inplace=True)
# Remove duplicates on the FINDING_UID column, keeping the most recent row according to the timestamp
data = data.sort_values("TIMESTAMP").drop_duplicates("FINDING_UID", keep="last")
data["ASSESSMENT_TIME"] = data["TIMESTAMP"].dt.strftime("%Y-%m-%d")
data["ASSESSMENT_TIME"] = data["TIMESTAMP"].dt.strftime("%Y-%m-%d %H:%M:%S")
data_valid = pd.DataFrame()
for account in data["ACCOUNT_UID"].unique():
all_times = data[data["ACCOUNT_UID"] == account]["ASSESSMENT_TIME"].unique()

View File

@@ -16,7 +16,6 @@ services:
volumes:
- "./api/src/backend:/home/prowler/backend"
- "./api/pyproject.toml:/home/prowler/pyproject.toml"
- "/tmp/prowler_api_output:/tmp/prowler_api_output"
depends_on:
postgres:
condition: service_healthy
@@ -86,8 +85,6 @@ services:
env_file:
- path: .env
required: false
volumes:
- "/tmp/prowler_api_output:/tmp/prowler_api_output"
depends_on:
valkey:
condition: service_healthy

View File

@@ -7,8 +7,6 @@ services:
required: false
ports:
- "${DJANGO_PORT:-8080}:${DJANGO_PORT:-8080}"
volumes:
- "/tmp/prowler_api_output:/tmp/prowler_api_output"
depends_on:
postgres:
condition: service_healthy
@@ -67,8 +65,6 @@ services:
env_file:
- path: .env
required: false
volumes:
- "/tmp/prowler_api_output:/tmp/prowler_api_output"
depends_on:
valkey:
condition: service_healthy

View File

@@ -294,7 +294,7 @@ Each Prowler check has metadata associated which is stored at the same level of
# Code holds different methods to remediate the FAIL finding
"Code": {
# CLI holds the command in the provider native CLI to remediate it
"CLI": "aws ec2 modify-image-attribute --region <REGION> --image-id <EC2_AMI_ID> --launch-permission {\"Remove\":[{\"Group\":\"all\"}]}",
"CLI": "https://docs.prowler.com/checks/public_8#cli-command",
# NativeIaC holds the native IaC code to remediate it, use "https://docs.bridgecrew.io/docs"
"NativeIaC": "",
# Other holds the other commands, scripts or code to remediate it, use "https://www.trendmicro.com/cloudoneconformity"

View File

@@ -18,7 +18,7 @@ This file should inside the *.vscode* folder and its name has to be *launch.json
"name": "Debug AWS Check",
"type": "debugpy",
"request": "launch",
"program": "prowler-cli.py",
"program": "prowler.py",
"args": [
"aws",
"--log-level",
@@ -33,7 +33,7 @@ This file should inside the *.vscode* folder and its name has to be *launch.json
"name": "Debug Azure Check",
"type": "debugpy",
"request": "launch",
"program": "prowler-cli.py",
"program": "prowler.py",
"args": [
"azure",
"--sp-env-auth",
@@ -49,7 +49,7 @@ This file should inside the *.vscode* folder and its name has to be *launch.json
"name": "Debug GCP Check",
"type": "debugpy",
"request": "launch",
"program": "prowler-cli.py",
"program": "prowler.py",
"args": [
"gcp",
"--log-level",
@@ -64,7 +64,7 @@ This file should inside the *.vscode* folder and its name has to be *launch.json
"name": "Debug K8s Check",
"type": "debugpy",
"request": "launch",
"program": "prowler-cli.py",
"program": "prowler.py",
"args": [
"kubernetes",
"--log-level",

View File

@@ -19,13 +19,8 @@ For isolation and to avoid conflicts with other environments, we recommend using
Then install all dependencies including the ones for developers:
```
poetry install --with dev
eval $(poetry env activate) \
poetry shell
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
## Contributing with your code or fixes to Prowler

View File

@@ -175,7 +175,6 @@ Due to the complexity and differences of each provider use the rest of the provi
- [GCP](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/gcp_provider.py)
- [Azure](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/azure_provider.py)
- [Kubernetes](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/kubernetes_provider.py)
- [Microsoft365](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/microsoft365/microsoft365_provider.py)
To facilitate understanding here is a pseudocode of how the most basic provider could be with examples.

View File

@@ -237,4 +237,3 @@ It is really important to check if the current Prowler's permissions for each pr
- AWS: https://docs.prowler.cloud/en/latest/getting-started/requirements/#aws-authentication
- Azure: https://docs.prowler.cloud/en/latest/getting-started/requirements/#permissions
- GCP: https://docs.prowler.cloud/en/latest/getting-started/requirements/#gcp-authentication
- Microsoft365: https://docs.prowler.cloud/en/latest/getting-started/requirements/#microsoft365-authentication

View File

@@ -59,7 +59,7 @@ export AZURE_CLIENT_SECRET="XXXXXXX"
```
If you try to execute Prowler with the `--sp-env-auth` flag and those variables are empty or not exported, the execution is going to fail.
Follow the instructions in the [Create Prowler Service Principal](../tutorials/azure/create-prowler-service-principal.md#how-to-create-prowler-service-principal-application) section to create a service principal.
Follow the instructions in the [Create Prowler Service Principal](../tutorials/azure/create-prowler-service-principal.md#how-to-create-prowler-service-principal) section to create a service principal.
### AZ CLI / Browser / Managed Identity authentication
@@ -79,7 +79,7 @@ Prowler for Azure needs two types of permission scopes to be set:
???+ note
Please, notice that the field `assignableScopes` in the JSON custom role file must be changed to be the subscription or management group where the role is going to be assigned. The valid formats for the field are `/subscriptions/<subscription-id>` or `/providers/Microsoft.Management/managementGroups/<management-group-id>`.
To assign the permissions, follow the instructions in the [Microsoft Entra ID permissions](../tutorials/azure/create-prowler-service-principal.md#assigning-the-proper-permissions) section and the [Azure subscriptions permissions](../tutorials/azure/subscriptions.md#assign-the-appropriate-permissions-to-the-identity-that-is-going-to-be-assumed-by-prowler) section, respectively.
To assign the permissions, follow the instructions in the [Microsoft Entra ID permissions](../tutorials/azure/create-prowler-service-principal.md#assigning-the-proper-permissions) section and the [Azure subscriptions permissions](../tutorials/azure/subscriptions.md#assigning-proper-permissions) section, respectively.
#### Checks that require ProwlerRole
@@ -102,32 +102,3 @@ Those credentials must be associated to a user or service account with proper pe
???+ note
By default, `prowler` will scan all accessible GCP Projects, use flag `--project-ids` to specify the projects to be scanned.
## Microsoft365
Prowler for Microsoft365 currently supports the following authentication types:
- [Service principal application](https://learn.microsoft.com/en-us/entra/identity-platform/app-objects-and-service-principals?tabs=browser#service-principal-object) (recommended).
- Current az cli credentials stored.
- Interactive browser authentication.
???+ warning
For Prowler App only the Service Principal with an application authentication method is supported.
### Service Principal authentication
To allow Prowler to assume the service principal identity and start the scan, you need to configure the following environment variables:
```console
export AZURE_CLIENT_ID="XXXXXXXXX"
export AZURE_CLIENT_SECRET="XXXXXXXXX"
export AZURE_TENANT_ID="XXXXXXXXX"
```
If you try to execute Prowler with the `--sp-env-auth` flag and those variables are empty or not exported, the execution is going to fail.
Follow the instructions in the [Create Prowler Service Principal](../tutorials/azure/create-prowler-service-principal.md) section to create a service principal.
### Interactive Browser authentication
To use `--browser-auth`, the user needs to authenticate against Azure using the default browser to start the scan. The `--tenant-id` flag is also required.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 330 KiB

Some files were not shown because too many files have changed in this diff Show More