Compare commits

..

1 Commits

Author SHA1 Message Date
Rubén De la Torre Vico 06a91aa9ff docs: update the changelog 2025-06-02 12:10:09 +02:00
678 changed files with 8357 additions and 41262 deletions
+1 -14
View File
@@ -6,15 +6,11 @@
PROWLER_UI_VERSION="stable"
AUTH_URL=http://localhost:3000
API_BASE_URL=http://prowler-api:8080/api/v1
NEXT_PUBLIC_API_BASE_URL=${API_BASE_URL}
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
AUTH_TRUST_HOST=true
UI_PORT=3000
# openssl rand -base64 32
AUTH_SECRET="N/c6mnaS5+SWq81+819OrzQZlmx1Vxtp/orjttJSmw8="
# Google Tag Manager ID
NEXT_PUBLIC_GOOGLE_TAG_MANAGER_ID=""
#### Prowler API Configuration ####
PROWLER_API_VERSION="stable"
@@ -131,7 +127,7 @@ SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.7.5
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.6.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
@@ -141,12 +137,3 @@ SOCIAL_GOOGLE_OAUTH_CLIENT_SECRET=""
SOCIAL_GITHUB_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/github"
SOCIAL_GITHUB_OAUTH_CLIENT_ID=""
SOCIAL_GITHUB_OAUTH_CLIENT_SECRET=""
# Single Sign-On (SSO)
SAML_SSO_CALLBACK_URL="${AUTH_URL}/api/auth/callback/saml"
# Lighthouse tracing
LANGSMITH_TRACING=false
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=""
LANGCHAIN_PROJECT=""
-5
View File
@@ -27,11 +27,6 @@ provider/github:
- any-glob-to-any-file: "prowler/providers/github/**"
- any-glob-to-any-file: "tests/providers/github/**"
provider/iac:
- changed-files:
- any-glob-to-any-file: "prowler/providers/iac/**"
- any-glob-to-any-file: "tests/providers/iac/**"
github_actions:
- changed-files:
- any-glob-to-any-file: ".github/workflows/*"
@@ -76,12 +76,12 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.WORKING_DIRECTORY }}
# Set push: false for testing
@@ -94,7 +94,7 @@ jobs:
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.WORKING_DIRECTORY }}
push: true
+2 -2
View File
@@ -48,12 +48,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/api-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
category: "/language:${{matrix.language}}"
+10 -31
View File
@@ -28,10 +28,6 @@ env:
VALKEY_DB: 0
API_WORKING_DIR: ./api
IMAGE_NAME: prowler-api
IGNORE_FILES: |
api/docs/**
api/README.md
api/CHANGELOG.md
jobs:
test:
@@ -82,7 +78,12 @@ jobs:
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: api/**
files_ignore: ${{ env.IGNORE_FILES }}
files_ignore: |
api/.github/**
api/docs/**
api/permissions/**
api/README.md
api/mkdocs.yml
- name: Replace @master with current branch in pyproject.toml
working-directory: ./api
@@ -112,12 +113,6 @@ jobs:
python-version: ${{ matrix.python-version }}
cache: "poetry"
- name: Install system dependencies for xmlsec
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
sudo apt-get update
sudo apt-get install -y libxml2-dev libxmlsec1-dev libxmlsec1-openssl pkg-config
- name: Install dependencies
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
@@ -136,12 +131,6 @@ jobs:
run: |
poetry check --lock
- name: Prevents known compatibility error between lxml and libxml2/libxmlsec versions - https://github.com/xmlsec/python-xmlsec/issues/320
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry run pip install --force-reinstall --no-binary lxml lxml
- name: Lint with ruff
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
@@ -169,9 +158,8 @@ jobs:
- name: Safety
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
# 76352, 76353, 77323 come from SDK, but they cannot upgrade it yet. It does not affect API
run: |
poetry run safety check --ignore 70612,66963,74429,76352,76353,77323
poetry run safety check --ignore 70612,66963,74429
- name: Vulture
working-directory: ./api
@@ -193,7 +181,7 @@ jobs:
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d # v5.4.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
@@ -202,19 +190,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Test if changes are in not ignored paths
id: are-non-ignored-files-changed
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: api/**
files_ignore: ${{ env.IGNORE_FILES }}
- name: Set up Docker Buildx
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.API_WORKING_DIR }}
push: false
+1 -1
View File
@@ -11,7 +11,7 @@ jobs:
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@6641d4ba5b684fffe195b9820345de1bf19f3181 # v3.89.2
uses: trufflesecurity/trufflehog@b06f6d72a3791308bb7ba59c2b8cb7a083bd17e4 # v3.88.26
with:
path: ./
base: ${{ github.event.repository.default_branch }}
@@ -1,86 +0,0 @@
name: Check Changelog
on:
pull_request:
types: [opened, synchronize, reopened, labeled, unlabeled]
jobs:
check-changelog:
if: contains(github.event.pull_request.labels.*.name, 'no-changelog') == false
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
pull-requests: write
env:
MONITORED_FOLDERS: "api ui prowler"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Get list of changed files
id: changed_files
run: |
git fetch origin ${{ github.base_ref }}
git diff --name-only origin/${{ github.base_ref }}...HEAD > changed_files.txt
cat changed_files.txt
- name: Check for folder changes and changelog presence
id: check_folders
run: |
missing_changelogs=""
for folder in $MONITORED_FOLDERS; do
if grep -q "^${folder}/" changed_files.txt; then
echo "Detected changes in ${folder}/"
if ! grep -q "^${folder}/CHANGELOG.md$" changed_files.txt; then
echo "No changelog update found for ${folder}/"
missing_changelogs="${missing_changelogs}- \`${folder}\`\n"
fi
fi
done
echo "missing_changelogs<<EOF" >> $GITHUB_OUTPUT
echo -e "${missing_changelogs}" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- name: Find existing changelog comment
if: github.event.pull_request.head.repo.full_name == github.repository
id: find_comment
uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e #v3.1.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-author: 'github-actions[bot]'
body-includes: '<!-- changelog-check -->'
- name: Comment on PR if changelog is missing
if: github.event.pull_request.head.repo.full_name == github.repository && steps.check_folders.outputs.missing_changelogs != ''
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-id: ${{ steps.find_comment.outputs.comment-id }}
body: |
<!-- changelog-check -->
⚠️ **Changes detected in the following folders without a corresponding update to the `CHANGELOG.md`:**
${{ steps.check_folders.outputs.missing_changelogs }}
Please add an entry to the corresponding `CHANGELOG.md` file to maintain a clear history of changes.
- name: Comment on PR if all changelogs are present
if: github.event.pull_request.head.repo.full_name == github.repository && steps.check_folders.outputs.missing_changelogs == ''
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-id: ${{ steps.find_comment.outputs.comment-id }}
body: |
<!-- changelog-check -->
✅ All necessary `CHANGELOG.md` files have been updated. Great job! 🎉
- name: Fail if changelog is missing
if: steps.check_folders.outputs.missing_changelogs != ''
run: |
echo "ERROR: Missing changelog updates in some folders."
exit 1
@@ -123,11 +123,11 @@ jobs:
AWS_REGION: ${{ env.AWS_REGION }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
if: github.event_name == 'push'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
push: true
tags: |
@@ -140,7 +140,7 @@ jobs:
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
# Use local context to get changes
# https://github.com/docker/build-push-action#path-context
+2 -2
View File
@@ -56,12 +56,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/sdk-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
category: "/language:${{matrix.language}}"
+1 -16
View File
@@ -212,21 +212,6 @@ jobs:
run: |
poetry run pytest -n auto --cov=./prowler/providers/m365 --cov-report=xml:m365_coverage.xml tests/providers/m365
# Test IaC
- name: IaC - Check if any file has changed
id: iac-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/iac/**
./tests/providers/iac/**
.poetry.lock
- name: IaC - Test
if: steps.iac-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/iac --cov-report=xml:iac_coverage.xml tests/providers/iac
# Common Tests
- name: Lib - Test
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
@@ -241,7 +226,7 @@ jobs:
# Codecov
- name: Upload coverage reports to Codecov
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d # v5.4.2
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
@@ -38,7 +38,7 @@ jobs:
pip install boto3
- name: Configure AWS Credentials -- DEV
uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df # v4.2.1
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
aws-region: ${{ env.AWS_REGION_DEV }}
role-to-assume: ${{ secrets.DEV_IAM_ROLE_ARN }}
@@ -30,7 +30,6 @@ env:
# Container Registries
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-ui
NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
jobs:
repository-check:
@@ -77,17 +76,16 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
# Comment the following line for testing
if: github.event_name == 'push'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ env.SHORT_SHA }}
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
# Set push: false for testing
push: true
tags: |
@@ -98,12 +96,11 @@ jobs:
- name: Build and push container image (release)
if: github.event_name == 'release'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${{ env.RELEASE_TAG }}
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.RELEASE_TAG }}
+2 -2
View File
@@ -48,12 +48,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/ui-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
category: "/language:${{matrix.language}}"
+3 -52
View File
@@ -34,72 +34,23 @@ jobs:
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: './ui/package-lock.json'
- name: Install dependencies
working-directory: ./ui
run: npm ci
run: npm install
- name: Run Healthcheck
working-directory: ./ui
run: npm run healthcheck
- name: Build the application
working-directory: ./ui
run: npm run build
e2e-tests:
runs-on: ubuntu-latest
env:
AUTH_SECRET: 'fallback-ci-secret-for-testing'
AUTH_TRUST_HOST: true
NEXTAUTH_URL: http://localhost:3000
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '20.x'
cache: 'npm'
cache-dependency-path: './ui/package-lock.json'
- name: Install dependencies
working-directory: ./ui
run: npm ci
- name: Cache Playwright browsers
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
id: playwright-cache
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('ui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-
- name: Install Playwright browsers
working-directory: ./ui
if: steps.playwright-cache.outputs.cache-hit != 'true'
run: npm run test:e2e:install
- name: Build the application
working-directory: ./ui
run: npm run build
- name: Run Playwright tests
working-directory: ./ui
run: npm run test:e2e
- name: Upload Playwright report
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
if: failure()
with:
name: playwright-report
path: ui/playwright-report/
retention-days: 30
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ${{ env.UI_WORKING_DIR }}
# Always build using `prod` target
+1 -1
View File
@@ -115,7 +115,7 @@ repos:
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'safety check --ignore 70612,66963,74429,76352,76353'
entry: bash -c 'safety check --ignore 70612,66963,74429'
language: system
- id: vulture
+5 -2
View File
@@ -6,8 +6,7 @@ LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
ARG POWERSHELL_VERSION=7.5.0
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y --no-install-recommends \
wget libicu72 libunwind8 libssl3 libcurl4 ca-certificates apt-transport-https gnupg \
RUN apt-get update && apt-get install -y --no-install-recommends wget libicu72 \
&& rm -rf /var/lib/apt/lists/*
# Install PowerShell
@@ -47,6 +46,10 @@ ENV PATH="${HOME}/.local/bin:${PATH}"
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
# By default poetry does not compile Python source files to bytecode during installation.
# This speeds up the installation process, but the first execution may take a little more
# time because Python then compiles source files to bytecode automatically. If you want to
# compile source files to bytecode during installation, you can use the --compile option
RUN poetry install --compile && \
rm -rf ~/.cache/pip
+2 -2
View File
@@ -87,11 +87,11 @@ prowler dashboard
| Provider | Checks | Services | [Compliance Frameworks](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/compliance/) | [Categories](https://docs.prowler.com/projects/prowler-open-source/en/latest/tutorials/misc/#categories) |
|---|---|---|---|---|
| AWS | 567 | 82 | 36 | 10 |
| GCP | 79 | 13 | 10 | 3 |
| GCP | 79 | 13 | 9 | 3 |
| Azure | 142 | 18 | 10 | 3 |
| Kubernetes | 83 | 7 | 5 | 7 |
| GitHub | 16 | 2 | 1 | 0 |
| M365 | 69 | 7 | 3 | 2 |
| M365 | 69 | 7 | 2 | 2 |
| NHN (Unofficial) | 6 | 2 | 1 | 0 |
> [!Note]
+168
View File
@@ -0,0 +1,168 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
/_data/
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
*.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# VSCode
.vscode/
+91
View File
@@ -0,0 +1,91 @@
repos:
## GENERAL
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-merge-conflict
- id: check-yaml
args: ["--unsafe"]
- id: check-json
- id: end-of-file-fixer
- id: trailing-whitespace
- id: no-commit-to-branch
- id: pretty-format-json
args: ["--autofix", "--no-sort-keys", "--no-ensure-ascii"]
exclude: 'src/backend/api/fixtures/dev/.*\.json$'
## TOML
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.13.0
hooks:
- id: pretty-format-toml
args: [--autofix]
files: pyproject.toml
## BASH
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck
exclude: contrib
## PYTHON
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.0
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check
args: ["--directory=src"]
- id: poetry-lock
args: ["--no-update", "--directory=src"]
- repo: https://github.com/hadolint/hadolint
rev: v2.13.0-beta
hooks:
- id: hadolint
args: ["--ignore=DL3013", "Dockerfile"]
- repo: local
hooks:
- id: pylint
name: pylint
entry: bash -c 'poetry run pylint --disable=W,C,R,E -j 0 -rn -sn src/'
language: system
files: '.*\.py'
- id: trufflehog
name: TruffleHog
description: Detect secrets in your data.
entry: bash -c 'trufflehog --no-update git file://. --only-verified --fail'
# For running trufflehog in docker, use the following entry instead:
# entry: bash -c 'docker run -v "$(pwd):/workdir" -i --rm trufflesecurity/trufflehog:latest git file:///workdir --only-verified --fail'
language: system
stages: ["commit", "push"]
- id: bandit
name: bandit
description: "Bandit is a tool for finding common security issues in Python code"
entry: bash -c 'poetry run bandit -q -lll -x '*_test.py,./contrib/,./.venv/' -r .'
language: system
files: '.*\.py'
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'poetry run safety check --ignore 70612,66963,74429'
language: system
- id: vulture
name: vulture
description: "Vulture finds unused code in Python programs."
entry: bash -c 'poetry run vulture --exclude "contrib,.venv,tests,conftest.py" --min-confidence 100 .'
language: system
files: '.*\.py'
+35 -71
View File
@@ -2,81 +2,44 @@
All notable changes to the **Prowler API** are documented in this file.
## [v1.10.0] (Prowler UNRELEASED)
---
## [v1.9.0] (Prowler v5.8.0)
## [v1.9.0] (Prowler UNRELEASED)
### Added
- Support GCP Service Account key [(#7824)](https://github.com/prowler-cloud/prowler/pull/7824)
- `GET /compliance-overviews` endpoints to retrieve compliance metadata and specific requirements statuses [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Lighthouse configuration support [(#7848)](https://github.com/prowler-cloud/prowler/pull/7848)
### Changed
- Reworked `GET /compliance-overviews` to return proper requirement metrics [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Optional `user` and `password` for M365 provider [(#7992)](https://github.com/prowler-cloud/prowler/pull/7992)
### Fixed
- Scheduled scans are no longer deleted when their daily schedule run is disabled [(#8082)](https://github.com/prowler-cloud/prowler/pull/8082)
---
## [v1.8.5] (Prowler v5.7.5)
### Fixed
- Normalize provider UID to ensure safe and unique export directory paths [(#8007)](https://github.com/prowler-cloud/prowler/pull/8007).
- Blank resource types in `/metadata` endpoints [(#8027)](https://github.com/prowler-cloud/prowler/pull/8027)
---
## [v1.8.4] (Prowler v5.7.4)
### Removed
- Reverted RLS transaction handling and DB custom backend [(#7994)](https://github.com/prowler-cloud/prowler/pull/7994)
---
## [v1.8.3] (Prowler v5.7.3)
### Added
- Database backend to handle already closed connections [(#7935)](https://github.com/prowler-cloud/prowler/pull/7935)
- Support GCP Service Account key. [(#7824)](https://github.com/prowler-cloud/prowler/pull/7824)
### Changed
- Renamed field encrypted_password to password for M365 provider [(#7784)](https://github.com/prowler-cloud/prowler/pull/7784)
### Fixed
- Transaction persistence with RLS operations [(#7916)](https://github.com/prowler-cloud/prowler/pull/7916)
- Reverted the change `get_with_retry` to use the original `get` method for retrieving tasks [(#7932)](https://github.com/prowler-cloud/prowler/pull/7932)
- Fixed the connection status verification before launching a scan [(#7831)](https://github.com/prowler-cloud/prowler/pull/7831)
---
## [v1.8.2] (Prowler v5.7.2)
### Fixed
- Task lookup to use task_kwargs instead of task_args for scan report resolution [(#7830)](https://github.com/prowler-cloud/prowler/pull/7830)
- Kubernetes UID validation to allow valid context names [(#7871)](https://github.com/prowler-cloud/prowler/pull/7871)
- Connection status verification before launching a scan [(#7831)](https://github.com/prowler-cloud/prowler/pull/7831)
- Race condition when creating background tasks [(#7876)](https://github.com/prowler-cloud/prowler/pull/7876)
- Error when modifying or retrieving tenants due to missing user UUID in transaction context [(#7890)](https://github.com/prowler-cloud/prowler/pull/7890)
- Fixed task lookup to use task_kwargs instead of task_args for scan report resolution. [(#7830)](https://github.com/prowler-cloud/prowler/pull/7830)
- Fixed Kubernetes UID validation to allow valid context names [(#7871)](https://github.com/prowler-cloud/prowler/pull/7871)
- Fixed a race condition when creating background tasks [(#7876)](https://github.com/prowler-cloud/prowler/pull/7876).
- Fixed an error when modifying or retrieving tenants due to missing user UUID in transaction context [(#7890)](https://github.com/prowler-cloud/prowler/pull/7890).
---
## [v1.8.1] (Prowler v5.7.1)
### Fixed
- Added database index to improve performance on finding lookup [(#7800)](https://github.com/prowler-cloud/prowler/pull/7800)
- Added database index to improve performance on finding lookup. [(#7800)](https://github.com/prowler-cloud/prowler/pull/7800)
---
## [v1.8.0] (Prowler v5.7.0)
### Added
- Huge improvements to `/findings/metadata` and resource related filters for findings [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
- Improvements to `/overviews` endpoints [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
- Queue to perform backfill background tasks [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
- New endpoints to retrieve latest findings and metadata [(#7743)](https://github.com/prowler-cloud/prowler/pull/7743)
- Export support for Prowler ThreatScore in M365 [(7783)](https://github.com/prowler-cloud/prowler/pull/7783)
- Added huge improvements to `/findings/metadata` and resource related filters for findings [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690).
- Added improvements to `/overviews` endpoints [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690).
- Added new queue to perform backfill background tasks [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690).
- Added new endpoints to retrieve latest findings and metadata [(#7743)](https://github.com/prowler-cloud/prowler/pull/7743).
- Added export support for Prowler ThreatScore in M365 [(7783)](https://github.com/prowler-cloud/prowler/pull/7783)
---
@@ -84,9 +47,9 @@ All notable changes to the **Prowler API** are documented in this file.
### Added
- M365 as a new provider [(#7563)](https://github.com/prowler-cloud/prowler/pull/7563)
- `compliance/` folder and ZIPexport functionality for all compliance reports [(#7653)](https://github.com/prowler-cloud/prowler/pull/7653)
- API endpoint to fetch and download any specific compliance file by name [(#7653)](https://github.com/prowler-cloud/prowler/pull/7653)
- Added M365 as a new provider [(#7563)](https://github.com/prowler-cloud/prowler/pull/7563).
- Added a `compliance/` folder and ZIPexport functionality for all compliance reports.[(#7653)](https://github.com/prowler-cloud/prowler/pull/7653).
- Added a new API endpoint to fetch and download any specific compliance file by name [(#7653)](https://github.com/prowler-cloud/prowler/pull/7653).
---
@@ -94,42 +57,43 @@ All notable changes to the **Prowler API** are documented in this file.
### Added
- Support for developing new integrations [(#7167)](https://github.com/prowler-cloud/prowler/pull/7167)
- HTTP Security Headers [(#7289)](https://github.com/prowler-cloud/prowler/pull/7289)
- New endpoint to get the compliance overviews metadata [(#7333)](https://github.com/prowler-cloud/prowler/pull/7333)
- Support for muted findings [(#7378)](https://github.com/prowler-cloud/prowler/pull/7378)
- Missing fields to API findings and resources [(#7318)](https://github.com/prowler-cloud/prowler/pull/7318)
- Support for developing new integrations [(#7167)](https://github.com/prowler-cloud/prowler/pull/7167).
- HTTP Security Headers [(#7289)](https://github.com/prowler-cloud/prowler/pull/7289).
- New endpoint to get the compliance overviews metadata [(#7333)](https://github.com/prowler-cloud/prowler/pull/7333).
- Support for muted findings [(#7378)](https://github.com/prowler-cloud/prowler/pull/7378).
- Added missing fields to API findings and resources [(#7318)](https://github.com/prowler-cloud/prowler/pull/7318).
---
## [v1.5.4] (Prowler v5.4.4)
### Fixed
- Bug with periodic tasks when trying to delete a provider [(#7466)](https://github.com/prowler-cloud/prowler/pull/7466)
- Fixed a bug with periodic tasks when trying to delete a provider ([#7466])(https://github.com/prowler-cloud/prowler/pull/7466).
---
## [v1.5.3] (Prowler v5.4.3)
### Fixed
- Duplicated scheduled scans handling [(#7401)](https://github.com/prowler-cloud/prowler/pull/7401)
- Environment variable to configure the deletion task batch size [(#7423)](https://github.com/prowler-cloud/prowler/pull/7423)
- Added duplicated scheduled scans handling ([#7401])(https://github.com/prowler-cloud/prowler/pull/7401).
- Added environment variable to configure the deletion task batch size ([#7423])(https://github.com/prowler-cloud/prowler/pull/7423).
---
## [v1.5.2] (Prowler v5.4.2)
### Changed
- Refactored deletion logic and implemented retry mechanism for deletion tasks [(#7349)](https://github.com/prowler-cloud/prowler/pull/7349)
- Refactored deletion logic and implemented retry mechanism for deletion tasks [(#7349)](https://github.com/prowler-cloud/prowler/pull/7349).
---
## [v1.5.1] (Prowler v5.4.1)
### Fixed
- Handle response in case local files are missing [(#7183)](https://github.com/prowler-cloud/prowler/pull/7183)
- Race condition when deleting export files after the S3 upload [(#7172)](https://github.com/prowler-cloud/prowler/pull/7172)
- Handle exception when a provider has no secret in test connection [(#7283)](https://github.com/prowler-cloud/prowler/pull/7283)
- Added a handled response in case local files are missing [(#7183)](https://github.com/prowler-cloud/prowler/pull/7183).
- Fixed a race condition when deleting export files after the S3 upload [(#7172)](https://github.com/prowler-cloud/prowler/pull/7172).
- Handled exception when a provider has no secret in test connection [(#7283)](https://github.com/prowler-cloud/prowler/pull/7283).
---
@@ -137,20 +101,20 @@ All notable changes to the **Prowler API** are documented in this file.
### Added
- Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)
- API scan report system, now all scans launched from the API will generate a compressed file with the report in OCSF, CSV and HTML formats [(#6878)](https://github.com/prowler-cloud/prowler/pull/6878)
- Add API scan report system, now all scans launched from the API will generate a compressed file with the report in OCSF, CSV and HTML formats [(#6878)](https://github.com/prowler-cloud/prowler/pull/6878).
- Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874)
### Changed
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019)
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).
---
## [v1.4.0] (Prowler v5.3.0)
### Changed
- Daily scheduled scan instances are now created beforehand with `SCHEDULED` state [(#6700)](https://github.com/prowler-cloud/prowler/pull/6700)
- Findings endpoints now require at least one date filter [(#6800)](https://github.com/prowler-cloud/prowler/pull/6800)
- Findings metadata endpoint received a performance improvement [(#6863)](https://github.com/prowler-cloud/prowler/pull/6863)
- Increased the allowed length of the provider UID for Kubernetes providers [(#6869)](https://github.com/prowler-cloud/prowler/pull/6869)
- Daily scheduled scan instances are now created beforehand with `SCHEDULED` state [(#6700)](https://github.com/prowler-cloud/prowler/pull/6700).
- Findings endpoints now require at least one date filter [(#6800)](https://github.com/prowler-cloud/prowler/pull/6800).
- Findings metadata endpoint received a performance improvement [(#6863)](https://github.com/prowler-cloud/prowler/pull/6863).
- Increased the allowed length of the provider UID for Kubernetes providers [(#6869)](https://github.com/prowler-cloud/prowler/pull/6869).
---
+5 -20
View File
@@ -6,19 +6,7 @@ ARG POWERSHELL_VERSION=7.5.0
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
libicu72 \
gcc \
g++ \
make \
libxml2-dev \
libxmlsec1-dev \
libxmlsec1-openssl \
pkg-config \
libtool \
libxslt1-dev \
python3-dev \
RUN apt-get update && apt-get install -y --no-install-recommends wget libicu72 \
&& rm -rf /var/lib/apt/lists/*
# Install PowerShell
@@ -49,21 +37,18 @@ COPY pyproject.toml ./
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
COPY src/backend/ ./backend/
ENV PATH="/home/prowler/.local/bin:$PATH"
# Add `--no-root` to avoid installing the current project as a package
RUN poetry install --no-root && \
rm -rf ~/.cache/pip
RUN poetry run python "$(poetry env info --path)/src/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py"
# Prevents known compatibility error between lxml and libxml2/libxmlsec versions.
# See: https://github.com/xmlsec/python-xmlsec/issues/320
RUN poetry run pip install --force-reinstall --no-binary lxml lxml
COPY src/backend/ ./backend/
COPY docker-entrypoint.sh ./docker-entrypoint.sh
RUN poetry run python "$(poetry env info --path)/src/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py"
WORKDIR /home/prowler/backend
# Development image
+125
View File
@@ -0,0 +1,125 @@
services:
api:
build:
dockerfile: Dockerfile
image: prowler-api
env_file:
- path: ./.env
required: false
ports:
- "${DJANGO_PORT:-8000}:${DJANGO_PORT:-8000}"
profiles:
- prod
depends_on:
postgres:
condition: service_healthy
valkey:
condition: service_healthy
entrypoint:
- "../docker-entrypoint.sh"
- "prod"
api-dev:
build:
dockerfile: Dockerfile
target: dev
image: prowler-api-dev
environment:
- DJANGO_SETTINGS_MODULE=config.django.devel
- DJANGO_LOGGING_FORMATTER=human_readable
env_file:
- path: ./.env
required: false
ports:
- "${DJANGO_PORT:-8080}:${DJANGO_PORT:-8080}"
volumes:
- "./src/backend:/home/prowler/backend"
- "./pyproject.toml:/home/prowler/pyproject.toml"
profiles:
- dev
depends_on:
postgres:
condition: service_healthy
valkey:
condition: service_healthy
entrypoint:
- "../docker-entrypoint.sh"
- "dev"
postgres:
image: postgres:16.3-alpine
ports:
- "${POSTGRES_PORT:-5432}:${POSTGRES_PORT:-5432}"
hostname: "postgres-db"
volumes:
- ./_data/postgres:/var/lib/postgresql/data
environment:
- POSTGRES_USER=${POSTGRES_ADMIN_USER:-prowler}
- POSTGRES_PASSWORD=${POSTGRES_ADMIN_PASSWORD:-S3cret}
- POSTGRES_DB=${POSTGRES_DB:-prowler_db}
env_file:
- path: ./.env
required: false
healthcheck:
test: ["CMD-SHELL", "sh -c 'pg_isready -U ${POSTGRES_ADMIN_USER:-prowler} -d ${POSTGRES_DB:-prowler_db}'"]
interval: 5s
timeout: 5s
retries: 5
valkey:
image: valkey/valkey:7-alpine3.19
ports:
- "${VALKEY_PORT:-6379}:6379"
hostname: "valkey"
volumes:
- ./_data/valkey:/data
env_file:
- path: ./.env
required: false
healthcheck:
test: ["CMD-SHELL", "sh -c 'valkey-cli ping'"]
interval: 10s
timeout: 5s
retries: 3
worker:
build:
dockerfile: Dockerfile
image: prowler-worker
environment:
- DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE:-config.django.production}
env_file:
- path: ./.env
required: false
profiles:
- dev
- prod
depends_on:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
entrypoint:
- "../docker-entrypoint.sh"
- "worker"
worker-beat:
build:
dockerfile: Dockerfile
image: prowler-worker
environment:
- DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE:-config.django.production}
env_file:
- path: ./.env
required: false
profiles:
- dev
- prod
depends_on:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
entrypoint:
- "../docker-entrypoint.sh"
- "beat"
-4
View File
@@ -3,10 +3,6 @@
apply_migrations() {
echo "Applying database migrations..."
# Fix Inconsistent migration history after adding sites app
poetry run python manage.py check_and_fix_socialaccount_sites_migration --database admin
poetry run python manage.py migrate --database admin
}
+126 -1721
View File
File diff suppressed because it is too large Load Diff
+4 -5
View File
@@ -7,8 +7,8 @@ authors = [{name = "Prowler Engineering", email = "engineering@prowler.com"}]
dependencies = [
"celery[pytest] (>=5.4.0,<6.0.0)",
"dj-rest-auth[with_social,jwt] (==7.0.1)",
"django==5.1.10",
"django-allauth[saml] (>=65.8.0,<66.0.0)",
"django==5.1.8",
"django-allauth==65.4.1",
"django-celery-beat (>=2.7.0,<3.0.0)",
"django-celery-results (>=2.5.1,<3.0.0)",
"django-cors-headers==4.4.0",
@@ -23,12 +23,11 @@ dependencies = [
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.8",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
"uuid6==2024.7.10",
"openai (>=1.82.0,<2.0.0)"
"uuid6==2024.7.10"
]
description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
+1 -68
View File
@@ -17,8 +17,6 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
def pre_social_login(self, request, sociallogin):
# Link existing accounts with the same email address
email = sociallogin.account.extra_data.get("email")
# if sociallogin.account.provider == "saml":
# email = sociallogin.user.email
if email:
existing_user = self.get_user_by_email(email)
if existing_user:
@@ -31,72 +29,8 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
"""
with transaction.atomic(using=MainRouter.admin_db):
user = super().save_user(request, sociallogin, form)
extra = sociallogin.account.extra_data
# if provider == "saml":
# # Handle SAML-specific logic
# user.first_name = (
# extra.get("firstName", [""])[0] if extra.get("firstName") else ""
# )
# user.last_name = (
# extra.get("lastName", [""])[0] if extra.get("lastName") else ""
# )
# user.company_name = (
# extra.get("organization", [""])[0]
# if extra.get("organization")
# else ""
# )
# user.name = f"{user.first_name} {user.last_name}".strip()
# if user.name == "":
# user.name = "N/A"
# user.save(using=MainRouter.admin_db)
# email_domain = user.email.split("@")[-1]
# tenant = (
# SAMLConfiguration.objects.using(MainRouter.admin_db)
# .get(email_domain=email_domain)
# .tenant
# )
# with rls_transaction(str(tenant.id)):
# role_name = (
# extra.get("userType", ["saml_default_role"])[0].strip()
# if extra.get("userType")
# else "saml_default_role"
# )
# try:
# role = Role.objects.using(MainRouter.admin_db).get(
# name=role_name, tenant_id=tenant.id
# )
# except Role.DoesNotExist:
# role = Role.objects.using(MainRouter.admin_db).create(
# name=role_name,
# tenant_id=tenant.id,
# manage_users=False,
# manage_account=False,
# manage_billing=False,
# manage_providers=False,
# manage_integrations=False,
# manage_scans=False,
# unlimited_visibility=False,
# )
# Membership.objects.using(MainRouter.admin_db).create(
# user=user,
# tenant=tenant,
# role=Membership.RoleChoices.MEMBER,
# )
# UserRoleRelationship.objects.using(MainRouter.admin_db).create(
# user=user,
# role=role,
# tenant_id=tenant.id,
# )
# Handle other providers (e.g., GitHub, Google)
user.save(using=MainRouter.admin_db)
social_account_name = extra.get("name")
social_account_name = sociallogin.account.extra_data.get("name")
if social_account_name:
user.name = social_account_name
user.save(using=MainRouter.admin_db)
@@ -124,5 +58,4 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
role=role,
tenant_id=tenant.id,
)
return user
+7 -11
View File
@@ -190,16 +190,10 @@ def generate_compliance_overview_template(prowler_compliance: dict):
total_checks = len(requirement.Checks)
checks_dict = {check: None for check in requirement.Checks}
req_status_val = "MANUAL" if total_checks == 0 else "PASS"
# Build requirement dictionary
requirement_dict = {
"name": requirement.Name or requirement.Id,
"description": requirement.Description,
"tactics": getattr(requirement, "Tactics", []),
"subtechniques": getattr(requirement, "SubTechniques", []),
"platforms": getattr(requirement, "Platforms", []),
"technique_url": getattr(requirement, "TechniqueURL", ""),
"attributes": [
dict(attribute) for attribute in requirement.Attributes
],
@@ -210,18 +204,20 @@ def generate_compliance_overview_template(prowler_compliance: dict):
"manual": 0,
"total": total_checks,
},
"status": req_status_val,
"status": "PASS",
}
# Update requirements status counts for the framework
if req_status_val == "MANUAL":
# Update requirements status
if total_checks == 0:
requirements_status["manual"] += 1
elif req_status_val == "PASS":
requirements_status["passed"] += 1
# Add requirement to compliance requirements
compliance_requirements[requirement.Id] = requirement_dict
# Calculate pending requirements
pending_requirements = total_requirements - requirements_status["manual"]
requirements_status["passed"] = pending_requirements
# Build compliance dictionary
compliance_dict = {
"framework": compliance_data.Framework,
+11 -124
View File
@@ -1,4 +1,3 @@
import re
import secrets
import uuid
from contextlib import contextmanager
@@ -153,28 +152,6 @@ def delete_related_daily_task(provider_id: str):
PeriodicTask.objects.filter(name=task_name).delete()
def create_objects_in_batches(
tenant_id: str, model, objects: list, batch_size: int = 500
):
"""
Bulk-create model instances in repeated, per-tenant RLS transactions.
All chunks execute in their own transaction, so no single transaction
grows too large.
Args:
tenant_id (str): UUID string of the tenant under which to set RLS.
model: Django model class whose `.objects.bulk_create()` will be called.
objects (list): List of model instances (unsaved) to bulk-create.
batch_size (int): Maximum number of objects per bulk_create call.
"""
total = len(objects)
for i in range(0, total, batch_size):
chunk = objects[i : i + batch_size]
with rls_transaction(value=tenant_id, parameter=POSTGRES_TENANT_VAR):
model.objects.bulk_create(chunk, batch_size)
# Postgres Enums
@@ -250,72 +227,6 @@ def register_enum(apps, schema_editor, enum_class): # noqa: F841
register_adapter(enum_class, enum_adapter)
def _should_create_index_on_partition(
partition_name: str, all_partitions: bool = False
) -> bool:
"""
Determine if we should create an index on this partition.
Args:
partition_name: The name of the partition (e.g., "findings_2025_aug", "findings_default")
all_partitions: If True, create on all partitions. If False, only current/future partitions.
Returns:
bool: True if index should be created on this partition, False otherwise.
"""
if all_partitions:
return True
# Extract date from partition name if it follows the pattern
# Partition names look like: findings_2025_aug, findings_2025_jul, etc.
date_pattern = r"(\d{4})_([a-z]{3})$"
match = re.search(date_pattern, partition_name)
if not match:
# If we can't parse the date, include it to be safe (e.g., default partition)
return True
try:
year_str, month_abbr = match.groups()
year = int(year_str)
# Map month abbreviations to numbers
month_map = {
"jan": 1,
"feb": 2,
"mar": 3,
"apr": 4,
"may": 5,
"jun": 6,
"jul": 7,
"aug": 8,
"sep": 9,
"oct": 10,
"nov": 11,
"dec": 12,
}
month = month_map.get(month_abbr.lower())
if month is None:
# Unknown month abbreviation, include it to be safe
return True
partition_date = datetime(year, month, 1, tzinfo=timezone.utc)
# Get current month start
now = datetime.now(timezone.utc)
current_month_start = now.replace(
day=1, hour=0, minute=0, second=0, microsecond=0
)
# Include current month and future partitions
return partition_date >= current_month_start
except (ValueError, TypeError):
# If date parsing fails, include it to be safe
return True
def create_index_on_partitions(
apps, # noqa: F841
schema_editor,
@@ -324,39 +235,16 @@ def create_index_on_partitions(
columns: str,
method: str = "BTREE",
where: str = "",
all_partitions: bool = True,
):
"""
Create an index on existing partitions of `parent_table`.
Create an index on every existing partition of `parent_table`.
Args:
parent_table: The name of the root table (e.g. "findings").
index_name: A short name for the index (will be prefixed per-partition).
columns: The parenthesized column list, e.g. "tenant_id, scan_id, status".
method: The index method—BTREE, GIN, etc. Defaults to BTREE.
where: Optional WHERE clause (without the leading "WHERE"), e.g. "status = 'FAIL'".
all_partitions: Whether to create indexes on all partitions or just current/future ones.
Defaults to False (current/future only) to avoid maintenance overhead
on old partitions where the index may not be needed.
Examples:
# Create index only on current and future partitions (recommended for new indexes)
create_index_on_partitions(
apps, schema_editor,
parent_table="findings",
index_name="new_performance_idx",
columns="tenant_id, status, severity",
all_partitions=False # Default behavior
)
# Create index on all partitions (use when migrating existing critical indexes)
create_index_on_partitions(
apps, schema_editor,
parent_table="findings",
index_name="critical_existing_idx",
columns="tenant_id, scan_id",
all_partitions=True
)
method: The index method—BTREE, GIN, etc. Defaults to BTREE.
where: Optional WHERE clause (without the leading "WHERE"), e.g. "status = 'FAIL'".
"""
with connection.cursor() as cursor:
cursor.execute(
@@ -371,14 +259,13 @@ def create_index_on_partitions(
where_sql = f" WHERE {where}" if where else ""
for partition in partitions:
if _should_create_index_on_partition(partition, all_partitions):
idx_name = f"{partition.replace('.', '_')}_{index_name}"
sql = (
f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} "
f"ON {partition} USING {method} ({columns})"
f"{where_sql};"
)
schema_editor.execute(sql)
idx_name = f"{partition.replace('.', '_')}_{index_name}"
sql = (
f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} "
f"ON {partition} USING {method} ({columns})"
f"{where_sql};"
)
schema_editor.execute(sql)
def drop_index_on_partitions(
@@ -392,7 +279,7 @@ def drop_index_on_partitions(
Args:
parent_table: The name of the root table (e.g. "findings").
index_name: The same short name used when creating them.
index_name: The same short name used when creating them.
"""
with connection.cursor() as cursor:
cursor.execute(
+4 -34
View File
@@ -3,7 +3,7 @@ from rest_framework import status
from rest_framework.exceptions import APIException
from rest_framework_json_api.exceptions import exception_handler
from rest_framework_json_api.serializers import ValidationError
from rest_framework_simplejwt.exceptions import InvalidToken, TokenError
from rest_framework_simplejwt.exceptions import TokenError, InvalidToken
class ModelValidationError(ValidationError):
@@ -32,31 +32,6 @@ class InvitationTokenExpiredException(APIException):
default_code = "token_expired"
# Task Management Exceptions (non-HTTP)
class TaskManagementError(Exception):
"""Base exception for task management errors."""
def __init__(self, task=None):
self.task = task
super().__init__()
class TaskFailedException(TaskManagementError):
"""Raised when a task has failed."""
class TaskNotFoundException(TaskManagementError):
"""Raised when a task is not found."""
class TaskInProgressException(TaskManagementError):
"""Raised when a task is running but there's no related Task object to return."""
def __init__(self, task_result=None):
self.task_result = task_result
super().__init__()
def custom_exception_handler(exc, context):
if isinstance(exc, django_validation_error):
if hasattr(exc, "error_dict"):
@@ -64,12 +39,7 @@ def custom_exception_handler(exc, context):
else:
exc = ValidationError(detail=exc.messages[0], code=exc.code)
elif isinstance(exc, (TokenError, InvalidToken)):
if (
hasattr(exc, "detail")
and isinstance(exc.detail, dict)
and "messages" in exc.detail
):
exc.detail["messages"] = [
message_item["message"] for message_item in exc.detail["messages"]
]
exc.detail["messages"] = [
message_item["message"] for message_item in exc.detail["messages"]
]
return exception_handler(exc, context)
+5 -4
View File
@@ -22,7 +22,7 @@ from api.db_utils import (
StatusEnumField,
)
from api.models import (
ComplianceRequirementOverview,
ComplianceOverview,
Finding,
Integration,
Invitation,
@@ -637,11 +637,12 @@ class RoleFilter(FilterSet):
class ComplianceOverviewFilter(FilterSet):
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
scan_id = UUIDFilter(field_name="scan_id")
region = CharFilter(field_name="region")
provider_type = ChoiceFilter(choices=Provider.ProviderChoices.choices)
provider_type__in = ChoiceInFilter(choices=Provider.ProviderChoices.choices)
scan_id = UUIDFilter(field_name="scan__id")
class Meta:
model = ComplianceRequirementOverview
model = ComplianceOverview
fields = {
"inserted_at": ["date", "gte", "lte"],
"compliance_id": ["exact", "icontains"],
@@ -1,80 +0,0 @@
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand
from django.db import DEFAULT_DB_ALIAS, connection, connections, transaction
from django.db.migrations.recorder import MigrationRecorder
def table_exists(table_name):
with connection.cursor() as cursor:
cursor.execute(
"""
SELECT EXISTS (
SELECT 1 FROM information_schema.tables
WHERE table_name = %s
)
""",
[table_name],
)
return cursor.fetchone()[0]
class Command(BaseCommand):
help = "Fix migration inconsistency between socialaccount and sites"
def add_arguments(self, parser):
parser.add_argument(
"--database",
default=DEFAULT_DB_ALIAS,
help="Specifies the database to operate on.",
)
def handle(self, *args, **options):
db = options["database"]
connection = connections[db]
recorder = MigrationRecorder(connection)
applied = set(recorder.applied_migrations())
has_social = ("socialaccount", "0001_initial") in applied
with connection.cursor() as cursor:
cursor.execute(
"""
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_name = 'django_site'
);
"""
)
site_table_exists = cursor.fetchone()[0]
if has_social and not site_table_exists:
self.stdout.write(
f"Detected inconsistency in '{db}'. Creating 'django_site' table manually..."
)
with transaction.atomic(using=db):
with connection.schema_editor() as schema_editor:
schema_editor.create_model(Site)
recorder.record_applied("sites", "0001_initial")
recorder.record_applied("sites", "0002_alter_domain_unique")
self.stdout.write(
"Fixed: 'django_site' table created and migrations registered."
)
# Ensure the relationship table also exists
if not table_exists("socialaccount_socialapp_sites"):
self.stdout.write(
"Detected missing 'socialaccount_socialapp_sites' table. Creating manually..."
)
with connection.schema_editor() as schema_editor:
from allauth.socialaccount.models import SocialApp
schema_editor.create_model(
SocialApp._meta.get_field("sites").remote_field.through
)
self.stdout.write(
"Fixed: 'socialaccount_socialapp_sites' table created."
)
@@ -1,124 +0,0 @@
# Generated by Django 5.1.8 on 2025-05-21 11:37
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
from api.rls import RowLevelSecurityConstraint
class Migration(migrations.Migration):
dependencies = [
("api", "0026_provider_secret_gcp_service_account"),
]
operations = [
migrations.CreateModel(
name="ComplianceRequirementOverview",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("compliance_id", models.TextField(blank=False)),
("framework", models.TextField(blank=False)),
("version", models.TextField(blank=True)),
("description", models.TextField(blank=True)),
("region", models.TextField(blank=False)),
("requirement_id", models.TextField(blank=False)),
(
"requirement_status",
api.db_utils.StatusEnumField(
choices=[
("FAIL", "Fail"),
("PASS", "Pass"),
("MANUAL", "Manual"),
]
),
),
("passed_checks", models.IntegerField(default=0)),
("failed_checks", models.IntegerField(default=0)),
("total_checks", models.IntegerField(default=0)),
(
"scan",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="compliance_requirements_overviews",
related_query_name="compliance_requirements_overview",
to="api.scan",
),
),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "compliance_requirements_overviews",
"abstract": False,
"indexes": [
models.Index(
fields=["tenant_id", "scan_id"], name="cro_tenant_scan_idx"
),
models.Index(
fields=["tenant_id", "scan_id", "compliance_id"],
name="cro_scan_comp_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "compliance_id", "region"],
name="cro_scan_comp_reg_idx",
),
models.Index(
fields=[
"tenant_id",
"scan_id",
"compliance_id",
"requirement_id",
],
name="cro_scan_comp_req_idx",
),
models.Index(
fields=[
"tenant_id",
"scan_id",
"compliance_id",
"requirement_id",
"region",
],
name="cro_scan_comp_req_reg_idx",
),
],
"constraints": [
models.UniqueConstraint(
fields=(
"tenant_id",
"scan_id",
"compliance_id",
"requirement_id",
"region",
),
name="unique_tenant_compliance_requirement_overview",
)
],
},
),
migrations.AddConstraint(
model_name="ComplianceRequirementOverview",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_compliancerequirementoverview",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
]
@@ -1,29 +0,0 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0027_compliance_requirement_overviews"),
]
operations = [
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_check_idx",
columns="tenant_id, scan_id, check_id",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="findings",
index_name="find_tenant_scan_check_idx",
),
)
]
@@ -1,17 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0028_findings_check_index_partitions"),
]
operations = [
migrations.AddIndex(
model_name="finding",
index=models.Index(
fields=["tenant_id", "scan_id", "check_id"],
name="find_tenant_scan_check_idx",
),
),
]
@@ -1,107 +0,0 @@
# Generated by Django 5.1.10 on 2025-06-12 12:45
import uuid
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models
import api.rls
class Migration(migrations.Migration):
dependencies = [
("api", "0029_findings_check_index_parent"),
]
operations = [
migrations.CreateModel(
name="LighthouseConfiguration",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
(
"name",
models.CharField(
help_text="Name of the configuration",
max_length=100,
validators=[django.core.validators.MinLengthValidator(3)],
),
),
(
"api_key",
models.BinaryField(
help_text="Encrypted API key for the LLM service"
),
),
(
"model",
models.CharField(
choices=[
("gpt-4o-2024-11-20", "GPT-4o v2024-11-20"),
("gpt-4o-2024-08-06", "GPT-4o v2024-08-06"),
("gpt-4o-2024-05-13", "GPT-4o v2024-05-13"),
("gpt-4o", "GPT-4o Default"),
("gpt-4o-mini-2024-07-18", "GPT-4o Mini v2024-07-18"),
("gpt-4o-mini", "GPT-4o Mini Default"),
],
default="gpt-4o-2024-08-06",
help_text="Must be one of the supported model names",
max_length=50,
),
),
(
"temperature",
models.FloatField(default=0, help_text="Must be between 0 and 1"),
),
(
"max_tokens",
models.IntegerField(
default=4000, help_text="Must be between 500 and 5000"
),
),
(
"business_context",
models.TextField(
blank=True,
default="",
help_text="Additional business context for this AI model configuration",
),
),
("is_active", models.BooleanField(default=True)),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "lighthouse_configurations",
"abstract": False,
"constraints": [
models.UniqueConstraint(
fields=("tenant_id",),
name="unique_lighthouse_config_per_tenant",
),
],
},
),
migrations.AddConstraint(
model_name="lighthouseconfiguration",
constraint=api.rls.RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_lighthouseconfiguration",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
]
@@ -1,24 +0,0 @@
# Generated by Django 5.1.10 on 2025-06-23 10:04
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0030_lighthouseconfiguration"),
("django_celery_beat", "0019_alter_periodictasks_options"),
]
operations = [
migrations.AlterField(
model_name="scan",
name="scheduler_task",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="django_celery_beat.periodictask",
),
),
]
+43 -447
View File
@@ -1,10 +1,10 @@
import json
import logging
import re
import time
from uuid import UUID, uuid4
from config.custom_logging import BackendLogger
from cryptography.fernet import Fernet, InvalidToken
from config.env import env
from cryptography.fernet import Fernet
from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.postgres.fields import ArrayField
@@ -51,8 +51,6 @@ fernet = Fernet(settings.SECRETS_ENCRYPTION_KEY.encode())
# Convert Prowler Severity enum to Django TextChoices
SeverityChoices = enum_to_choices(Severity)
logger = logging.getLogger(BackendLogger.API)
class StatusChoices(models.TextChoices):
"""
@@ -356,6 +354,42 @@ class ProviderGroupMembership(RowLevelSecurityProtectedModel):
resource_name = "provider_groups-provider"
class TaskManager(models.Manager):
def get_with_retry(
self,
id: str,
max_retries: int = None,
delay_seconds: float = None,
):
"""
Retry fetching a Task by ID in case it hasn't been created yet.
Args:
id (str): The Celery task ID (expected to match Task model PK).
max_retries (int, optional): Number of retry attempts. Defaults to env TASK_RETRY_ATTEMPTS or 5.
delay_seconds (float, optional): Delay between retries in seconds. Defaults to env TASK_RETRY_DELAY_SECONDS or 0.1.
Returns:
Task: The retrieved Task instance.
Raises:
Task.DoesNotExist: If the task is not found after all retries.
"""
max_retries = max_retries or env.int("TASK_RETRY_ATTEMPTS", default=5)
delay_seconds = delay_seconds or env.float(
"TASK_RETRY_DELAY_SECONDS", default=0.1
)
for _attempt in range(max_retries):
try:
return self.get(id=id)
except self.model.DoesNotExist:
time.sleep(delay_seconds)
raise self.model.DoesNotExist(
f"Task with ID {id} not found after {max_retries} retries."
)
class Task(RowLevelSecurityProtectedModel):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
@@ -368,6 +402,8 @@ class Task(RowLevelSecurityProtectedModel):
blank=True,
)
objects = TaskManager()
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "tasks"
@@ -431,7 +467,7 @@ class Scan(RowLevelSecurityProtectedModel):
completed_at = models.DateTimeField(null=True, blank=True)
next_scan_at = models.DateTimeField(null=True, blank=True)
scheduler_task = models.ForeignKey(
PeriodicTask, on_delete=models.SET_NULL, null=True, blank=True
PeriodicTask, on_delete=models.CASCADE, null=True, blank=True
)
output_location = models.CharField(blank=True, null=True, max_length=200)
@@ -766,10 +802,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
GinIndex(fields=["resource_services"], name="gin_find_service_idx"),
GinIndex(fields=["resource_regions"], name="gin_find_region_idx"),
GinIndex(fields=["resource_types"], name="gin_find_rtype_idx"),
models.Index(
fields=["tenant_id", "scan_id", "check_id"],
name="find_tenant_scan_check_idx",
),
]
class JSONAPIMeta:
@@ -1151,78 +1183,6 @@ class ComplianceOverview(RowLevelSecurityProtectedModel):
resource_name = "compliance-overviews"
class ComplianceRequirementOverview(RowLevelSecurityProtectedModel):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
compliance_id = models.TextField(blank=False)
framework = models.TextField(blank=False)
version = models.TextField(blank=True)
description = models.TextField(blank=True)
region = models.TextField(blank=False)
requirement_id = models.TextField(blank=False)
requirement_status = StatusEnumField(choices=StatusChoices)
passed_checks = models.IntegerField(default=0)
failed_checks = models.IntegerField(default=0)
total_checks = models.IntegerField(default=0)
scan = models.ForeignKey(
Scan,
on_delete=models.CASCADE,
related_name="compliance_requirements_overviews",
related_query_name="compliance_requirements_overview",
)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "compliance_requirements_overviews"
constraints = [
models.UniqueConstraint(
fields=(
"tenant_id",
"scan_id",
"compliance_id",
"requirement_id",
"region",
),
name="unique_tenant_compliance_requirement_overview",
),
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "DELETE"],
),
]
indexes = [
models.Index(fields=["tenant_id", "scan_id"], name="cro_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "compliance_id"],
name="cro_scan_comp_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "compliance_id", "region"],
name="cro_scan_comp_reg_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "compliance_id", "requirement_id"],
name="cro_scan_comp_req_idx",
),
models.Index(
fields=[
"tenant_id",
"scan_id",
"compliance_id",
"requirement_id",
"region",
],
name="cro_scan_comp_req_reg_idx",
),
]
class JSONAPIMeta:
resource_name = "compliance-requirements-overviews"
class ScanSummary(RowLevelSecurityProtectedModel):
objects = ActiveProviderManager()
all_objects = models.Manager()
@@ -1291,6 +1251,7 @@ class ScanSummary(RowLevelSecurityProtectedModel):
class Integration(RowLevelSecurityProtectedModel):
class IntegrationChoices(models.TextChoices):
S3 = "amazon_s3", _("Amazon S3")
SAML = "saml", _("SAML")
AWS_SECURITY_HUB = "aws_security_hub", _("AWS Security Hub")
JIRA = "jira", _("JIRA")
SLACK = "slack", _("Slack")
@@ -1364,244 +1325,6 @@ class IntegrationProviderRelationship(RowLevelSecurityProtectedModel):
]
# class SAMLToken(models.Model):
# id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
# inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
# updated_at = models.DateTimeField(auto_now=True, editable=False)
# expires_at = models.DateTimeField(editable=False)
# token = models.JSONField(unique=True)
# user = models.ForeignKey(User, on_delete=models.CASCADE)
# class Meta:
# db_table = "saml_tokens"
# def save(self, *args, **kwargs):
# if not self.expires_at:
# self.expires_at = datetime.now(timezone.utc) + timedelta(seconds=15)
# super().save(*args, **kwargs)
# def is_expired(self) -> bool:
# return datetime.now(timezone.utc) >= self.expires_at
# class SAMLDomainIndex(models.Model):
# """
# Public index of SAML domains. No RLS. Used for fast lookup in SAML login flow.
# """
# email_domain = models.CharField(max_length=254, unique=True)
# tenant = models.ForeignKey("Tenant", on_delete=models.CASCADE)
# class Meta:
# db_table = "saml_domain_index"
# constraints = [
# models.UniqueConstraint(
# fields=("email_domain", "tenant"),
# name="unique_resources_by_email_domain",
# ),
# BaseSecurityConstraint(
# name="statements_on_%(class)s",
# statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
# ),
# ]
# class SAMLConfiguration(RowLevelSecurityProtectedModel):
# """
# Stores per-tenant SAML settings, including email domain and IdP metadata.
# Automatically syncs to a SocialApp instance on save.
# Note:
# This model exists to provide a tenant-aware abstraction over SAML configuration.
# It supports row-level security, custom validation, and metadata parsing, enabling
# Prowler to expose a clean API and admin interface for managing SAML integrations.
# Although Django Allauth uses the SocialApp model to store provider configuration,
# it is not designed for multi-tenant use. SocialApp lacks support for tenant scoping,
# email domain mapping, and structured metadata handling.
# By managing SAMLConfiguration separately, we ensure:
# - Strong isolation between tenants via RLS.
# - Ownership of raw IdP metadata and its validation.
# - An explicit link between SAML config and business-level identifiers (e.g. email domain).
# - Programmatic transformation into the SocialApp format used by Allauth.
# In short, this model acts as a secure and user-friendly layer over Allauth's lower-level primitives.
# """
# id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
# email_domain = models.CharField(
# max_length=254,
# unique=True,
# help_text="Email domain used to identify the tenant, e.g. prowlerdemo.com",
# )
# metadata_xml = models.TextField(
# help_text="Raw IdP metadata XML to configure SingleSignOnService, certificates, etc."
# )
# created_at = models.DateTimeField(auto_now_add=True)
# updated_at = models.DateTimeField(auto_now=True)
# class JSONAPIMeta:
# resource_name = "saml-configurations"
# class Meta:
# db_table = "saml_configurations"
# constraints = [
# RowLevelSecurityConstraint(
# field="tenant_id",
# name="rls_on_%(class)s",
# statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
# ),
# # 1 config per tenant
# models.UniqueConstraint(
# fields=["tenant"],
# name="unique_samlconfig_per_tenant",
# ),
# ]
# def clean(self, old_email_domain=None):
# # Domain must not contain @
# if "@" in self.email_domain:
# raise ValidationError({"email_domain": "Domain must not contain @"})
# # Enforce at most one config per tenant
# qs = SAMLConfiguration.objects.filter(tenant=self.tenant)
# # Exclude ourselves in case of update
# if self.pk:
# qs = qs.exclude(pk=self.pk)
# if qs.exists():
# raise ValidationError(
# {"tenant": "A SAML configuration already exists for this tenant."}
# )
# # The email domain must be unique in the entire system
# qs = SAMLConfiguration.objects.using(MainRouter.admin_db).filter(
# email_domain__iexact=self.email_domain
# )
# if qs.exists() and old_email_domain != self.email_domain:
# raise ValidationError(
# {"tenant": "There is a problem with your email domain."}
# )
# def save(self, *args, **kwargs):
# self.email_domain = self.email_domain.strip().lower()
# is_create = not SAMLConfiguration.objects.filter(pk=self.pk).exists()
# if not is_create:
# old = SAMLConfiguration.objects.get(pk=self.pk)
# old_email_domain = old.email_domain
# old_metadata_xml = old.metadata_xml
# else:
# old_email_domain = None
# old_metadata_xml = None
# self.clean(old_email_domain)
# super().save(*args, **kwargs)
# if is_create or (
# old_email_domain != self.email_domain
# or old_metadata_xml != self.metadata_xml
# ):
# self._sync_social_app(old_email_domain)
# # Sync the public index
# if not is_create and old_email_domain and old_email_domain != self.email_domain:
# SAMLDomainIndex.objects.filter(email_domain=old_email_domain).delete()
# # Create/update the new domain index
# SAMLDomainIndex.objects.update_or_create(
# email_domain=self.email_domain, defaults={"tenant": self.tenant}
# )
# def _parse_metadata(self):
# """
# Parse the raw IdP metadata XML and extract:
# - entity_id
# - sso_url
# - slo_url (may be None)
# - x509cert (required)
# """
# ns = {
# "md": "urn:oasis:names:tc:SAML:2.0:metadata",
# "ds": "http://www.w3.org/2000/09/xmldsig#",
# }
# try:
# root = ET.fromstring(self.metadata_xml)
# except ET.ParseError as e:
# raise ValidationError({"metadata_xml": f"Invalid XML: {e}"})
# # Entity ID
# entity_id = root.attrib.get("entityID")
# # SSO endpoint (must exist)
# sso = root.find(".//md:IDPSSODescriptor/md:SingleSignOnService", ns)
# if sso is None or "Location" not in sso.attrib:
# raise ValidationError(
# {"metadata_xml": "Missing SingleSignOnService in metadata."}
# )
# sso_url = sso.attrib["Location"]
# # SLO endpoint (optional)
# slo = root.find(".//md:IDPSSODescriptor/md:SingleLogoutService", ns)
# slo_url = slo.attrib.get("Location") if slo is not None else None
# # X.509 certificate (required)
# cert = root.find(
# './/md:KeyDescriptor[@use="signing"]/ds:KeyInfo/ds:X509Data/ds:X509Certificate',
# ns,
# )
# if cert is None or not cert.text or not cert.text.strip():
# raise ValidationError(
# {
# "metadata_xml": 'Metadata must include a <ds:X509Certificate> under <KeyDescriptor use="signing">.'
# }
# )
# x509cert = cert.text.strip()
# return {
# "entity_id": entity_id,
# "sso_url": sso_url,
# "slo_url": slo_url,
# "x509cert": x509cert,
# }
# def _sync_social_app(self, previous_email_domain=None):
# """
# Create or update the corresponding SocialApp based on email_domain.
# If the domain changed, update the matching SocialApp.
# """
# idp_settings = self._parse_metadata()
# settings_dict = SOCIALACCOUNT_PROVIDERS["saml"].copy()
# settings_dict["idp"] = idp_settings
# current_site = Site.objects.get(id=settings.SITE_ID)
# social_app_qs = SocialApp.objects.filter(
# provider="saml", client_id=previous_email_domain or self.email_domain
# )
# client_id = self.email_domain[:191]
# name = f"SAML-{self.email_domain}"[:40]
# if social_app_qs.exists():
# social_app = social_app_qs.first()
# social_app.client_id = client_id
# social_app.name = name
# social_app.settings = settings_dict
# social_app.save()
# social_app.sites.set([current_site])
# else:
# social_app = SocialApp.objects.create(
# provider="saml",
# client_id=client_id,
# name=name,
# settings=settings_dict,
# )
# social_app.sites.set([current_site])
class ResourceScanSummary(RowLevelSecurityProtectedModel):
scan_id = models.UUIDField(default=uuid7, db_index=True)
resource_id = models.UUIDField(default=uuid4, db_index=True)
@@ -1649,130 +1372,3 @@ class ResourceScanSummary(RowLevelSecurityProtectedModel):
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]
class LighthouseConfiguration(RowLevelSecurityProtectedModel):
"""
Stores configuration and API keys for LLM services.
"""
class ModelChoices(models.TextChoices):
GPT_4O_2024_11_20 = "gpt-4o-2024-11-20", _("GPT-4o v2024-11-20")
GPT_4O_2024_08_06 = "gpt-4o-2024-08-06", _("GPT-4o v2024-08-06")
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13", _("GPT-4o v2024-05-13")
GPT_4O = "gpt-4o", _("GPT-4o Default")
GPT_4O_MINI_2024_07_18 = "gpt-4o-mini-2024-07-18", _("GPT-4o Mini v2024-07-18")
GPT_4O_MINI = "gpt-4o-mini", _("GPT-4o Mini Default")
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
name = models.CharField(
max_length=100,
validators=[MinLengthValidator(3)],
blank=False,
null=False,
help_text="Name of the configuration",
)
api_key = models.BinaryField(
blank=False, null=False, help_text="Encrypted API key for the LLM service"
)
model = models.CharField(
max_length=50,
choices=ModelChoices.choices,
blank=False,
null=False,
default=ModelChoices.GPT_4O_2024_08_06,
help_text="Must be one of the supported model names",
)
temperature = models.FloatField(default=0, help_text="Must be between 0 and 1")
max_tokens = models.IntegerField(
default=4000, help_text="Must be between 500 and 5000"
)
business_context = models.TextField(
blank=True,
null=False,
default="",
help_text="Additional business context for this AI model configuration",
)
is_active = models.BooleanField(default=True)
def __str__(self):
return self.name
def clean(self):
super().clean()
# Validate temperature
if not 0 <= self.temperature <= 1:
raise ModelValidationError(
detail="Temperature must be between 0 and 1",
code="invalid_temperature",
pointer="/data/attributes/temperature",
)
# Validate max_tokens
if not 500 <= self.max_tokens <= 5000:
raise ModelValidationError(
detail="Max tokens must be between 500 and 5000",
code="invalid_max_tokens",
pointer="/data/attributes/max_tokens",
)
@property
def api_key_decoded(self):
"""Return the decrypted API key, or None if unavailable or invalid."""
if not self.api_key:
return None
try:
decrypted_key = fernet.decrypt(bytes(self.api_key))
return decrypted_key.decode()
except InvalidToken:
logger.warning("Invalid token while decrypting API key.")
except Exception as e:
logger.exception("Unexpected error while decrypting API key: %s", e)
@api_key_decoded.setter
def api_key_decoded(self, value):
"""Store the encrypted API key."""
if not value:
raise ModelValidationError(
detail="API key is required",
code="invalid_api_key",
pointer="/data/attributes/api_key",
)
# Validate OpenAI API key format
openai_key_pattern = r"^sk-[\w-]+T3BlbkFJ[\w-]+$"
if not re.match(openai_key_pattern, value):
raise ModelValidationError(
detail="Invalid OpenAI API key format.",
code="invalid_api_key",
pointer="/data/attributes/api_key",
)
self.api_key = fernet.encrypt(value.encode())
def save(self, *args, **kwargs):
self.full_clean()
super().save(*args, **kwargs)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "lighthouse_configurations"
constraints = [
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
# Add unique constraint for name within a tenant
models.UniqueConstraint(
fields=["tenant_id"], name="unique_lighthouse_config_per_tenant"
),
]
class JSONAPIMeta:
resource_name = "lighthouse-configurations"
+1 -1
View File
@@ -1,4 +1,4 @@
from drf_spectacular_jsonapi.schemas.pagination import JsonApiPageNumberPagination
from rest_framework_json_api.pagination import JsonApiPageNumberPagination
class ComplianceOverviewPagination(JsonApiPageNumberPagination):
File diff suppressed because it is too large Load Diff
@@ -1,88 +0,0 @@
from unittest.mock import MagicMock
import pytest
from allauth.socialaccount.models import SocialLogin
from django.contrib.auth import get_user_model
from api.adapters import ProwlerSocialAccountAdapter
User = get_user_model()
@pytest.mark.django_db
class TestProwlerSocialAccountAdapter:
def test_get_user_by_email_returns_user(self, create_test_user):
adapter = ProwlerSocialAccountAdapter()
user = adapter.get_user_by_email(create_test_user.email)
assert user == create_test_user
def test_get_user_by_email_returns_none_for_unknown_email(self):
adapter = ProwlerSocialAccountAdapter()
assert adapter.get_user_by_email("notfound@example.com") is None
# def test_pre_social_login_links_existing_user(self, create_test_user, rf):
# adapter = ProwlerSocialAccountAdapter()
# sociallogin = MagicMock(spec=SocialLogin)
# sociallogin.account = MagicMock()
# sociallogin.account.provider = "saml"
# sociallogin.account.extra_data = {}
# sociallogin.user = create_test_user
# sociallogin.connect = MagicMock()
# adapter.pre_social_login(rf.get("/"), sociallogin)
# call_args = sociallogin.connect.call_args
# assert call_args is not None
# called_request, called_user = call_args[0]
# assert called_request.path == "/"
# assert called_user.email == create_test_user.email
def test_pre_social_login_no_link_if_email_missing(self, rf):
adapter = ProwlerSocialAccountAdapter()
sociallogin = MagicMock(spec=SocialLogin)
sociallogin.account = MagicMock()
sociallogin.account.provider = "github"
sociallogin.account.extra_data = {}
sociallogin.connect = MagicMock()
adapter.pre_social_login(rf.get("/"), sociallogin)
sociallogin.connect.assert_not_called()
# def test_save_user_saml_flow(
# self,
# rf,
# saml_setup,
# saml_sociallogin,
# ):
# adapter = ProwlerSocialAccountAdapter()
# request = rf.get("/")
# saml_sociallogin.user.email = saml_setup["email"]
# saml_sociallogin.account.extra_data = {
# "firstName": [],
# "lastName": [],
# "organization": [],
# "userType": [],
# }
# tenant = Tenant.objects.using(MainRouter.admin_db).get(
# id=saml_setup["tenant_id"]
# )
# saml_config = SAMLConfiguration.objects.using(MainRouter.admin_db).get(
# tenant=tenant
# )
# assert saml_config.email_domain == saml_setup["domain"]
# user = adapter.save_user(request, saml_sociallogin)
# assert user.name == "N/A"
# assert user.company_name == ""
# assert user.email == saml_setup["email"]
# assert (
# Membership.objects.using(MainRouter.admin_db)
# .filter(user=user, tenant=tenant)
# .exists()
# )
+6 -22
View File
@@ -1,12 +1,12 @@
from unittest.mock import MagicMock, patch
from unittest.mock import patch, MagicMock
from api.compliance import (
generate_compliance_overview_template,
generate_scan_compliance,
get_prowler_provider_checks,
get_prowler_provider_compliance,
load_prowler_checks,
load_prowler_compliance,
load_prowler_checks,
generate_scan_compliance,
generate_compliance_overview_template,
)
from api.models import Provider
@@ -69,7 +69,7 @@ class TestCompliance:
load_prowler_compliance()
from api.compliance import PROWLER_CHECKS, PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE
from api.compliance import PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE, PROWLER_CHECKS
assert PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE == {
"template_key": "template_value"
@@ -218,10 +218,6 @@ class TestCompliance:
Description="Description of requirement 1",
Attributes=[],
Checks=["check1", "check2"],
Tactics=["tactic1"],
SubTechniques=["subtechnique1"],
Platforms=["platform1"],
TechniqueURL="https://example.com",
)
requirement2 = MagicMock(
Id="requirement2",
@@ -229,10 +225,6 @@ class TestCompliance:
Description="Description of requirement 2",
Attributes=[],
Checks=[],
Tactics=[],
SubTechniques=[],
Platforms=[],
TechniqueURL="",
)
compliance1 = MagicMock(
Requirements=[requirement1, requirement2],
@@ -255,10 +247,6 @@ class TestCompliance:
"requirement1": {
"name": "Requirement 1",
"description": "Description of requirement 1",
"tactics": ["tactic1"],
"subtechniques": ["subtechnique1"],
"platforms": ["platform1"],
"technique_url": "https://example.com",
"attributes": [],
"checks": {"check1": None, "check2": None},
"checks_status": {
@@ -272,10 +260,6 @@ class TestCompliance:
"requirement2": {
"name": "Requirement 2",
"description": "Description of requirement 2",
"tactics": [],
"subtechniques": [],
"platforms": [],
"technique_url": "",
"attributes": [],
"checks": {},
"checks_status": {
@@ -284,7 +268,7 @@ class TestCompliance:
"manual": 0,
"total": 0,
},
"status": "MANUAL",
"status": "PASS",
},
},
"requirements_status": {
@@ -3,13 +3,9 @@ from enum import Enum
from unittest.mock import patch
import pytest
from django.conf import settings
from freezegun import freeze_time
from api.db_utils import (
_should_create_index_on_partition,
batch_delete,
create_objects_in_batches,
enum_to_choices,
generate_random_token,
one_week_from_now,
@@ -142,88 +138,3 @@ class TestBatchDelete:
)
assert Provider.objects.all().count() == 0
assert summary == {"api.Provider": create_test_providers}
class TestShouldCreateIndexOnPartition:
@freeze_time("2025-05-15 00:00:00Z")
@pytest.mark.parametrize(
"partition_name, all_partitions, expected",
[
("any_name", True, True),
("findings_default", True, True),
("findings_2022_jan", True, True),
("foo_bar", False, True),
("findings_2025_MAY", False, True),
("findings_2025_may", False, True),
("findings_2025_jun", False, True),
("findings_2025_apr", False, False),
("findings_2025_xyz", False, True),
],
)
def test_partition_inclusion_logic(self, partition_name, all_partitions, expected):
assert (
_should_create_index_on_partition(partition_name, all_partitions)
is expected
)
@freeze_time("2025-05-15 00:00:00Z")
def test_invalid_date_components(self):
# even if regex matches but int conversion fails, we fallback True
# (e.g. year too big, month number parse error)
bad_name = "findings_99999_jan"
assert _should_create_index_on_partition(bad_name, False) is True
bad_name2 = "findings_2025_abc"
# abc not in month_map → fallback True
assert _should_create_index_on_partition(bad_name2, False) is True
@pytest.mark.django_db
class TestCreateObjectsInBatches:
@pytest.fixture
def tenant(self, tenants_fixture):
return tenants_fixture[0]
def make_provider_instances(self, tenant, count):
"""
Return a list of `count` unsaved Provider instances for the given tenant.
"""
base_uid = 1000
return [
Provider(
tenant=tenant,
uid=str(base_uid + i),
provider=Provider.ProviderChoices.AWS,
)
for i in range(count)
]
def test_exact_multiple_of_batch(self, tenant):
total = 6
batch_size = 3
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs, batch_size=batch_size)
qs = Provider.objects.filter(tenant=tenant)
assert qs.count() == total
def test_non_multiple_of_batch(self, tenant):
total = 7
batch_size = 3
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs, batch_size=batch_size)
qs = Provider.objects.filter(tenant=tenant)
assert qs.count() == total
def test_batch_size_default(self, tenant):
default_size = settings.DJANGO_DELETION_BATCH_SIZE
total = default_size + 2
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs)
qs = Provider.objects.filter(tenant=tenant)
assert qs.count() == total
-379
View File
@@ -1,379 +0,0 @@
import json
from uuid import uuid4
import pytest
from django_celery_results.models import TaskResult
from rest_framework import status
from rest_framework.response import Response
from api.exceptions import (
TaskFailedException,
TaskInProgressException,
TaskNotFoundException,
)
from api.models import Task, User
from api.rls import Tenant
from api.v1.mixins import PaginateByPkMixin, TaskManagementMixin
@pytest.mark.django_db
class TestPaginateByPkMixin:
@pytest.fixture
def tenant(self):
return Tenant.objects.create(name="Test Tenant")
@pytest.fixture
def users(self, tenant):
# Create 5 users with proper email field
users = []
for i in range(5):
user = User.objects.create(email=f"user{i}@example.com", name=f"User {i}")
users.append(user)
return users
class DummyView(PaginateByPkMixin):
def __init__(self, page):
self._page = page
def paginate_queryset(self, qs):
return self._page
def get_serializer(self, queryset, many):
class S:
def __init__(self, data):
# serialize to list of ids
self.data = [obj.id for obj in data] if many else queryset.id
return S(queryset)
def get_paginated_response(self, data):
return Response({"results": data}, status=status.HTTP_200_OK)
def test_no_pagination(self, users):
base_qs = User.objects.all().order_by("id")
view = self.DummyView(page=None)
resp = view.paginate_by_pk(
request=None, base_queryset=base_qs, manager=User.objects
)
# since no pagination, should return all ids in order
expected = [u.id for u in base_qs]
assert isinstance(resp, Response)
assert resp.data == expected
def test_with_pagination(self, users):
base_qs = User.objects.all().order_by("id")
# simulate paging to first 2 ids
page = [base_qs[1].id, base_qs[3].id]
view = self.DummyView(page=page)
resp = view.paginate_by_pk(
request=None, base_queryset=base_qs, manager=User.objects
)
# should fetch only those two users, in the same order as page
assert resp.status_code == status.HTTP_200_OK
assert resp.data == {"results": page}
@pytest.mark.django_db
class TestTaskManagementMixin:
class DummyView(TaskManagementMixin):
pass
@pytest.fixture
def tenant(self):
return Tenant.objects.create(name="Test Tenant")
@pytest.fixture(autouse=True)
def cleanup(self):
Task.objects.all().delete()
TaskResult.objects.all().delete()
def test_no_task_and_no_taskresult_raises_not_found(self):
view = self.DummyView()
with pytest.raises(TaskNotFoundException):
view.check_task_status("task_xyz", {"foo": "bar"})
def test_no_task_and_no_taskresult_returns_none_when_not_raising(self):
view = self.DummyView()
result = view.check_task_status(
"task_xyz", {"foo": "bar"}, raise_on_not_found=False
)
assert result is None
def test_taskresult_pending_raises_in_progress(self):
task_kwargs = {"foo": "bar"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_xyz",
task_kwargs=json.dumps(task_kwargs),
status="PENDING",
)
view = self.DummyView()
with pytest.raises(TaskInProgressException) as excinfo:
view.check_task_status("task_xyz", task_kwargs, raise_on_not_found=False)
assert hasattr(excinfo.value, "task_result")
assert excinfo.value.task_result == tr
def test_taskresult_started_raises_in_progress(self):
task_kwargs = {"foo": "bar"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_xyz",
task_kwargs=json.dumps(task_kwargs),
status="STARTED",
)
view = self.DummyView()
with pytest.raises(TaskInProgressException) as excinfo:
view.check_task_status("task_xyz", task_kwargs, raise_on_not_found=False)
assert hasattr(excinfo.value, "task_result")
assert excinfo.value.task_result == tr
def test_taskresult_progress_raises_in_progress(self):
task_kwargs = {"foo": "bar"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_xyz",
task_kwargs=json.dumps(task_kwargs),
status="PROGRESS",
)
view = self.DummyView()
with pytest.raises(TaskInProgressException) as excinfo:
view.check_task_status("task_xyz", task_kwargs, raise_on_not_found=False)
assert hasattr(excinfo.value, "task_result")
assert excinfo.value.task_result == tr
def test_taskresult_failure_raises_failed(self):
task_kwargs = {"a": 1}
TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_fail",
task_kwargs=json.dumps(task_kwargs),
status="FAILURE",
)
view = self.DummyView()
with pytest.raises(TaskFailedException):
view.check_task_status("task_fail", task_kwargs, raise_on_not_found=False)
def test_taskresult_failure_returns_none_when_not_raising(self):
task_kwargs = {"a": 1}
TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_fail",
task_kwargs=json.dumps(task_kwargs),
status="FAILURE",
)
view = self.DummyView()
result = view.check_task_status(
"task_fail", task_kwargs, raise_on_failed=False, raise_on_not_found=False
)
assert result is None
def test_taskresult_success_returns_none(self):
task_kwargs = {"x": 2}
TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_ok",
task_kwargs=json.dumps(task_kwargs),
status="SUCCESS",
)
view = self.DummyView()
# should not raise, and returns None
assert (
view.check_task_status("task_ok", task_kwargs, raise_on_not_found=False)
is None
)
def test_taskresult_revoked_returns_none(self):
task_kwargs = {"x": 2}
TaskResult.objects.create(
task_id=str(uuid4()),
task_name="task_revoked",
task_kwargs=json.dumps(task_kwargs),
status="REVOKED",
)
view = self.DummyView()
# should not raise, and returns None
assert (
view.check_task_status(
"task_revoked", task_kwargs, raise_on_not_found=False
)
is None
)
def test_task_with_failed_status_raises_failed(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="FAILURE",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
with pytest.raises(TaskFailedException) as excinfo:
view.check_task_status("scan_task", task_kwargs)
# Check that the exception contains the expected task
assert hasattr(excinfo.value, "task")
assert excinfo.value.task == task
def test_task_with_cancelled_status_raises_failed(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="REVOKED",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
with pytest.raises(TaskFailedException) as excinfo:
view.check_task_status("scan_task", task_kwargs)
# Check that the exception contains the expected task
assert hasattr(excinfo.value, "task")
assert excinfo.value.task == task
def test_task_with_failed_status_returns_task_when_not_raising(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="FAILURE",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.check_task_status("scan_task", task_kwargs, raise_on_failed=False)
assert result == task
def test_task_with_completed_status_returns_none(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="SUCCESS",
)
Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.check_task_status("scan_task", task_kwargs)
assert result is None
def test_task_with_executing_status_returns_task(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="STARTED",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.check_task_status("scan_task", task_kwargs)
assert result is not None
assert result.pk == task.pk
def test_task_with_pending_status_returns_task(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="PENDING",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.check_task_status("scan_task", task_kwargs)
assert result is not None
assert result.pk == task.pk
def test_get_task_response_if_running_returns_none_for_completed_task(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="SUCCESS",
)
Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.get_task_response_if_running("scan_task", task_kwargs)
assert result is None
def test_get_task_response_if_running_returns_none_for_no_task(self):
view = self.DummyView()
result = view.get_task_response_if_running(
"nonexistent", {"foo": "bar"}, raise_on_not_found=False
)
assert result is None
def test_get_task_response_if_running_returns_202_for_executing_task(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="STARTED",
)
task = Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.get_task_response_if_running("scan_task", task_kwargs)
assert isinstance(result, Response)
assert result.status_code == status.HTTP_202_ACCEPTED
assert "Content-Location" in result.headers
# The response should contain the serialized task data
assert result.data is not None
assert "id" in result.data
assert str(result.data["id"]) == str(task.id)
def test_get_task_response_if_running_returns_none_for_available_task(self, tenant):
task_kwargs = {"provider_id": "test"}
tr = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs),
status="PENDING",
)
Task.objects.create(tenant=tenant, task_runner_task=tr)
view = self.DummyView()
result = view.get_task_response_if_running("scan_task", task_kwargs)
# PENDING maps to AVAILABLE, which is not EXECUTING, so should return None
assert result is None
def test_kwargs_filtering_works_correctly(self, tenant):
# Create tasks with different kwargs
task_kwargs_1 = {"provider_id": "test1", "scan_type": "full"}
task_kwargs_2 = {"provider_id": "test2", "scan_type": "quick"}
tr1 = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs_1),
status="STARTED",
)
tr2 = TaskResult.objects.create(
task_id=str(uuid4()),
task_name="scan_task",
task_kwargs=json.dumps(task_kwargs_2),
status="STARTED",
)
task1 = Task.objects.create(tenant=tenant, task_runner_task=tr1)
task2 = Task.objects.create(tenant=tenant, task_runner_task=tr2)
view = self.DummyView()
# Should find task1 when searching for its kwargs
result1 = view.check_task_status("scan_task", {"provider_id": "test1"})
assert result1 is not None
assert result1.pk == task1.pk
# Should find task2 when searching for its kwargs
result2 = view.check_task_status("scan_task", {"provider_id": "test2"})
assert result2 is not None
assert result2.pk == task2.pk
# Should not find anything when searching for non-existent kwargs
result3 = view.check_task_status(
"scan_task", {"provider_id": "test3"}, raise_on_not_found=False
)
assert result3 is None
+28 -139
View File
@@ -1,6 +1,9 @@
import uuid
from unittest import mock
import pytest
from api.models import Resource, ResourceTag
from api.models import Resource, ResourceTag, Task
@pytest.mark.django_db
@@ -122,147 +125,33 @@ class TestResourceModel:
# assert Finding.objects.filter(uid=long_uid).exists()
# @pytest.mark.django_db
# class TestSAMLConfigurationModel:
# VALID_METADATA = """<?xml version='1.0' encoding='UTF-8'?>
# <md:EntityDescriptor entityID='TEST' xmlns:md='urn:oasis:names:tc:SAML:2.0:metadata'>
# <md:IDPSSODescriptor WantAuthnRequestsSigned='false' protocolSupportEnumeration='urn:oasis:names:tc:SAML:2.0:protocol'>
# <md:KeyDescriptor use='signing'>
# <ds:KeyInfo xmlns:ds='http://www.w3.org/2000/09/xmldsig#'>
# <ds:X509Data>
# <ds:X509Certificate>FAKECERTDATA</ds:X509Certificate>
# </ds:X509Data>
# </ds:KeyInfo>
# </md:KeyDescriptor>
# <md:SingleSignOnService Binding='urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST' Location='https://idp.test/sso'/>
# </md:IDPSSODescriptor>
# </md:EntityDescriptor>
# """
@pytest.mark.django_db
class TestTaskManager:
def test_get_with_retry_success(self):
task_id = uuid.uuid4()
call_counter = {"count": 0}
# def test_creates_valid_configuration(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant A")
# config = SAMLConfiguration.objects.using(MainRouter.admin_db).create(
# email_domain="ssoexample.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant,
# )
def side_effect(*args, **kwargs):
if call_counter["count"] < 2:
call_counter["count"] += 1
raise Task.DoesNotExist()
return Task(id=task_id)
# assert config.email_domain == "ssoexample.com"
# assert SocialApp.objects.filter(client_id="ssoexample.com").exists()
with mock.patch.object(Task.objects, "get", side_effect=side_effect):
task = Task.objects.get_with_retry(
task_id, max_retries=5, delay_seconds=0.01
)
# def test_email_domain_with_at_symbol_fails(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant B")
# config = SAMLConfiguration(
# email_domain="invalid@domain.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant,
# )
assert task.id == task_id
assert call_counter["count"] == 2
# with pytest.raises(ValidationError) as exc_info:
# config.clean()
def test_get_with_retry_fail(self):
non_existent_id = uuid.uuid4()
# errors = exc_info.value.message_dict
# assert "email_domain" in errors
# assert "Domain must not contain @" in errors["email_domain"][0]
with mock.patch.object(Task.objects, "get", side_effect=Task.DoesNotExist):
with pytest.raises(Task.DoesNotExist) as excinfo:
Task.objects.get_with_retry(
non_existent_id, max_retries=3, delay_seconds=0.01
)
# def test_duplicate_email_domain_fails(self):
# tenant1 = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant C1")
# tenant2 = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant C2")
# SAMLConfiguration.objects.using(MainRouter.admin_db).create(
# email_domain="duplicate.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant1,
# )
# config = SAMLConfiguration(
# email_domain="duplicate.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant2,
# )
# with pytest.raises(ValidationError) as exc_info:
# config.clean()
# errors = exc_info.value.message_dict
# assert "tenant" in errors
# assert "There is a problem with your email domain." in errors["tenant"][0]
# def test_duplicate_tenant_config_fails(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant D")
# SAMLConfiguration.objects.using(MainRouter.admin_db).create(
# email_domain="unique1.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant,
# )
# config = SAMLConfiguration(
# email_domain="unique2.com",
# metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
# tenant=tenant,
# )
# with pytest.raises(ValidationError) as exc_info:
# config.clean()
# errors = exc_info.value.message_dict
# assert "tenant" in errors
# assert (
# "A SAML configuration already exists for this tenant."
# in errors["tenant"][0]
# )
# def test_invalid_metadata_xml_fails(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant E")
# config = SAMLConfiguration(
# email_domain="brokenxml.com",
# metadata_xml="<bad<xml>",
# tenant=tenant,
# )
# with pytest.raises(ValidationError) as exc_info:
# config._parse_metadata()
# errors = exc_info.value.message_dict
# assert "metadata_xml" in errors
# assert "Invalid XML" in errors["metadata_xml"][0]
# assert "not well-formed" in errors["metadata_xml"][0]
# def test_metadata_missing_sso_fails(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant F")
# xml = """<md:EntityDescriptor entityID="x" xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata">
# <md:IDPSSODescriptor></md:IDPSSODescriptor>
# </md:EntityDescriptor>"""
# config = SAMLConfiguration(
# email_domain="nosso.com",
# metadata_xml=xml,
# tenant=tenant,
# )
# with pytest.raises(ValidationError) as exc_info:
# config._parse_metadata()
# errors = exc_info.value.message_dict
# assert "metadata_xml" in errors
# assert "Missing SingleSignOnService" in errors["metadata_xml"][0]
# def test_metadata_missing_certificate_fails(self):
# tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant G")
# xml = """<md:EntityDescriptor entityID="x" xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata">
# <md:IDPSSODescriptor>
# <md:SingleSignOnService Binding="urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect" Location="https://example.com/sso"/>
# </md:IDPSSODescriptor>
# </md:EntityDescriptor>"""
# config = SAMLConfiguration(
# email_domain="nocert.com",
# metadata_xml=xml,
# tenant=tenant,
# )
# with pytest.raises(ValidationError) as exc_info:
# config._parse_metadata()
# errors = exc_info.value.message_dict
# assert "metadata_xml" in errors
# assert "X509Certificate" in errors["metadata_xml"][0]
assert str(non_existent_id) in str(excinfo.value)
-80
View File
@@ -1,80 +0,0 @@
import logging
from unittest.mock import MagicMock
from config.settings.sentry import before_send
def test_before_send_ignores_log_with_ignored_exception():
"""Test that before_send ignores logs containing ignored exceptions."""
log_record = MagicMock()
log_record.msg = "Provider kubernetes is not connected"
log_record.levelno = logging.ERROR # 40
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
def test_before_send_ignores_exception_with_ignored_exception():
"""Test that before_send ignores exceptions containing ignored exceptions."""
exc_info = (Exception, Exception("Provider kubernetes is not connected"), None)
hint = {"exc_info": exc_info}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
def test_before_send_passes_through_non_ignored_log():
"""Test that before_send passes through logs that don't contain ignored exceptions."""
log_record = MagicMock()
log_record.msg = "Some other error message"
log_record.levelno = logging.ERROR # 40
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was passed through
assert result == event
def test_before_send_passes_through_non_ignored_exception():
"""Test that before_send passes through exceptions that don't contain ignored exceptions."""
exc_info = (Exception, Exception("Some other error message"), None)
hint = {"exc_info": exc_info}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was passed through
assert result == event
def test_before_send_handles_warning_level():
"""Test that before_send handles warning level logs."""
log_record = MagicMock()
log_record.msg = "Provider kubernetes is not connected"
log_record.levelno = logging.WARNING # 30
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
File diff suppressed because it is too large Load Diff
-189
View File
@@ -1,16 +1,5 @@
from django.urls import reverse
from django_celery_results.models import TaskResult
from rest_framework import status
from rest_framework.response import Response
from api.exceptions import (
TaskFailedException,
TaskInProgressException,
TaskNotFoundException,
)
from api.models import StateChoices, Task
from api.v1.serializers import TaskSerializer
class PaginateByPkMixin:
"""
@@ -42,181 +31,3 @@ class PaginateByPkMixin:
serialized = self.get_serializer(queryset, many=True).data
return self.get_paginated_response(serialized)
class TaskManagementMixin:
"""
Mixin to manage task status checking.
This mixin provides functionality to check if a task with specific parameters
is running, completed, failed, or doesn't exist. It returns the task when running
and raises specific exceptions for failed/not found scenarios that can be handled
at the view level.
"""
def check_task_status(
self,
task_name: str,
task_kwargs: dict,
raise_on_failed: bool = True,
raise_on_not_found: bool = True,
) -> Task | None:
"""
Check the status of a task with given name and kwargs.
This method first checks for a related Task object, and if not found,
checks TaskResult directly. If a TaskResult is found and running but
there's no related Task, it raises TaskInProgressException.
Args:
task_name (str): The name of the task to check
task_kwargs (dict): The kwargs to match against the task
raise_on_failed (bool): Whether to raise exception if task failed
raise_on_not_found (bool): Whether to raise exception if task not found
Returns:
Task | None: The task instance if found (regardless of state), None if not found and raise_on_not_found=False
Raises:
TaskFailedException: If task failed and raise_on_failed=True
TaskNotFoundException: If task not found and raise_on_not_found=True
TaskInProgressException: If task is running but no related Task object exists
"""
# First, try to find a Task object with related TaskResult
try:
# Build the filter for task kwargs
task_filter = {
"task_runner_task__task_name": task_name,
}
# Add kwargs filters - we need to check if the task kwargs contain our parameters
for key, value in task_kwargs.items():
task_filter["task_runner_task__task_kwargs__contains"] = str(value)
task = (
Task.objects.filter(**task_filter)
.select_related("task_runner_task")
.order_by("-inserted_at")
.first()
)
if task:
# Get task state using the same logic as TaskSerializer
task_state_mapping = {
"PENDING": StateChoices.AVAILABLE,
"STARTED": StateChoices.EXECUTING,
"PROGRESS": StateChoices.EXECUTING,
"SUCCESS": StateChoices.COMPLETED,
"FAILURE": StateChoices.FAILED,
"REVOKED": StateChoices.CANCELLED,
}
celery_status = (
task.task_runner_task.status if task.task_runner_task else None
)
task_state = task_state_mapping.get(
celery_status or "", StateChoices.AVAILABLE
)
# Check task state and raise exceptions accordingly
if task_state in (StateChoices.FAILED, StateChoices.CANCELLED):
if raise_on_failed:
raise TaskFailedException(task=task)
return task
elif task_state == StateChoices.COMPLETED:
return None
return task
except Task.DoesNotExist:
pass
# If no Task found, check TaskResult directly
try:
# Build the filter for TaskResult
task_result_filter = {
"task_name": task_name,
}
# Add kwargs filters - check if the task kwargs contain our parameters
for key, value in task_kwargs.items():
task_result_filter["task_kwargs__contains"] = str(value)
task_result = (
TaskResult.objects.filter(**task_result_filter)
.order_by("-date_created")
.first()
)
if task_result:
# Check if the TaskResult indicates a running task
if task_result.status in ["PENDING", "STARTED", "PROGRESS"]:
# Task is running but no related Task object exists
raise TaskInProgressException(task_result=task_result)
elif task_result.status == "FAILURE":
if raise_on_failed:
raise TaskFailedException(task=None)
# For other statuses (SUCCESS, REVOKED), we don't have a Task to return,
# so we treat it as not found
except TaskResult.DoesNotExist:
pass
# No task found at all
if raise_on_not_found:
raise TaskNotFoundException()
return None
def get_task_response_if_running(
self,
task_name: str,
task_kwargs: dict,
raise_on_failed: bool = True,
raise_on_not_found: bool = True,
) -> Response | None:
"""
Get a 202 response with task details if the task is currently running.
This method is useful for endpoints that should return task status when
a background task is in progress, similar to the compliance overview endpoints.
Args:
task_name (str): The name of the task to check
task_kwargs (dict): The kwargs to match against the task
Returns:
Response | None: 202 response with task details if running, None otherwise
"""
task = self.check_task_status(
task_name=task_name,
task_kwargs=task_kwargs,
raise_on_failed=raise_on_failed,
raise_on_not_found=raise_on_not_found,
)
if not task:
return None
# Get task state
task_state_mapping = {
"PENDING": StateChoices.AVAILABLE,
"STARTED": StateChoices.EXECUTING,
"PROGRESS": StateChoices.EXECUTING,
"SUCCESS": StateChoices.COMPLETED,
"FAILURE": StateChoices.FAILED,
"REVOKED": StateChoices.CANCELLED,
}
celery_status = task.task_runner_task.status if task.task_runner_task else None
task_state = task_state_mapping.get(celery_status or "", StateChoices.AVAILABLE)
if task_state == StateChoices.EXECUTING:
self.response_serializer_class = TaskSerializer
serializer = TaskSerializer(task)
return Response(
data=serializer.data,
status=status.HTTP_202_ACCEPTED,
headers={
"Content-Location": reverse("task-detail", kwargs={"pk": task.id})
},
)
+114 -201
View File
@@ -14,12 +14,12 @@ from rest_framework_simplejwt.serializers import TokenObtainPairSerializer
from rest_framework_simplejwt.tokens import RefreshToken
from api.models import (
ComplianceOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
InvitationRoleRelationship,
LighthouseConfiguration,
Membership,
Provider,
ProviderGroup,
@@ -31,7 +31,6 @@ from api.models import (
RoleProviderGroupRelationship,
Scan,
StateChoices,
StatusChoices,
Task,
User,
UserRoleRelationship,
@@ -1199,8 +1198,8 @@ class M365ProviderSecret(serializers.Serializer):
client_id = serializers.CharField()
client_secret = serializers.CharField()
tenant_id = serializers.CharField()
user = serializers.EmailField(required=False)
password = serializers.CharField(required=False)
user = serializers.EmailField()
password = serializers.CharField()
class Meta:
resource_name = "provider-secrets"
@@ -1680,63 +1679,130 @@ class RoleProviderGroupRelationshipSerializer(RLSSerializer, BaseWriteSerializer
# Compliance overview
class ComplianceOverviewSerializer(serializers.Serializer):
class ComplianceOverviewSerializer(RLSSerializer):
"""
Serializer for compliance requirement status aggregated by compliance framework.
This serializer is used to format aggregated compliance framework data,
providing counts of passed, failed, and manual requirements along with
an overall global status for each framework.
Serializer for the ComplianceOverview model.
"""
# Add ID field which will be used for resource identification
id = serializers.CharField()
framework = serializers.CharField()
version = serializers.CharField()
requirements_passed = serializers.IntegerField()
requirements_failed = serializers.IntegerField()
requirements_manual = serializers.IntegerField()
total_requirements = serializers.IntegerField()
requirements_status = serializers.SerializerMethodField(
read_only=True, method_name="get_requirements_status"
)
provider_type = serializers.SerializerMethodField(read_only=True)
class JSONAPIMeta:
resource_name = "compliance-overviews"
class Meta:
model = ComplianceOverview
fields = [
"id",
"inserted_at",
"compliance_id",
"framework",
"version",
"requirements_status",
"region",
"provider_type",
"scan",
"url",
]
@extend_schema_field(
{
"type": "object",
"properties": {
"passed": {"type": "integer"},
"failed": {"type": "integer"},
"manual": {"type": "integer"},
"total": {"type": "integer"},
},
}
)
def get_requirements_status(self, obj):
return {
"passed": obj.requirements_passed,
"failed": obj.requirements_failed,
"manual": obj.requirements_manual,
"total": obj.total_requirements,
}
@extend_schema_field(serializers.CharField(allow_null=True))
def get_provider_type(self, obj):
"""
Retrieves the provider_type from scan.provider.provider_type.
"""
try:
return obj.scan.provider.provider
except AttributeError:
return None
class ComplianceOverviewDetailSerializer(serializers.Serializer):
"""
Serializer for detailed compliance requirement information.
class ComplianceOverviewFullSerializer(ComplianceOverviewSerializer):
requirements = serializers.SerializerMethodField(read_only=True)
This serializer formats the aggregated requirement data, showing detailed status
and counts for each requirement across all regions.
"""
class Meta(ComplianceOverviewSerializer.Meta):
fields = ComplianceOverviewSerializer.Meta.fields + [
"description",
"requirements",
]
id = serializers.CharField()
framework = serializers.CharField()
version = serializers.CharField()
description = serializers.CharField()
status = serializers.ChoiceField(choices=StatusChoices.choices)
class JSONAPIMeta:
resource_name = "compliance-requirements-details"
class ComplianceOverviewAttributesSerializer(serializers.Serializer):
id = serializers.CharField()
framework_description = serializers.CharField()
name = serializers.CharField()
framework = serializers.CharField()
version = serializers.CharField()
description = serializers.CharField()
attributes = serializers.JSONField()
class JSONAPIMeta:
resource_name = "compliance-requirements-attributes"
@extend_schema_field(
{
"type": "object",
"properties": {
"requirement_id": {
"type": "object",
"properties": {
"name": {"type": "string"},
"checks": {
"type": "object",
"properties": {
"check_name": {
"type": "object",
"properties": {
"status": {
"type": "string",
"enum": ["PASS", "FAIL", None],
},
},
}
},
"description": "Each key in the 'checks' object is a check name, with values as "
"'PASS', 'FAIL', or null.",
},
"status": {
"type": "string",
"enum": ["PASS", "FAIL", "MANUAL"],
},
"attributes": {
"type": "array",
"items": {
"type": "object",
},
},
"description": {"type": "string"},
"checks_status": {
"type": "object",
"properties": {
"total": {"type": "integer"},
"pass": {"type": "integer"},
"fail": {"type": "integer"},
"manual": {"type": "integer"},
},
},
},
}
},
}
)
def get_requirements(self, obj):
"""
Returns the detailed structure of requirements.
"""
return obj.requirements
class ComplianceOverviewMetadataSerializer(serializers.Serializer):
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
class JSONAPIMeta:
class Meta:
resource_name = "compliance-overviews-metadata"
@@ -2062,156 +2128,3 @@ class IntegrationUpdateSerializer(BaseWriteIntegrationSerializer):
IntegrationProviderRelationship.objects.bulk_create(new_relationships)
return super().update(instance, validated_data)
# SSO
# class SamlInitiateSerializer(serializers.Serializer):
# email_domain = serializers.CharField()
# class JSONAPIMeta:
# resource_name = "saml-initiate"
# class SamlMetadataSerializer(serializers.Serializer):
# class JSONAPIMeta:
# resource_name = "saml-meta"
# class SAMLConfigurationSerializer(RLSSerializer):
# class Meta:
# model = SAMLConfiguration
# fields = ["id", "email_domain", "metadata_xml", "created_at", "updated_at"]
# read_only_fields = ["id", "created_at", "updated_at"]
class LighthouseConfigSerializer(RLSSerializer):
"""
Serializer for the LighthouseConfig model.
"""
api_key = serializers.CharField(required=False)
class Meta:
model = LighthouseConfiguration
fields = [
"id",
"name",
"api_key",
"model",
"temperature",
"max_tokens",
"business_context",
"is_active",
"inserted_at",
"updated_at",
"url",
]
extra_kwargs = {
"id": {"read_only": True},
"is_active": {"read_only": True},
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
}
def to_representation(self, instance):
data = super().to_representation(instance)
# Check if api_key is specifically requested in fields param
fields_param = self.context.get("request", None) and self.context[
"request"
].query_params.get("fields[lighthouse-config]", "")
if fields_param == "api_key":
# Return decrypted key if specifically requested
data["api_key"] = instance.api_key_decoded if instance.api_key else None
else:
# Return masked key for general requests
data["api_key"] = "*" * len(instance.api_key) if instance.api_key else None
return data
class LighthouseConfigCreateSerializer(RLSSerializer, BaseWriteSerializer):
"""Serializer for creating new Lighthouse configurations."""
api_key = serializers.CharField(write_only=True, required=True)
class Meta:
model = LighthouseConfiguration
fields = [
"id",
"name",
"api_key",
"model",
"temperature",
"max_tokens",
"business_context",
"is_active",
"inserted_at",
"updated_at",
]
extra_kwargs = {
"id": {"read_only": True},
"is_active": {"read_only": True},
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
}
def validate(self, attrs):
tenant_id = self.context.get("request").tenant_id
if LighthouseConfiguration.objects.filter(tenant_id=tenant_id).exists():
raise serializers.ValidationError(
{
"tenant_id": "Lighthouse configuration already exists for this tenant."
}
)
return super().validate(attrs)
def create(self, validated_data):
api_key = validated_data.pop("api_key")
instance = super().create(validated_data)
instance.api_key_decoded = api_key
instance.save()
return instance
def to_representation(self, instance):
data = super().to_representation(instance)
# Always mask the API key in the response
data["api_key"] = "*" * len(instance.api_key) if instance.api_key else None
return data
class LighthouseConfigUpdateSerializer(BaseWriteSerializer):
"""
Serializer for updating LighthouseConfig instances.
"""
api_key = serializers.CharField(write_only=True, required=False)
class Meta:
model = LighthouseConfiguration
fields = [
"id",
"name",
"api_key",
"model",
"temperature",
"max_tokens",
"business_context",
"is_active",
]
extra_kwargs = {
"id": {"read_only": True},
"is_active": {"read_only": True},
"name": {"required": False},
"model": {"required": False},
"temperature": {"required": False},
"max_tokens": {"required": False},
}
def update(self, instance, validated_data):
api_key = validated_data.pop("api_key", None)
instance = super().update(instance, validated_data)
if api_key:
instance.api_key_decoded = api_key
instance.save()
return instance
-25
View File
@@ -13,7 +13,6 @@ from api.v1.views import (
IntegrationViewSet,
InvitationAcceptViewSet,
InvitationViewSet,
LighthouseConfigViewSet,
MembershipViewSet,
OverviewViewSet,
ProviderGroupProvidersRelationshipView,
@@ -50,12 +49,6 @@ router.register(
router.register(r"overviews", OverviewViewSet, basename="overview")
router.register(r"schedules", ScheduleViewSet, basename="schedule")
router.register(r"integrations", IntegrationViewSet, basename="integration")
# router.register(r"saml-config", SAMLConfigurationViewSet, basename="saml-config")
router.register(
r"lighthouse-configurations",
LighthouseConfigViewSet,
basename="lighthouseconfiguration",
)
tenants_router = routers.NestedSimpleRouter(router, r"tenants", lookup="tenant")
tenants_router.register(
@@ -119,24 +112,6 @@ urlpatterns = [
),
name="provider_group-providers-relationship",
),
# API endpoint to start SAML SSO flow (WIP)
# path(
# "auth/saml/initiate/", SAMLInitiateAPIView.as_view(), name="api_saml_initiate"
# ),
# # Custom SAML endpoints (must come before allauth.urls) (WIP)
# path(
# "accounts/saml/<organization_slug>/login/",
# CustomSAMLLoginView.as_view(),
# name="saml_login",
# ),
# path(
# "accounts/saml/<organization_slug>/acs/finish/",
# TenantFinishACSView.as_view(),
# name="saml_finish_acs",
# ),
# Allauth SAML endpoints for tenants (WIP)
# path("accounts/", include("allauth.urls")),
# path("tokens/saml", SAMLTokenValidateView.as_view(), name="token-saml"),
path("tokens/google", GoogleSocialLoginView.as_view(), name="token-google"),
path("tokens/github", GithubSocialLoginView.as_view(), name="token-github"),
path("", include(router.urls)),
+53 -706
View File
@@ -17,7 +17,7 @@ from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, Sum
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce
from django.http import HttpResponse
from django.urls import reverse
@@ -26,10 +26,10 @@ from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_control
from django_celery_beat.models import PeriodicTask
from drf_spectacular.settings import spectacular_settings
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import (
OpenApiParameter,
OpenApiResponse,
OpenApiTypes,
extend_schema,
extend_schema_view,
)
@@ -51,7 +51,6 @@ from tasks.beat import schedule_provider_scan
from tasks.jobs.export import get_s3_client
from tasks.tasks import (
backfill_scan_resource_summaries_task,
check_lighthouse_connection_task,
check_provider_connection_task,
delete_provider_task,
delete_tenant_task,
@@ -59,12 +58,8 @@ from tasks.tasks import (
)
from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset
from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
get_compliance_frameworks,
)
from api.compliance import get_compliance_frameworks
from api.db_router import MainRouter
from api.exceptions import TaskFailedException
from api.filters import (
ComplianceOverviewFilter,
FindingFilter,
@@ -86,11 +81,9 @@ from api.filters import (
)
from api.models import (
ComplianceOverview,
ComplianceRequirementOverview,
Finding,
Integration,
Invitation,
LighthouseConfiguration,
Membership,
Provider,
ProviderGroup,
@@ -118,10 +111,9 @@ from api.utils import (
validate_invitation,
)
from api.uuid_utils import datetime_to_uuid7, uuid7_start
from api.v1.mixins import PaginateByPkMixin, TaskManagementMixin
from api.v1.mixins import PaginateByPkMixin
from api.v1.serializers import (
ComplianceOverviewAttributesSerializer,
ComplianceOverviewDetailSerializer,
ComplianceOverviewFullSerializer,
ComplianceOverviewMetadataSerializer,
ComplianceOverviewSerializer,
FindingDynamicFilterSerializer,
@@ -134,9 +126,6 @@ from api.v1.serializers import (
InvitationCreateSerializer,
InvitationSerializer,
InvitationUpdateSerializer,
LighthouseConfigCreateSerializer,
LighthouseConfigSerializer,
LighthouseConfigUpdateSerializer,
MembershipSerializer,
OverviewFindingSerializer,
OverviewProviderSerializer,
@@ -332,11 +321,6 @@ class SchemaView(SpectacularAPIView):
"description": "Endpoints for managing third-party integrations, including registration, configuration,"
" retrieval, and deletion of integrations such as S3, JIRA, or other services.",
},
{
"name": "Lighthouse",
"description": "Endpoints for managing Lighthouse configurations, including creation, retrieval, "
"updating, and deletion of configurations such as OpenAI keys, models, and business context.",
},
]
return super().get(request, *args, **kwargs)
@@ -393,242 +377,6 @@ class GithubSocialLoginView(SocialLoginView):
return original_response
# @extend_schema(exclude=True)
# class SAMLTokenValidateView(GenericAPIView):
# resource_name = "tokens"
# http_method_names = ["post"]
# def post(self, request):
# token_id = request.query_params.get("id", "invalid")
# try:
# saml_token = SAMLToken.objects.using(MainRouter.admin_db).get(id=token_id)
# except SAMLToken.DoesNotExist:
# return Response({"detail": "Invalid token ID."}, status=404)
# if saml_token.is_expired():
# return Response({"detail": "Token expired."}, status=400)
# token_data = saml_token.token
# # Currently we don't store the tokens in the database, so we delete the token after use
# saml_token.delete()
# return Response(token_data, status=200)
# @extend_schema(exclude=True)
# class CustomSAMLLoginView(LoginView):
# def dispatch(self, request, *args, **kwargs):
# """
# Convert GET requests to POST to bypass allauth's confirmation screen.
# Why this is necessary:
# - django-allauth requires POST for social logins to prevent open redirect attacks
# - SAML login links typically use GET requests (e.g., <a href="...">)
# - This conversion allows seamless login without user-facing confirmation
# Security considerations:
# 1. Preserves CSRF protection: Original POST handling remains intact
# 2. Avoids global SOCIALACCOUNT_LOGIN_ON_GET=True which would:
# - Enable GET logins for ALL providers (security risk)
# - Potentially expose open redirect vulnerabilities
# 3. SAML payloads remain signed/encrypted regardless of HTTP method
# 4. No sensitive parameters are exposed in URLs (copied to POST body)
# This approach maintains security while providing better UX.
# """
# if request.method == "GET":
# # Convert GET to POST while preserving parameters
# request.method = "POST"
# # Safe because SAML validates signatures
# request.POST = request.GET.copy()
# return super().dispatch(request, *args, **kwargs)
# @extend_schema(exclude=True)
# class SAMLInitiateAPIView(GenericAPIView):
# serializer_class = SamlInitiateSerializer
# permission_classes = []
# def post(self, request, *args, **kwargs):
# # Validate the input payload and extract the domain
# serializer = self.get_serializer(data=request.data)
# serializer.is_valid(raise_exception=True)
# email = serializer.validated_data["email_domain"]
# domain = email.split("@", 1)[-1].lower()
# # Retrieve the SAML configuration for the given email domain
# try:
# check = SAMLDomainIndex.objects.get(email_domain=domain)
# with rls_transaction(str(check.tenant_id)):
# config = SAMLConfiguration.objects.get(tenant_id=str(check.tenant_id))
# except (SAMLDomainIndex.DoesNotExist, SAMLConfiguration.DoesNotExist):
# return Response(
# {"detail": "Unauthorized domain."}, status=status.HTTP_403_FORBIDDEN
# )
# # Check certificates are not empty (TODO: Validate certificates)
# # saml_public_cert = os.getenv("SAML_PUBLIC_CERT", "").strip()
# # saml_private_key = os.getenv("SAML_PRIVATE_KEY", "").strip()
# # if not saml_public_cert or not saml_private_key:
# # return Response(
# # {"detail": "SAML configuration is invalid: missing certificates."},
# # status=status.HTTP_403_FORBIDDEN,
# # )
# # Build the SAML login URL using the configured API host
# api_host = os.getenv("API_BASE_URL")
# login_path = reverse(
# "saml_login", kwargs={"organization_slug": config.email_domain}
# )
# login_url = urljoin(api_host, login_path)
# return redirect(login_url)
# @extend_schema_view(
# list=extend_schema(
# tags=["SAML"],
# summary="List all SSO configurations",
# description="Returns all the SAML-based SSO configurations associated with the current tenant.",
# ),
# retrieve=extend_schema(
# tags=["SAML"],
# summary="Retrieve SSO configuration details",
# description="Returns the details of a specific SAML configuration belonging to the current tenant.",
# ),
# create=extend_schema(
# tags=["SAML"],
# summary="Create the SSO configuration",
# description="Creates a new SAML SSO configuration for the current tenant, including email domain and metadata XML.",
# ),
# partial_update=extend_schema(
# tags=["SAML"],
# summary="Update the SSO configuration",
# description="Partially updates an existing SAML SSO configuration. Supports changes to email domain and metadata XML.",
# ),
# destroy=extend_schema(
# tags=["SAML"],
# summary="Delete the SSO configuration",
# description="Deletes an existing SAML SSO configuration associated with the current tenant.",
# ),
# )
# @method_decorator(CACHE_DECORATOR, name="retrieve")
# @method_decorator(CACHE_DECORATOR, name="list")
# class SAMLConfigurationViewSet(BaseRLSViewSet):
# """
# ViewSet for managing SAML SSO configurations per tenant.
# This endpoint allows authorized users to perform CRUD operations on SAMLConfiguration,
# which define how a tenant integrates with an external SAML Identity Provider (IdP).
# Typical use cases include:
# - Listing all existing configurations for auditing or UI display.
# - Retrieving a single configuration to show setup details.
# - Creating or updating a configuration to onboard or modify SAML integration.
# - Deleting a configuration when deactivating SAML for a tenant.
# """
# serializer_class = SAMLConfigurationSerializer
# required_permissions = [Permissions.MANAGE_INTEGRATIONS]
# queryset = SAMLConfiguration.objects.all()
# def get_queryset(self):
# # If called during schema generation, return an empty queryset
# if getattr(self, "swagger_fake_view", False):
# return SAMLConfiguration.objects.none()
# return SAMLConfiguration.objects.filter(tenant=self.request.tenant_id)
# class TenantFinishACSView(FinishACSView):
# def dispatch(self, request, organization_slug):
# response = super().dispatch(request, organization_slug)
# user = getattr(request, "user", None)
# if not user or not user.is_authenticated:
# return response
# try:
# social_app = SocialApp.objects.get(
# provider="saml", client_id=organization_slug
# )
# social_account = SocialAccount.objects.get(
# user=user, provider=social_app.provider
# )
# except (SocialApp.DoesNotExist, SocialAccount.DoesNotExist):
# return response
# extra = social_account.extra_data
# user.first_name = (
# extra.get("firstName", [""])[0] if extra.get("firstName") else ""
# )
# user.last_name = extra.get("lastName", [""])[0] if extra.get("lastName") else ""
# user.company_name = (
# extra.get("organization", [""])[0] if extra.get("organization") else ""
# )
# user.name = f"{user.first_name} {user.last_name}".strip()
# if user.name == "":
# user.name = "N/A"
# user.save()
# email_domain = user.email.split("@")[-1]
# tenant = (
# SAMLConfiguration.objects.using(MainRouter.admin_db)
# .get(email_domain=email_domain)
# .tenant
# )
# role_name = (
# extra.get("userType", ["saml_default_role"])[0].strip()
# if extra.get("userType")
# else "saml_default_role"
# )
# try:
# role = Role.objects.using(MainRouter.admin_db).get(
# name=role_name, tenant=tenant
# )
# except Role.DoesNotExist:
# role = Role.objects.using(MainRouter.admin_db).create(
# name=role_name,
# tenant=tenant,
# manage_users=False,
# manage_account=False,
# manage_billing=False,
# manage_providers=False,
# manage_integrations=False,
# manage_scans=False,
# unlimited_visibility=False,
# )
# UserRoleRelationship.objects.using(MainRouter.admin_db).filter(
# user=user,
# tenant_id=tenant.id,
# ).delete()
# UserRoleRelationship.objects.using(MainRouter.admin_db).create(
# user=user,
# role=role,
# tenant_id=tenant.id,
# )
# membership, _ = Membership.objects.using(MainRouter.admin_db).get_or_create(
# user=user,
# tenant=tenant,
# defaults={
# "user": user,
# "tenant": tenant,
# "role": Membership.RoleChoices.MEMBER,
# },
# )
# serializer = TokenSocialLoginSerializer(data={"email": user.email})
# serializer.is_valid(raise_exception=True)
# token_data = serializer.validated_data
# saml_token = SAMLToken.objects.using(MainRouter.admin_db).create(
# token=token_data, user=user
# )
# callback_url = env.str("SAML_SSO_CALLBACK_URL")
# redirect_url = f"{callback_url}?id={saml_token.id}"
# return redirect(redirect_url)
@extend_schema_view(
list=extend_schema(
tags=["User"],
@@ -1338,7 +1086,7 @@ class ProviderViewSet(BaseRLSViewSet):
task = check_provider_connection_task.delay(
provider_id=pk, tenant_id=self.request.tenant_id
)
prowler_task = Task.objects.get(id=task.id)
prowler_task = Task.objects.get_with_retry(id=task.id)
serializer = TaskSerializer(prowler_task)
return Response(
data=serializer.data,
@@ -1361,7 +1109,7 @@ class ProviderViewSet(BaseRLSViewSet):
task = delete_provider_task.delay(
provider_id=pk, tenant_id=self.request.tenant_id
)
prowler_task = Task.objects.get(id=task.id)
prowler_task = Task.objects.get_with_retry(id=task.id)
serializer = TaskSerializer(prowler_task)
return Response(
data=serializer.data,
@@ -1741,7 +1489,7 @@ class ScanViewSet(BaseRLSViewSet):
},
)
prowler_task = Task.objects.get(id=task.id)
prowler_task = Task.objects.get_with_retry(id=task.id)
scan.task_id = task.id
scan.save(update_fields=["task_id"])
@@ -2161,8 +1909,6 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
)
resource_types = list(
queryset.values_list("resource_type", flat=True)
.exclude(resource_type__isnull=True)
.exclude(resource_type__exact="")
.distinct()
.order_by("resource_type")
)
@@ -2264,8 +2010,6 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
)
resource_types = list(
queryset.values_list("resource_type", flat=True)
.exclude(resource_type__isnull=True)
.exclude(resource_type__exact="")
.distinct()
.order_by("resource_type")
)
@@ -2647,7 +2391,8 @@ class RoleProviderGroupRelationshipView(RelationshipView, BaseRLSViewSet):
list=extend_schema(
tags=["Compliance Overview"],
summary="List compliance overviews for a scan",
description="Retrieve an overview of all the compliance in a given scan.",
description="Retrieve an overview of all the compliance in a given scan. If no region filters are provided, the"
" region with the most fails will be returned by default.",
parameters=[
OpenApiParameter(
name="filter[scan_id]",
@@ -2657,18 +2402,12 @@ class RoleProviderGroupRelationshipView(RelationshipView, BaseRLSViewSet):
description="Related scan ID.",
),
],
responses={
200: OpenApiResponse(
description="Compliance overviews obtained successfully",
response=ComplianceOverviewSerializer(many=True),
),
202: OpenApiResponse(
description="The task is in progress", response=TaskSerializer
),
500: OpenApiResponse(
description="Compliance overviews generation task failed"
),
},
),
retrieve=extend_schema(
tags=["Compliance Overview"],
summary="Retrieve data from a specific compliance overview",
description="Fetch detailed information about a specific compliance overview by its ID, including detailed "
"requirement information and check's status.",
),
metadata=extend_schema(
tags=["Compliance Overview"],
@@ -2684,84 +2423,19 @@ class RoleProviderGroupRelationshipView(RelationshipView, BaseRLSViewSet):
description="Related scan ID.",
),
],
responses={
200: OpenApiResponse(
description="Compliance overviews metadata obtained successfully",
response=ComplianceOverviewMetadataSerializer,
),
202: OpenApiResponse(description="The task is in progress"),
500: OpenApiResponse(
description="Compliance overviews generation task failed"
),
},
),
requirements=extend_schema(
tags=["Compliance Overview"],
summary="List compliance requirements overview for a scan",
description="Retrieve a detailed overview of compliance requirements in a given scan, grouped by compliance "
"framework. This endpoint provides requirement-level details and aggregates status across regions.",
parameters=[
OpenApiParameter(
name="filter[scan_id]",
required=True,
type=OpenApiTypes.UUID,
location=OpenApiParameter.QUERY,
description="Related scan ID.",
),
OpenApiParameter(
name="filter[compliance_id]",
required=True,
type=OpenApiTypes.STR,
location=OpenApiParameter.QUERY,
description="Compliance ID.",
),
],
responses={
200: OpenApiResponse(
description="Compliance requirement details obtained successfully",
response=ComplianceOverviewDetailSerializer(many=True),
),
202: OpenApiResponse(description="The task is in progress"),
500: OpenApiResponse(
description="Compliance overviews generation task failed"
),
},
filters=True,
),
attributes=extend_schema(
tags=["Compliance Overview"],
summary="Get compliance requirement attributes",
description="Retrieve detailed attribute information for all requirements in a specific compliance framework "
"along with the associated check IDs for each requirement.",
parameters=[
OpenApiParameter(
name="filter[compliance_id]",
required=True,
type=str,
location=OpenApiParameter.QUERY,
description="Compliance framework ID to get attributes for.",
),
],
responses={
200: OpenApiResponse(
description="Compliance attributes obtained successfully",
response=ComplianceOverviewAttributesSerializer(many=True),
),
},
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="requirements")
@method_decorator(CACHE_DECORATOR, name="attributes")
class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
@method_decorator(CACHE_DECORATOR, name="retrieve")
class ComplianceOverviewViewSet(BaseRLSViewSet):
pagination_class = ComplianceOverviewPagination
queryset = ComplianceRequirementOverview.objects.all()
queryset = ComplianceOverview.objects.all()
serializer_class = ComplianceOverviewSerializer
filterset_class = ComplianceOverviewFilter
http_method_names = ["get"]
search_fields = ["compliance_id"]
ordering = ["compliance_id"]
ordering_fields = ["compliance_id"]
ordering_fields = ["inserted_at", "compliance_id", "framework", "region"]
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
# the provider through the provider group)
required_permissions = []
@@ -2772,44 +2446,51 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
role, Permissions.UNLIMITED_VISIBILITY.value, False
)
if unlimited_visibility:
base_queryset = self.filter_queryset(
ComplianceRequirementOverview.objects.filter(
if self.action == "retrieve":
if unlimited_visibility:
# User has unlimited visibility, return all compliance
return ComplianceOverview.objects.filter(
tenant_id=self.request.tenant_id
)
providers = get_providers(role)
return ComplianceOverview.objects.filter(
tenant_id=self.request.tenant_id, scan__provider__in=providers
)
if unlimited_visibility:
base_queryset = self.filter_queryset(
ComplianceOverview.objects.filter(tenant_id=self.request.tenant_id)
)
else:
providers = Provider.objects.filter(
provider_groups__in=role.provider_groups.all()
).distinct()
base_queryset = self.filter_queryset(
ComplianceRequirementOverview.objects.filter(
ComplianceOverview.objects.filter(
tenant_id=self.request.tenant_id, scan__provider__in=providers
)
)
return base_queryset
max_failed_ids = (
base_queryset.filter(compliance_id=OuterRef("compliance_id"))
.order_by("-requirements_failed")
.values("id")[:1]
)
return base_queryset.filter(id__in=Subquery(max_failed_ids)).order_by(
"compliance_id"
)
def get_serializer_class(self):
if hasattr(self, "response_serializer_class"):
return self.response_serializer_class
elif self.action == "list":
return ComplianceOverviewSerializer
if self.action == "retrieve":
return ComplianceOverviewFullSerializer
elif self.action == "metadata":
return ComplianceOverviewMetadataSerializer
elif self.action == "attributes":
return ComplianceOverviewAttributesSerializer
elif self.action == "requirements":
return ComplianceOverviewDetailSerializer
return super().get_serializer_class()
@extend_schema(exclude=True)
def retrieve(self, request, *args, **kwargs):
raise MethodNotAllowed(method="GET")
def list(self, request, *args, **kwargs):
scan_id = request.query_params.get("filter[scan_id]")
if not scan_id:
if not request.query_params.get("filter[scan_id]"):
raise ValidationError(
[
{
@@ -2820,82 +2501,7 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
}
]
)
try:
if task := self.get_task_response_if_running(
task_name="scan-compliance-overviews",
task_kwargs={"tenant_id": self.request.tenant_id, "scan_id": scan_id},
raise_on_not_found=False,
):
return task
except TaskFailedException:
return Response(
{"detail": "Task failed to generate compliance overview data."},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
queryset = self.filter_queryset(self.filter_queryset(self.get_queryset()))
requirement_status_subquery = queryset.values(
"compliance_id", "requirement_id"
).annotate(
fail_count=Count("id", filter=Q(requirement_status="FAIL")),
pass_count=Count("id", filter=Q(requirement_status="PASS")),
total_count=Count("id"),
)
compliance_data = {}
framework_info = {}
for item in queryset.values("compliance_id", "framework", "version").distinct():
framework_info[item["compliance_id"]] = {
"framework": item["framework"],
"version": item["version"],
}
for item in requirement_status_subquery:
compliance_id = item["compliance_id"]
if item["fail_count"] > 0:
req_status = "FAIL"
elif item["pass_count"] == item["total_count"]:
req_status = "PASS"
else:
req_status = "MANUAL"
if compliance_id not in compliance_data:
compliance_data[compliance_id] = {
"total_requirements": 0,
"requirements_passed": 0,
"requirements_failed": 0,
"requirements_manual": 0,
}
compliance_data[compliance_id]["total_requirements"] += 1
if req_status == "PASS":
compliance_data[compliance_id]["requirements_passed"] += 1
elif req_status == "FAIL":
compliance_data[compliance_id]["requirements_failed"] += 1
else:
compliance_data[compliance_id]["requirements_manual"] += 1
response_data = []
for compliance_id, data in compliance_data.items():
framework = framework_info.get(compliance_id, {})
response_data.append(
{
"id": compliance_id,
"compliance_id": compliance_id,
"framework": framework.get("framework", ""),
"version": framework.get("version", ""),
"requirements_passed": data["requirements_passed"],
"requirements_failed": data["requirements_failed"],
"requirements_manual": data["requirements_manual"],
"total_requirements": data["total_requirements"],
}
)
serializer = self.get_serializer(response_data, many=True)
return Response(serializer.data)
return super().list(request, *args, **kwargs)
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
@@ -2911,21 +2517,11 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
}
]
)
try:
if task := self.get_task_response_if_running(
task_name="scan-compliance-overviews",
task_kwargs={"tenant_id": self.request.tenant_id, "scan_id": scan_id},
raise_on_not_found=False,
):
return task
except TaskFailedException:
return Response(
{"detail": "Task failed to generate compliance overview data."},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
tenant_id = self.request.tenant_id
regions = list(
self.get_queryset()
.filter(scan_id=scan_id)
ComplianceOverview.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
.values_list("region", flat=True)
.order_by("region")
.distinct()
@@ -2936,178 +2532,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
serializer.is_valid(raise_exception=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="requirements")
def requirements(self, request):
scan_id = request.query_params.get("filter[scan_id]")
compliance_id = request.query_params.get("filter[compliance_id]")
if not scan_id:
raise ValidationError(
[
{
"detail": "This query parameter is required.",
"status": 400,
"source": {"pointer": "filter[scan_id]"},
"code": "required",
}
]
)
if not compliance_id:
raise ValidationError(
[
{
"detail": "This query parameter is required.",
"status": 400,
"source": {"pointer": "filter[compliance_id]"},
"code": "required",
}
]
)
try:
if task := self.get_task_response_if_running(
task_name="scan-compliance-overviews",
task_kwargs={"tenant_id": self.request.tenant_id, "scan_id": scan_id},
raise_on_not_found=False,
):
return task
except TaskFailedException:
return Response(
{"detail": "Task failed to generate compliance overview data."},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
filtered_queryset = self.filter_queryset(self.get_queryset())
all_requirements = (
filtered_queryset.values(
"requirement_id", "framework", "version", "description"
)
.distinct()
.annotate(
total_instances=Count("id"),
manual_count=Count("id", filter=Q(requirement_status="MANUAL")),
)
)
passed_instances = (
filtered_queryset.filter(requirement_status="PASS")
.values("requirement_id")
.annotate(pass_count=Count("id"))
)
passed_counts = {
item["requirement_id"]: item["pass_count"] for item in passed_instances
}
requirements_summary = []
for requirement in all_requirements:
requirement_id = requirement["requirement_id"]
total_instances = requirement["total_instances"]
passed_count = passed_counts.get(requirement_id, 0)
is_manual = requirement["manual_count"] == total_instances
if is_manual:
requirement_status = "MANUAL"
elif passed_count == total_instances:
requirement_status = "PASS"
else:
requirement_status = "FAIL"
requirements_summary.append(
{
"id": requirement_id,
"framework": requirement["framework"],
"version": requirement["version"],
"description": requirement["description"],
"status": requirement_status,
}
)
serializer = self.get_serializer(requirements_summary, many=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="attributes")
def attributes(self, request):
compliance_id = request.query_params.get("filter[compliance_id]")
if not compliance_id:
raise ValidationError(
[
{
"detail": "This query parameter is required.",
"status": 400,
"source": {"pointer": "filter[compliance_id]"},
"code": "required",
}
]
)
provider_type = None
try:
sample_requirement = (
self.get_queryset().filter(compliance_id=compliance_id).first()
)
if sample_requirement:
provider_type = sample_requirement.scan.provider.provider
except Exception:
pass
# If we couldn't determine from database, try each provider type
if not provider_type:
for pt in Provider.ProviderChoices.values:
if compliance_id in PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE.get(pt, {}):
provider_type = pt
break
if not provider_type:
raise NotFound(detail=f"Compliance framework '{compliance_id}' not found.")
compliance_template = PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE.get(
provider_type, {}
)
compliance_framework = compliance_template.get(compliance_id)
if not compliance_framework:
raise NotFound(detail=f"Compliance framework '{compliance_id}' not found.")
attribute_data = []
for requirement_id, requirement in compliance_framework.get(
"requirements", {}
).items():
check_ids = list(requirement.get("checks", {}).keys())
metadata = requirement.get("attributes", [])
base_attributes = {
"metadata": metadata,
"check_ids": check_ids,
}
# Add technique details for MITRE-ATTACK framework
if "mitre_attack" in compliance_id:
base_attributes["technique_details"] = {
"tactics": requirement.get("tactics", []),
"subtechniques": requirement.get("subtechniques", []),
"platforms": requirement.get("platforms", []),
"technique_url": requirement.get("technique_url", ""),
}
attribute_data.append(
{
"id": requirement_id,
"framework_description": compliance_framework.get(
"description", ""
),
"name": requirement.get("name", ""),
"framework": compliance_framework.get("framework", ""),
"version": compliance_framework.get("version", ""),
"description": requirement.get("description", ""),
"attributes": base_attributes,
}
)
serializer = self.get_serializer(attribute_data, many=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema(tags=["Overview"])
@extend_schema_view(
@@ -3154,7 +2578,7 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
class OverviewViewSet(BaseRLSViewSet):
queryset = ComplianceOverview.objects.all()
http_method_names = ["get"]
ordering = ["-inserted_at"]
ordering = ["-id"]
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
# the provider through the provider group)
required_permissions = []
@@ -3399,7 +2823,7 @@ class ScheduleViewSet(BaseRLSViewSet):
with transaction.atomic():
task = schedule_provider_scan(provider_instance)
prowler_task = Task.objects.get(id=task.id)
prowler_task = Task.objects.get_with_retry(id=task.id)
self.response_serializer_class = TaskSerializer
output_serializer = self.get_serializer(prowler_task)
@@ -3476,80 +2900,3 @@ class IntegrationViewSet(BaseRLSViewSet):
context = super().get_serializer_context()
context["allowed_providers"] = self.allowed_providers
return context
@extend_schema_view(
list=extend_schema(
tags=["Lighthouse"],
summary="List all Lighthouse configurations",
description="Retrieve a list of all Lighthouse configurations.",
),
create=extend_schema(
tags=["Lighthouse"],
summary="Create a new Lighthouse configuration",
description="Create a new Lighthouse configuration with the specified details.",
),
partial_update=extend_schema(
tags=["Lighthouse"],
summary="Partially update a Lighthouse configuration",
description="Update certain fields of an existing Lighthouse configuration.",
),
destroy=extend_schema(
tags=["Lighthouse"],
summary="Delete a Lighthouse configuration",
description="Remove a Lighthouse configuration by its ID.",
),
connection=extend_schema(
tags=["Lighthouse"],
summary="Check the connection to the OpenAI API",
description="Verify the connection to the OpenAI API for a specific Lighthouse configuration.",
request=None,
responses={202: OpenApiResponse(response=TaskSerializer)},
),
)
class LighthouseConfigViewSet(BaseRLSViewSet):
"""
API endpoint for managing Lighthouse configuration.
"""
serializer_class = LighthouseConfigSerializer
ordering_fields = ["name", "inserted_at", "updated_at", "is_active"]
ordering = ["-inserted_at"]
def get_queryset(self):
return LighthouseConfiguration.objects.filter(tenant_id=self.request.tenant_id)
def get_serializer_class(self):
if self.action == "create":
return LighthouseConfigCreateSerializer
elif self.action == "partial_update":
return LighthouseConfigUpdateSerializer
elif self.action == "connection":
return TaskSerializer
return super().get_serializer_class()
@extend_schema(exclude=True)
def retrieve(self, request, *args, **kwargs):
raise MethodNotAllowed(method="GET")
@action(detail=True, methods=["post"], url_name="connection")
def connection(self, request, pk=None):
"""
Check the connection to the OpenAI API asynchronously.
"""
instance = self.get_object()
with transaction.atomic():
task = check_lighthouse_connection_task.delay(
lighthouse_config_id=str(instance.id), tenant_id=self.request.tenant_id
)
prowler_task = Task.objects.get(id=task.id)
serializer = TaskSerializer(prowler_task)
return Response(
data=serializer.data,
status=status.HTTP_202_ACCEPTED,
headers={
"Content-Location": reverse(
"task-detail", kwargs={"pk": prowler_task.id}
)
},
)
-7
View File
@@ -1,13 +1,6 @@
import warnings
from celery import Celery, Task
from config.env import env
# Suppress specific warnings from django-rest-auth: https://github.com/iMerica/dj-rest-auth/issues/684
warnings.filterwarnings(
"ignore", category=UserWarning, module="dj_rest_auth.registration.serializers"
)
BROKER_VISIBILITY_TIMEOUT = env.int("DJANGO_BROKER_VISIBILITY_TIMEOUT", default=86400)
celery_app = Celery("tasks")
-4
View File
@@ -10,7 +10,6 @@ from config.settings.social_login import * # noqa
SECRET_KEY = env("SECRET_KEY", default="secret")
DEBUG = env.bool("DJANGO_DEBUG", default=False)
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
# Application definition
@@ -27,19 +26,16 @@ INSTALLED_APPS = [
"rest_framework",
"corsheaders",
"drf_spectacular",
"drf_spectacular_jsonapi",
"django_guid",
"rest_framework_json_api",
"django_celery_results",
"django_celery_beat",
"rest_framework_simplejwt.token_blacklist",
"allauth",
"django.contrib.sites",
"allauth.account",
"allauth.socialaccount",
"allauth.socialaccount.providers.google",
"allauth.socialaccount.providers.github",
"allauth.socialaccount.providers.saml",
"dj_rest_auth.registration",
"rest_framework.authtoken",
]
+3 -10
View File
@@ -79,16 +79,9 @@ def before_send(event, hint):
log_msg = hint["log_record"].msg
log_lvl = hint["log_record"].levelno
# Handle Error and Critical events and discard the rest
if log_lvl <= 40 and any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
return None # Explicitly return None to drop the event
# Ignore exceptions with the ignored_exceptions
if "exc_info" in hint and hint["exc_info"]:
exc_value = str(hint["exc_info"][1])
if any(ignored in exc_value for ignored in IGNORED_EXCEPTIONS):
return None # Explicitly return None to drop the event
# Handle Error events and discard the rest
if log_lvl == 40 and any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
return
return event
@@ -11,7 +11,8 @@ GITHUB_OAUTH_CALLBACK_URL = env("SOCIAL_GITHUB_OAUTH_CALLBACK_URL", default="")
# Allauth settings
ACCOUNT_LOGIN_METHODS = {"email"} # Use Email / Password authentication
ACCOUNT_SIGNUP_FIELDS = ["email*", "password1*", "password2*"]
ACCOUNT_USERNAME_REQUIRED = False
ACCOUNT_EMAIL_REQUIRED = True
ACCOUNT_EMAIL_VERIFICATION = "none" # Do not require email confirmation
ACCOUNT_USER_MODEL_USERNAME_FIELD = None
REST_AUTH = {
@@ -24,11 +25,6 @@ SOCIALACCOUNT_EMAIL_AUTHENTICATION = True
# Connect local account and social account if local account with that email address already exists
SOCIALACCOUNT_EMAIL_AUTHENTICATION_AUTO_CONNECT = True
SOCIALACCOUNT_ADAPTER = "api.adapters.ProwlerSocialAccountAdapter"
# SAML keys (TODO: Validate certificates)
# SAML_PUBLIC_CERT = env("SAML_PUBLIC_CERT", default="")
# SAML_PRIVATE_KEY = env("SAML_PRIVATE_KEY", default="")
SOCIALACCOUNT_PROVIDERS = {
"google": {
"APP": {
@@ -54,23 +50,4 @@ SOCIALACCOUNT_PROVIDERS = {
"read:org",
],
},
"saml": {
"use_nameid_for_email": True,
"sp": {
"entity_id": "urn:prowler.com:sp",
},
"advanced": {
# "x509cert": SAML_PUBLIC_CERT,
# "private_key": SAML_PRIVATE_KEY,
# "authn_request_signed": True,
# "want_assertion_signed": True,
# "want_message_signed": True,
"name_id_format": "urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress",
"authn_request_signed": False,
"logout_request_signed": False,
"logout_response_signed": False,
"want_assertion_encrypted": False,
"want_name_id_encrypted": False,
},
},
}
+1 -207
View File
@@ -15,12 +15,10 @@ from tasks.jobs.backfill import backfill_resource_scan_summaries
from api.db_utils import rls_transaction
from api.models import (
ComplianceOverview,
ComplianceRequirementOverview,
Finding,
Integration,
IntegrationProviderRelationship,
Invitation,
LighthouseConfiguration,
Membership,
Provider,
ProviderGroup,
@@ -31,7 +29,6 @@ from api.models import (
Scan,
ScanSummary,
StateChoices,
StatusChoices,
Task,
User,
UserRoleRelationship,
@@ -378,14 +375,8 @@ def providers_fixture(tenants_fixture):
tenant_id=tenant.id,
scanner_args={"key1": "value1", "key2": {"key21": "value21"}},
)
provider6 = Provider.objects.create(
provider="m365",
uid="m365.test.com",
alias="m365_testing",
tenant_id=tenant.id,
)
return provider1, provider2, provider3, provider4, provider5, provider6
return provider1, provider2, provider3, provider4, provider5
@pytest.fixture
@@ -786,131 +777,6 @@ def compliance_overviews_fixture(scans_fixture, tenants_fixture):
return compliance_overview1, compliance_overview2
@pytest.fixture
def compliance_requirements_overviews_fixture(scans_fixture, tenants_fixture):
"""Fixture for ComplianceRequirementOverview objects used by the new ComplianceOverviewViewSet."""
tenant = tenants_fixture[0]
scan1, scan2, scan3 = scans_fixture
# Create ComplianceRequirementOverview objects for scan1
requirement_overview1 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="aws_account_security_onboarding_aws",
framework="AWS-Account-Security-Onboarding",
version="1.0",
description="Description for AWS Account Security Onboarding",
region="eu-west-1",
requirement_id="requirement1",
requirement_status=StatusChoices.PASS,
passed_checks=2,
failed_checks=0,
total_checks=2,
)
requirement_overview2 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="aws_account_security_onboarding_aws",
framework="AWS-Account-Security-Onboarding",
version="1.0",
description="Description for AWS Account Security Onboarding",
region="eu-west-1",
requirement_id="requirement2",
requirement_status=StatusChoices.PASS,
passed_checks=2,
failed_checks=0,
total_checks=2,
)
requirement_overview3 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="aws_account_security_onboarding_aws",
framework="AWS-Account-Security-Onboarding",
version="1.0",
description="Description for AWS Account Security Onboarding",
region="eu-west-2",
requirement_id="requirement1",
requirement_status=StatusChoices.PASS,
passed_checks=2,
failed_checks=0,
total_checks=2,
)
requirement_overview4 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="aws_account_security_onboarding_aws",
framework="AWS-Account-Security-Onboarding",
version="1.0",
description="Description for AWS Account Security Onboarding",
region="eu-west-2",
requirement_id="requirement2",
requirement_status=StatusChoices.FAIL,
passed_checks=1,
failed_checks=1,
total_checks=2,
)
requirement_overview5 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="aws_account_security_onboarding_aws",
framework="AWS-Account-Security-Onboarding",
version="1.0",
description="Description for AWS Account Security Onboarding (MANUAL)",
region="eu-west-2",
requirement_id="requirement3",
requirement_status=StatusChoices.MANUAL,
passed_checks=0,
failed_checks=0,
total_checks=0,
)
# Create a different compliance framework for testing
requirement_overview6 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="cis_1.4_aws",
framework="CIS-1.4-AWS",
version="1.4",
description="CIS AWS Foundations Benchmark v1.4.0",
region="eu-west-1",
requirement_id="cis_requirement1",
requirement_status=StatusChoices.FAIL,
passed_checks=0,
failed_checks=3,
total_checks=3,
)
# Create another compliance framework for testing MITRE ATT&CK
requirement_overview7 = ComplianceRequirementOverview.objects.create(
tenant=tenant,
scan=scan1,
compliance_id="mitre_attack_aws",
framework="MITRE-ATTACK",
version="1.0",
description="MITRE ATT&CK",
region="eu-west-1",
requirement_id="mitre_requirement1",
requirement_status=StatusChoices.FAIL,
passed_checks=0,
failed_checks=0,
total_checks=0,
)
return (
requirement_overview1,
requirement_overview2,
requirement_overview3,
requirement_overview4,
requirement_overview5,
requirement_overview6,
requirement_overview7,
)
def get_api_tokens(
api_client, user_email: str, user_password: str, tenant_id: str = None
) -> tuple[str, str]:
@@ -1063,20 +929,6 @@ def backfill_scan_metadata_fixture(scans_fixture, findings_fixture):
backfill_resource_scan_summaries(tenant_id=tenant_id, scan_id=scan_id)
@pytest.fixture
def lighthouse_config_fixture(authenticated_client, tenants_fixture):
return LighthouseConfiguration.objects.create(
tenant_id=tenants_fixture[0].id,
name="OpenAI",
api_key_decoded="sk-test1234567890T3BlbkFJtest1234567890",
model="gpt-4o",
temperature=0,
max_tokens=4000,
business_context="Test business context",
is_active=True,
)
@pytest.fixture(scope="function")
def latest_scan_finding(authenticated_client, providers_fixture, resources_fixture):
provider = providers_fixture[0]
@@ -1118,64 +970,6 @@ def latest_scan_finding(authenticated_client, providers_fixture, resources_fixtu
return finding
# @pytest.fixture
# def saml_setup(tenants_fixture):
# tenant_id = tenants_fixture[0].id
# domain = "example.com"
# SAMLDomainIndex.objects.create(email_domain=domain, tenant_id=tenant_id)
# metadata_xml = """<?xml version='1.0' encoding='UTF-8'?>
# <md:EntityDescriptor entityID='TEST' xmlns:md='urn:oasis:names:tc:SAML:2.0:metadata'>
# <md:IDPSSODescriptor WantAuthnRequestsSigned='false' protocolSupportEnumeration='urn:oasis:names:tc:SAML:2.0:protocol'>
# <md:KeyDescriptor use='signing'>
# <ds:KeyInfo xmlns:ds='http://www.w3.org/2000/09/xmldsig#'>
# <ds:X509Data>
# <ds:X509Certificate>TEST</ds:X509Certificate>
# </ds:X509Data>
# </ds:KeyInfo>
# </md:KeyDescriptor>
# <md:NameIDFormat>urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress</md:NameIDFormat>
# <md:SingleSignOnService Binding='urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST' Location='https://TEST/sso/saml'/>
# <md:SingleSignOnService Binding='urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect' Location='https://TEST/sso/saml'/>
# </md:IDPSSODescriptor>
# </md:EntityDescriptor>
# """
# SAMLConfiguration.objects.create(
# tenant_id=str(tenant_id),
# email_domain=domain,
# metadata_xml=metadata_xml,
# )
# return {
# "email": f"user@{domain}",
# "domain": domain,
# "tenant_id": tenant_id,
# }
# @pytest.fixture
# def saml_sociallogin(users_fixture):
# user = users_fixture[0]
# user.email = "samlsso@acme.com"
# extra_data = {
# "firstName": ["Test"],
# "lastName": ["User"],
# "organization": ["Prowler"],
# "userType": ["member"],
# }
# account = MagicMock()
# account.provider = "saml"
# account.extra_data = extra_data
# sociallogin = MagicMock(spec=SocialLogin)
# sociallogin.account = account
# sociallogin.user = user
# return sociallogin
def get_authorization_header(access_token: str) -> dict:
return {"Authorization": f"Bearer {access_token}"}
-6
View File
@@ -3,12 +3,6 @@
import os
import sys
import warnings
# Suppress specific warnings from django-rest-auth: https://github.com/iMerica/dj-rest-auth/issues/684
warnings.filterwarnings(
"ignore", category=UserWarning, module="dj_rest_auth.registration.serializers"
)
def main():
+1 -45
View File
@@ -1,9 +1,8 @@
from datetime import datetime, timezone
import openai
from celery.utils.log import get_task_logger
from api.models import LighthouseConfiguration, Provider
from api.models import Provider
from api.utils import prowler_provider_connection_test
logger = get_task_logger(__name__)
@@ -40,46 +39,3 @@ def check_provider_connection(provider_id: str):
connection_error = f"{connection_result.error}" if connection_result.error else None
return {"connected": connection_result.is_connected, "error": connection_error}
def check_lighthouse_connection(lighthouse_config_id: str):
"""
Business logic to check the connection status of a Lighthouse configuration.
Args:
lighthouse_config_id (str): The primary key of the LighthouseConfiguration instance to check.
Returns:
dict: A dictionary containing:
- 'connected' (bool): Indicates whether the connection is successful.
- 'error' (str or None): The error message if the connection failed, otherwise `None`.
- 'available_models' (list): List of available models if connection is successful.
Raises:
Model.DoesNotExist: If the lighthouse configuration does not exist.
"""
lighthouse_config = LighthouseConfiguration.objects.get(pk=lighthouse_config_id)
if not lighthouse_config.api_key_decoded:
lighthouse_config.is_active = False
lighthouse_config.save()
return {
"connected": False,
"error": "API key is invalid or missing.",
"available_models": [],
}
try:
client = openai.OpenAI(api_key=lighthouse_config.api_key_decoded)
models = client.models.list()
lighthouse_config.is_active = True
lighthouse_config.save()
return {
"connected": True,
"error": None,
"available_models": [model.id for model in models.data],
}
except Exception as e:
lighthouse_config.is_active = False
lighthouse_config.save()
return {"connected": False, "error": str(e), "available_models": []}
+2 -8
View File
@@ -1,5 +1,4 @@
import os
import re
import zipfile
import boto3
@@ -31,7 +30,6 @@ from prowler.lib.outputs.compliance.iso27001.iso27001_gcp import GCPISO27001
from prowler.lib.outputs.compliance.iso27001.iso27001_kubernetes import (
KubernetesISO27001,
)
from prowler.lib.outputs.compliance.iso27001.iso27001_m365 import M365ISO27001
from prowler.lib.outputs.compliance.kisa_ismsp.kisa_ismsp_aws import AWSKISAISMSP
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_aws import AWSMitreAttack
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_azure import (
@@ -91,7 +89,6 @@ COMPLIANCE_CLASS_MAP = {
"m365": [
(lambda name: name.startswith("cis_"), M365CIS),
(lambda name: name == "prowler_threatscore_m365", ProwlerThreatScoreM365),
(lambda name: name.startswith("iso27001_"), M365ISO27001),
],
}
@@ -241,18 +238,15 @@ def _generate_output_directory(
'/tmp/tenant-1234/aws/scan-5678/prowler-output-2023-02-15T12:34:56',
'/tmp/tenant-1234/aws/scan-5678/compliance/prowler-output-2023-02-15T12:34:56'
"""
# Sanitize the prowler provider name to ensure it is a valid directory name
prowler_provider_sanitized = re.sub(r"[^\w\-]", "-", prowler_provider)
path = (
f"{output_directory}/{tenant_id}/{scan_id}/prowler-output-"
f"{prowler_provider_sanitized}-{output_file_timestamp}"
f"{prowler_provider}-{output_file_timestamp}"
)
os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
compliance_path = (
f"{output_directory}/{tenant_id}/{scan_id}/compliance/prowler-output-"
f"{prowler_provider_sanitized}-{output_file_timestamp}"
f"{prowler_provider}-{output_file_timestamp}"
)
os.makedirs("/".join(compliance_path.split("/")[:-1]), exist_ok=True)
+72 -116
View File
@@ -13,9 +13,9 @@ from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
generate_scan_compliance,
)
from api.db_utils import create_objects_in_batches, rls_transaction
from api.db_utils import rls_transaction
from api.models import (
ComplianceRequirementOverview,
ComplianceOverview,
Finding,
Provider,
Resource,
@@ -26,7 +26,7 @@ from api.models import (
StateChoices,
)
from api.models import StatusChoices as FindingStatus
from api.utils import initialize_prowler_provider, return_prowler_provider
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.outputs.finding import Finding as ProwlerFinding
from prowler.lib.scan.scan import Scan as ProwlerScan
@@ -119,6 +119,7 @@ def perform_prowler_scan(
ValueError: If the provider cannot be connected.
"""
check_status_by_region = {}
exception = None
unique_resources = set()
scan_resource_cache: set[tuple[str, str, str, str]] = set()
@@ -149,8 +150,7 @@ def perform_prowler_scan(
provider_instance.save()
# If the provider is not connected, raise an exception outside the transaction.
# If raised within the transaction, the transaction will be rolled back and the provider will not be marked
# as not connected.
# If raised within the transaction, the transaction will be rolled back and the provider will not be marked as not connected.
if exc:
raise exc
@@ -293,6 +293,16 @@ def perform_prowler_scan(
)
finding_instance.add_resources([resource_instance])
# Update compliance data if applicable
if finding.status.value == "MUTED":
continue
region_dict = check_status_by_region.setdefault(finding.region, {})
current_status = region_dict.get(finding.check_id)
if current_status == "FAIL":
continue
region_dict[finding.check_id] = finding.status.value
# Update scan resource summaries
scan_resource_cache.add(
(
@@ -325,6 +335,63 @@ def perform_prowler_scan(
if exception is not None:
raise exception
try:
regions = prowler_provider.get_regions()
except AttributeError:
regions = set()
compliance_template = PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE[
provider_instance.provider
]
compliance_overview_by_region = {
region: deepcopy(compliance_template) for region in regions
}
for region, check_status in check_status_by_region.items():
compliance_data = compliance_overview_by_region.setdefault(
region, deepcopy(compliance_template)
)
for check_name, status in check_status.items():
generate_scan_compliance(
compliance_data,
provider_instance.provider,
check_name,
status,
)
# Prepare compliance overview objects
compliance_overview_objects = []
for region, compliance_data in compliance_overview_by_region.items():
for compliance_id, compliance in compliance_data.items():
compliance_overview_objects.append(
ComplianceOverview(
tenant_id=tenant_id,
scan=scan_instance,
region=region,
compliance_id=compliance_id,
framework=compliance["framework"],
version=compliance["version"],
description=compliance["description"],
requirements=compliance["requirements"],
requirements_passed=compliance["requirements_status"]["passed"],
requirements_failed=compliance["requirements_status"]["failed"],
requirements_manual=compliance["requirements_status"]["manual"],
total_requirements=compliance["total_requirements"],
)
)
try:
with rls_transaction(tenant_id):
ComplianceOverview.objects.bulk_create(
compliance_overview_objects, batch_size=500
)
except Exception as overview_exception:
import sentry_sdk
sentry_sdk.capture_exception(overview_exception)
logger.error(
f"Error storing compliance overview for scan {scan_id}: {overview_exception}"
)
try:
resource_scan_summaries = [
ResourceScanSummary(
@@ -503,114 +570,3 @@ def aggregate_findings(tenant_id: str, scan_id: str):
for agg in aggregation
}
ScanSummary.objects.bulk_create(scan_aggregations, batch_size=3000)
def create_compliance_requirements(tenant_id: str, scan_id: str):
"""
Create detailed compliance requirement overview records for a scan.
This function processes the compliance data collected during a scan and creates
individual records for each compliance requirement in each region. These detailed
records provide a granular view of compliance status.
Args:
tenant_id (str): The ID of the tenant for which to create records.
scan_id (str): The ID of the scan for which to create records.
Returns:
dict: A dictionary containing the number of requirements created and the regions processed.
Raises:
ValidationError: If tenant_id is not a valid UUID.
"""
try:
with rls_transaction(tenant_id):
scan_instance = Scan.objects.get(pk=scan_id)
provider_instance = scan_instance.provider
prowler_provider = return_prowler_provider(provider_instance)
# Get check status data by region from findings
check_status_by_region = {}
with rls_transaction(tenant_id):
findings = Finding.objects.filter(scan_id=scan_id, muted=False)
for finding in findings:
# Get region from resources
for resource in finding.resources.all():
region = resource.region
region_dict = check_status_by_region.setdefault(region, {})
current_status = region_dict.get(finding.check_id)
if current_status == "FAIL":
continue
region_dict[finding.check_id] = finding.status
try:
# Try to get regions from provider
regions = prowler_provider.get_regions()
except (AttributeError, Exception):
# If not available, use regions from findings
regions = set(check_status_by_region.keys())
# Get compliance template for the provider
compliance_template = PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE[
provider_instance.provider
]
# Create compliance data by region
compliance_overview_by_region = {
region: deepcopy(compliance_template) for region in regions
}
# Apply check statuses to compliance data
for region, check_status in check_status_by_region.items():
compliance_data = compliance_overview_by_region.setdefault(
region, deepcopy(compliance_template)
)
for check_name, status in check_status.items():
generate_scan_compliance(
compliance_data,
provider_instance.provider,
check_name,
status,
)
# Prepare compliance requirement objects
compliance_requirement_objects = []
for region, compliance_data in compliance_overview_by_region.items():
for compliance_id, compliance in compliance_data.items():
# Create an overview record for each requirement within each compliance framework
for requirement_id, requirement in compliance["requirements"].items():
compliance_requirement_objects.append(
ComplianceRequirementOverview(
tenant_id=tenant_id,
scan=scan_instance,
region=region,
compliance_id=compliance_id,
framework=compliance["framework"],
version=compliance["version"],
requirement_id=requirement_id,
description=requirement["description"],
passed_checks=requirement["checks_status"]["pass"],
failed_checks=requirement["checks_status"]["fail"],
total_checks=requirement["checks_status"]["total"],
requirement_status=requirement["status"],
)
)
# Bulk create requirement records
create_objects_in_batches(
tenant_id, ComplianceRequirementOverview, compliance_requirement_objects
)
return {
"requirements_created": len(compliance_requirement_objects),
"regions_processed": list(regions),
"compliance_frameworks": (
list(compliance_overview_by_region.get(list(regions)[0], {}).keys())
if regions
else []
),
}
except Exception as e:
logger.error(f"Error creating compliance requirements for scan {scan_id}: {e}")
raise e
+2 -45
View File
@@ -8,7 +8,7 @@ from config.celery import RLSTask
from config.django.base import DJANGO_FINDINGS_BATCH_SIZE, DJANGO_TMP_OUTPUT_DIRECTORY
from django_celery_beat.models import PeriodicTask
from tasks.jobs.backfill import backfill_resource_scan_summaries
from tasks.jobs.connection import check_lighthouse_connection, check_provider_connection
from tasks.jobs.connection import check_provider_connection
from tasks.jobs.deletion import delete_provider, delete_tenant
from tasks.jobs.export import (
COMPLIANCE_CLASS_MAP,
@@ -17,11 +17,7 @@ from tasks.jobs.export import (
_generate_output_directory,
_upload_to_s3,
)
from tasks.jobs.scan import (
aggregate_findings,
create_compliance_requirements,
perform_prowler_scan,
)
from tasks.jobs.scan import aggregate_findings, perform_prowler_scan
from tasks.utils import batched, get_next_execution_datetime
from api.compliance import get_compliance_frameworks
@@ -105,7 +101,6 @@ def perform_scan_task(
chain(
perform_scan_summary_task.si(tenant_id, scan_id),
create_compliance_requirements_task.si(tenant_id=tenant_id, scan_id=scan_id),
generate_outputs.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
@@ -216,9 +211,6 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
chain(
perform_scan_summary_task.si(tenant_id, scan_instance.id),
create_compliance_requirements_task.si(
tenant_id=tenant_id, scan_id=str(scan_instance.id)
),
generate_outputs.si(
scan_id=str(scan_instance.id), provider_id=provider_id, tenant_id=tenant_id
),
@@ -379,38 +371,3 @@ def backfill_scan_resource_summaries_task(tenant_id: str, scan_id: str):
scan_id (str): The scan identifier.
"""
return backfill_resource_scan_summaries(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(base=RLSTask, name="scan-compliance-overviews")
def create_compliance_requirements_task(tenant_id: str, scan_id: str):
"""
Creates detailed compliance requirement records for a scan.
This task processes the compliance data collected during a scan and creates
individual records for each compliance requirement in each region. These detailed
records provide a granular view of compliance status.
Args:
tenant_id (str): The tenant ID for which to create records.
scan_id (str): The ID of the scan for which to create records.
"""
return create_compliance_requirements(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(base=RLSTask, name="lighthouse-connection-check")
@set_tenant
def check_lighthouse_connection_task(lighthouse_config_id: str, tenant_id: str = None):
"""
Task to check the connection status of a Lighthouse configuration.
Args:
lighthouse_config_id (str): The primary key of the LighthouseConfiguration instance to check.
tenant_id (str): The tenant ID for the task.
Returns:
dict: A dictionary containing:
- 'connected' (bool): Indicates whether the connection is successful.
- 'error' (str or None): The error message if the connection failed, otherwise `None`.
- 'available_models' (list): List of available models if connection is successful.
"""
return check_lighthouse_connection(lighthouse_config_id=lighthouse_config_id)
-19
View File
@@ -55,22 +55,3 @@ class TestScheduleProviderScan:
assert "There is already a scheduled scan for this provider." in str(
exc_info.value
)
def test_remove_periodic_task(self, providers_fixture):
provider_instance = providers_fixture[0]
assert Scan.objects.count() == 0
with patch("tasks.tasks.perform_scheduled_scan_task.apply_async"):
schedule_provider_scan(provider_instance)
assert Scan.objects.count() == 1
scan = Scan.objects.first()
periodic_task = scan.scheduler_task
assert periodic_task is not None
periodic_task.delete()
scan.refresh_from_db()
# Assert the scan still exists but its scheduler_task is set to None
# Otherwise, Scan.DoesNotExist would be raised
assert Scan.objects.get(id=scan.id).scheduler_task is None
+3 -60
View File
@@ -1,10 +1,10 @@
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
from unittest.mock import patch, MagicMock
import pytest
from tasks.jobs.connection import check_lighthouse_connection, check_provider_connection
from api.models import LighthouseConfiguration, Provider
from api.models import Provider
from tasks.jobs.connection import check_provider_connection
@pytest.mark.parametrize(
@@ -70,60 +70,3 @@ def test_check_provider_connection_exception(
mock_provider_instance.save.assert_called_once()
assert mock_provider_instance.connected is False
@pytest.mark.parametrize(
"lighthouse_data",
[
{
"name": "OpenAI",
"api_key_decoded": "sk-test1234567890T3BlbkFJtest1234567890",
"model": "gpt-4o",
"temperature": 0,
"max_tokens": 4000,
"business_context": "Test business context",
"is_active": True,
},
],
)
@patch("tasks.jobs.connection.openai.OpenAI")
@pytest.mark.django_db
def test_check_lighthouse_connection(
mock_openai_client, tenants_fixture, lighthouse_data
):
lighthouse_config = LighthouseConfiguration.objects.create(
**lighthouse_data, tenant_id=tenants_fixture[0].id
)
mock_models = MagicMock()
mock_models.data = [MagicMock(id="gpt-4o"), MagicMock(id="gpt-4o-mini")]
mock_openai_client.return_value.models.list.return_value = mock_models
result = check_lighthouse_connection(
lighthouse_config_id=str(lighthouse_config.id),
)
lighthouse_config.refresh_from_db()
mock_openai_client.assert_called_once_with(
api_key=lighthouse_data["api_key_decoded"]
)
assert lighthouse_config.is_active is True
assert result["connected"] is True
assert result["error"] is None
assert result["available_models"] == ["gpt-4o", "gpt-4o-mini"]
@patch("tasks.jobs.connection.LighthouseConfiguration.objects.get")
@pytest.mark.django_db
def test_check_lighthouse_connection_missing_api_key(mock_lighthouse_get):
mock_lighthouse_instance = MagicMock()
mock_lighthouse_instance.api_key_decoded = None
mock_lighthouse_get.return_value = mock_lighthouse_instance
result = check_lighthouse_connection("lighthouse_config_id")
assert result["connected"] is False
assert result["error"] == "API key is invalid or missing."
assert result["available_models"] == []
assert mock_lighthouse_instance.is_active is False
mock_lighthouse_instance.save.assert_called_once()
@@ -145,22 +145,3 @@ class TestOutputs:
assert path.endswith(f"{provider}-{output_file_timestamp}")
assert compliance.endswith(f"{provider}-{output_file_timestamp}")
def test_generate_output_directory_invalid_character(self, tmpdir):
from prowler.config.config import output_file_timestamp
base_tmp = Path(str(tmpdir.mkdir("generate_output")))
base_dir = str(base_tmp)
tenant_id = "t1"
scan_id = "s1"
provider = "aws/test@check"
path, compliance = _generate_output_directory(
base_dir, provider, tenant_id, scan_id
)
assert os.path.isdir(os.path.dirname(path))
assert os.path.isdir(os.path.dirname(compliance))
assert path.endswith(f"aws-test-check-{output_file_timestamp}")
assert compliance.endswith(f"aws-test-check-{output_file_timestamp}")
+8 -790
View File
@@ -7,13 +7,11 @@ import pytest
from tasks.jobs.scan import (
_create_finding_delta,
_store_resources,
create_compliance_requirements,
perform_prowler_scan,
)
from tasks.utils import CustomEncoder
from api.models import (
ComplianceRequirementOverview,
Finding,
Provider,
Resource,
@@ -237,7 +235,7 @@ class TestPerformScan:
):
tenant_id = uuid.uuid4()
provider_instance = MagicMock()
provider_instance.id = "provider123"
provider_instance.id = "provider456"
finding = MagicMock()
finding.resource_uid = "resource_uid_123"
@@ -252,16 +250,15 @@ class TestPerformScan:
resource_instance.region = finding.region
mock_get_or_create_resource.return_value = (resource_instance, True)
tag_instance = MagicMock()
mock_get_or_create_tag.return_value = (tag_instance, True)
resource, resource_uid_tuple = _store_resources(
finding, str(tenant_id), provider_instance
finding, tenant_id, provider_instance
)
mock_get_or_create_resource.assert_called_once_with(
tenant_id=str(tenant_id),
tenant_id=tenant_id,
provider=provider_instance,
uid=finding.resource_uid,
defaults={
@@ -308,11 +305,11 @@ class TestPerformScan:
mock_get_or_create_tag.return_value = (tag_instance, True)
resource, resource_uid_tuple = _store_resources(
finding, str(tenant_id), provider_instance
finding, tenant_id, provider_instance
)
mock_get_or_create_resource.assert_called_once_with(
tenant_id=str(tenant_id),
tenant_id=tenant_id,
provider=provider_instance,
uid=finding.resource_uid,
defaults={
@@ -366,14 +363,14 @@ class TestPerformScan:
]
resource, resource_uid_tuple = _store_resources(
finding, str(tenant_id), provider_instance
finding, tenant_id, provider_instance
)
mock_get_or_create_tag.assert_any_call(
tenant_id=str(tenant_id), key="tag1", value="value1"
tenant_id=tenant_id, key="tag1", value="value1"
)
mock_get_or_create_tag.assert_any_call(
tenant_id=str(tenant_id), key="tag2", value="value2"
tenant_id=tenant_id, key="tag2", value="value2"
)
resource_instance.upsert_or_delete_tags.assert_called_once()
tags_passed = resource_instance.upsert_or_delete_tags.call_args[1]["tags"]
@@ -385,782 +382,3 @@ class TestPerformScan:
# TODO Add tests for aggregations
@pytest.mark.django_db
class TestCreateComplianceRequirements:
def test_create_compliance_requirements_success(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
resources_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = [
"us-east-1",
"us-west-2",
]
mock_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
"framework": "CIS AWS Foundations Benchmark",
"version": "1.4.0",
"requirements": {
"1.1": {
"description": "Ensure root access key does not exist",
"checks_status": {
"pass": 0,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
"1.2": {
"description": "Ensure MFA is enabled for root account",
"checks_status": {
"pass": 0,
"fail": 1,
"manual": 0,
"total": 1,
},
"status": "FAIL",
},
},
},
"aws_account_security_onboarding_aws": {
"framework": "AWS Account Security Onboarding",
"version": "1.0",
"requirements": {
"requirement1": {
"description": "Basic security requirement",
"checks_status": {
"pass": 1,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
mock_findings_filter.return_value = []
result = create_compliance_requirements(tenant_id, scan_id)
assert "requirements_created" in result
assert "regions_processed" in result
assert "compliance_frameworks" in result
assert result["regions_processed"] == ["us-east-1", "us-west-2"]
assert result["requirements_created"] == 6
assert len(result["compliance_frameworks"]) == 2
mock_create_objects.assert_called_once()
call_args = mock_create_objects.call_args[0]
assert call_args[0] == tenant_id
assert call_args[1] == ComplianceRequirementOverview
assert len(call_args[2]) == 6
compliance_objects = call_args[2]
for obj in compliance_objects:
assert isinstance(obj, ComplianceRequirementOverview)
assert obj.tenant.id == tenant.id
assert obj.scan == scan
assert obj.region in ["us-east-1", "us-west-2"]
assert obj.compliance_id in [
"cis_1.4_aws",
"aws_account_security_onboarding_aws",
]
def test_create_compliance_requirements_with_findings(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding1 = MagicMock()
mock_finding1.check_id = "check1"
mock_finding1.status = "PASS"
mock_resource1 = MagicMock()
mock_resource1.region = "us-east-1"
mock_finding1.resources.all.return_value = [mock_resource1]
mock_finding2 = MagicMock()
mock_finding2.check_id = "check2"
mock_finding2.status = "FAIL"
mock_resource2 = MagicMock()
mock_resource2.region = "us-west-2"
mock_finding2.resources.all.return_value = [mock_resource2]
mock_findings_filter.return_value = [mock_finding1, mock_finding2]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = [
"us-east-1",
"us-west-2",
]
mock_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 1,
"manual": 0,
"total": 3,
},
"status": "FAIL",
},
"req_2": {
"description": "Test Requirement 2",
"checks": {"check_2": None},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
},
},
}
}
result = create_compliance_requirements(tenant_id, scan_id)
mock_findings_filter.assert_called_once_with(scan_id=scan_id, muted=False)
assert mock_generate_compliance.call_count == 2
assert result["requirements_created"] == 4
assert set(result["regions_processed"]) == {"us-east-1", "us-west-2"}
def test_create_compliance_requirements_no_provider_regions(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.KUBERNETES
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding = MagicMock()
mock_finding.check_id = "check1"
mock_finding.status = "PASS"
mock_resource = MagicMock()
mock_resource.region = "default"
mock_finding.resources.all.return_value = [mock_resource]
mock_findings_filter.return_value = [mock_finding]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.side_effect = AttributeError(
"No get_regions method"
)
mock_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {
"kubernetes_cis": {
"framework": "CIS Kubernetes Benchmark",
"version": "1.6.0",
"requirements": {
"1.1": {
"description": "Test requirement",
"checks_status": {
"pass": 0,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
result = create_compliance_requirements(tenant_id, scan_id)
assert result["regions_processed"] == ["default"]
def test_create_compliance_requirements_empty_findings(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_findings_filter.return_value = []
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
"framework": "CIS AWS Foundations Benchmark",
"version": "1.4.0",
"requirements": {
"1.1": {
"description": "Test requirement",
"checks_status": {
"pass": 0,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
mock_findings_filter.return_value = []
result = create_compliance_requirements(tenant_id, scan_id)
assert result["regions_processed"] == ["us-east-1"]
assert result["requirements_created"] == 1
mock_generate_compliance.assert_not_called()
def test_create_compliance_requirements_error_handling(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_prowler_provider.side_effect = Exception(
"Provider initialization failed"
)
with pytest.raises(Exception, match="Provider initialization failed"):
create_compliance_requirements(tenant_id, scan_id)
def test_create_compliance_requirements_muted_findings_excluded(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_findings_filter.return_value = []
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {}
mock_findings_filter.return_value = []
create_compliance_requirements(tenant_id, scan_id)
mock_findings_filter.assert_called_once_with(scan_id=scan_id, muted=False)
def test_create_compliance_requirements_check_status_priority(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.return_prowler_provider"
) as mock_return_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding1 = MagicMock()
mock_finding1.check_id = "check1"
mock_finding1.status = "PASS"
mock_resource1 = MagicMock()
mock_resource1.region = "us-east-1"
mock_finding1.resources.all.return_value = [mock_resource1]
mock_finding2 = MagicMock()
mock_finding2.check_id = "check1"
mock_finding2.status = "FAIL"
mock_resource2 = MagicMock()
mock_resource2.region = "us-east-1"
mock_finding2.resources.all.return_value = [mock_resource2]
mock_findings_filter.return_value = [mock_finding1, mock_finding2]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_return_prowler_provider.return_value = mock_prowler_provider_instance
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
"framework": "CIS AWS Foundations Benchmark",
"version": "1.4.0",
"requirements": {
"1.1": {
"description": "Test requirement",
"checks_status": {
"pass": 0,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
create_compliance_requirements(tenant_id, scan_id)
assert mock_generate_compliance.call_count == 1
def test_compliance_overview_aggregation_requirement_fail_priority(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.return_prowler_provider"
) as mock_return_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = [
"us-east-1",
"us-west-2",
"eu-west-1",
]
mock_return_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 1,
"manual": 0,
"total": 3,
},
"status": "FAIL",
}
},
}
}
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "FAIL"},
"eu-west-1": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 1,
"manual": 0,
"total": 3,
},
"status": "FAIL",
}
},
}
}
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 3
assert all(obj.requirement_status == "FAIL" for obj in created_objects)
def test_compliance_overview_aggregation_requirement_pass_all_regions(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.return_prowler_provider"
) as mock_return_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
providers_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = ["us-east-1", "us-west-2"]
mock_return_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
}
},
}
}
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
}
},
}
}
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 2
assert all(obj.requirement_status == "PASS" for obj in created_objects)
def test_compliance_overview_aggregation_multiple_requirements_mixed_status(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.return_prowler_provider"
) as mock_return_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = ["us-east-1", "us-west-2"]
mock_return_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
},
"req_2": {
"description": "Test Requirement 2",
"checks": {"check_2": None},
"checks_status": {
"pass": 1,
"fail": 1,
"manual": 0,
"total": 2,
},
"status": "FAIL",
},
},
}
}
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
},
"req_2": {
"description": "Test Requirement 2",
"checks": {
"check_2": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "FAIL"},
}
},
"checks_status": {
"pass": 1,
"fail": 1,
"manual": 0,
"total": 2,
},
"status": "FAIL",
},
},
}
}
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 4
req_1_objects = [
obj for obj in created_objects if obj.requirement_id == "req_1"
]
req_2_objects = [
obj for obj in created_objects if obj.requirement_id == "req_2"
]
assert len(req_1_objects) == 2
assert len(req_2_objects) == 2
assert all(obj.requirement_status == "PASS" for obj in req_1_objects)
assert all(obj.requirement_status == "FAIL" for obj in req_2_objects)
@@ -1,128 +0,0 @@
import random
from collections import defaultdict
import requests
from locust import events, task
from utils.helpers import APIUserBase, get_api_token, get_auth_headers
GLOBAL = {
"token": None,
"available_scans_info": {},
}
SUPPORTED_COMPLIANCE_IDS = {
"aws": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"gcp": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"azure": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"m365": ["cis_4.0", "iso27001_2022", "prowler_threatscore"],
}
def _get_random_scan() -> tuple:
provider_type = random.choice(list(GLOBAL["available_scans_info"].keys()))
scan_info = random.choice(GLOBAL["available_scans_info"][provider_type])
return provider_type, scan_info
def _get_random_compliance_id(provider: str) -> str:
return f"{random.choice(SUPPORTED_COMPLIANCE_IDS[provider])}_{provider}"
def _get_compliance_available_scans_by_provider_type(host: str, token: str) -> dict:
excluded_providers = ["kubernetes"]
response_dict = defaultdict(list)
provider_response = requests.get(
f"{host}/providers?fields[providers]=id,provider&filter[connected]=true",
headers=get_auth_headers(token),
)
for provider in provider_response.json()["data"]:
provider_id = provider["id"]
provider_type = provider["attributes"]["provider"]
if provider_type in excluded_providers:
continue
scan_response = requests.get(
f"{host}/scans?fields[scans]=id&filter[provider]={provider_id}&filter[state]=completed",
headers=get_auth_headers(token),
)
scan_data = scan_response.json()["data"]
if not scan_data:
continue
scan_id = scan_data[0]["id"]
response_dict[provider_type].append(scan_id)
return response_dict
def _get_compliance_regions_from_scan(host: str, token: str, scan_id: str) -> list:
response = requests.get(
f"{host}/compliance-overviews/metadata?filter[scan_id]={scan_id}",
headers=get_auth_headers(token),
)
assert response.status_code == 200, f"Failed to get scan: {response.text}"
return response.json()["data"]["attributes"]["regions"]
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
GLOBAL["token"] = get_api_token(environment.host)
scans_by_provider = _get_compliance_available_scans_by_provider_type(
environment.host, GLOBAL["token"]
)
scan_info = defaultdict(list)
for provider, scans in scans_by_provider.items():
for scan in scans:
scan_info[provider].append(
{
"scan_id": scan,
"regions": _get_compliance_regions_from_scan(
environment.host, GLOBAL["token"], scan
),
}
)
GLOBAL["available_scans_info"] = scan_info
class APIUser(APIUserBase):
def on_start(self):
self.token = GLOBAL["token"]
@task(3)
def compliance_overviews_default(self):
provider_type, scan_info = _get_random_scan()
name = f"/compliance-overviews ({provider_type})"
endpoint = f"/compliance-overviews?" f"filter[scan_id]={scan_info['scan_id']}"
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def compliance_overviews_region(self):
provider_type, scan_info = _get_random_scan()
name = f"/compliance-overviews?filter[region] ({provider_type})"
endpoint = (
f"/compliance-overviews"
f"?filter[scan_id]={scan_info['scan_id']}"
f"&filter[region]={random.choice(scan_info['regions'])}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def compliance_overviews_requirements(self):
provider_type, scan_info = _get_random_scan()
compliance_id = _get_random_compliance_id(provider_type)
name = f"/compliance-overviews/requirements ({compliance_id})"
endpoint = (
f"/compliance-overviews/requirements"
f"?filter[scan_id]={scan_info['scan_id']}"
f"&filter[compliance_id]={compliance_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def compliance_overviews_attributes(self):
provider_type, _ = _get_random_scan()
compliance_id = _get_random_compliance_id(provider_type)
name = f"/compliance-overviews/attributes ({compliance_id})"
endpoint = (
f"/compliance-overviews/attributes"
f"?filter[compliance_id]={compliance_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@@ -1,117 +0,0 @@
# Prowler Multicloud CIS Benchmarks PowerBI Template
![Prowler Report](https://github.com/user-attachments/assets/560f7f83-1616-4836-811a-16963223c72f)
## Getting Started
1. Install Microsoft PowerBI Desktop
This report requires the Microsoft PowerBI Desktop software which can be downloaded for free from Microsoft.
2. Run compliance scans in Prowler
The report uses compliance csv outputs from Prowler. Compliance scans be run using either [Prowler CLI](https://docs.prowler.com/projects/prowler-open-source/en/latest/#prowler-cli) or [Prowler Cloud/App](https://cloud.prowler.com/sign-in)
1. Prowler CLI -&gt; Run a Prowler scan using the --compliance option
2. Prowler Cloud/App -&gt; Navigate to the compliance section to download csv outputs
![Download Compliance Scan](https://github.com/user-attachments/assets/42c11a60-8ce8-4c60-a663-2371199c052b)
The template supports the following CIS Benchmarks only:
| Compliance Framework | Version |
| ---------------------------------------------- | ------- |
| CIS Amazon Web Services Foundations Benchmark | v4.0.1 |
| CIS Google Cloud Platform Foundation Benchmark | v3.0.0 |
| CIS Microsoft Azure Foundations Benchmark | v3.0.0 |
| CIS Kubernetes Benchmark | v1.10.0 |
Ensure you run or download the correct benchmark versions.
3. Create a local directory to store Prowler csvoutputs
Once downloaded, place your csv outputs in a directory on your local machine. If you rename the files, they must maintain the provider in the filename.
To use time-series capabilities such as "compliance percent over time" you'll need scans from multiple dates.
4. Download and run the PowerBI template file (.pbit)
Running the .pbit file will open PowerBI Desktop and prompt you for the full filepath to the local directory
5. Enter the full filepath to the directory created in step 3
Provide the full filepath from the root directory.
Ensure that the filepath is not wrapped in quotation marks (""). If you use Window's "copy as path" feature, it will automatically include quotation marks.
6. Save the report as a PowerBI file (.pbix)
Once the filepath is entered, the template will automatically ingest and populate the report. You can then save this file as a new PowerBI report. If you'd like to generate another report, simply re-run the template file (.pbit) from step 4.
## Validation
After setting up your dashboard, you may want to validate the Prowler csv files were ingested correctly. To do this, navigate to the "Configuration" tab.
The "loaded CIS Benchmarks" table shows the supported benchmarks and versions. This is defined by the template file and not editable by the user. All benchmarks will be loaded regardless of which providers you provided csv outputs for.
The "Prowler CSV Folder" shows the path to the local directory you provided.
The "Loaded Prowler Exports" table shows the ingested csv files from the local directory. It will mark files that are treated as the latest assessment with a green checkmark.
![Prowler Validation](https://github.com/user-attachments/assets/a543ca9b-6cbe-4ad1-b32a-d4ac2163d447)
## Report Sections
The PowerBI Report is broken into three main report pages
| Report Page | Description |
| ----------- | ----------------------------------------------------------------------------------- |
| Overview | Provides general CIS Benchmark overview across both AWS, Azure, GCP, and Kubernetes |
| Benchmark | Provides overview of a single CIS Benchmark |
| Requirement | Drill-through page to view details of a single requirement |
### Overview Page
The overview page is a general CIS Benchmark overview across both AWS, Azure, GCP, and Kubernetes.
![image](https://github.com/user-attachments/assets/94164fa9-36a4-4bb9-890d-e9a9a63a3e7d)
The page has the following components:
| Component | Description |
| ---------------------------------------- | ------------------------------------------------------------------------ |
| CIS Benchmark Overview | Table with benchmark name, Version, and overall compliance percentage |
| Provider by Requirement Status | Bar chart showing benchmark requirements by status by provider |
| Compliance Percent Heatmap | Heatmap showing compliance percent by benchmark and profile level |
| Profile level by Requirement Status | Bar chart showing requirements by status and profile level |
| Compliance Percent Over Time by Provider | Line chart showing overall compliance perecentage over time by provider. |
### Benchmark Page
The benchmark page provides an overview of a single CIS Benchmark. You can select the benchmark from the dropdown as well as scope down to specific profile levels or regions.
![image](https://github.com/user-attachments/assets/34498ee8-317b-4b81-b241-c561451d8def)
The page has the following components:
| Component | Description |
| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
| Compliance Percent Heatmap | Heatmap showing compliance percent by region and profile level |
| Benchmark Section by Requirement Status | Bar chart showing benchmark requirements by bennchmark section and status |
| Compliance percent Over Time by Region | Line chart showing overall compliance percentage over time by region |
| Benchmark Requirements | Table showing requirement section, requirement number, reuqirement title, number of resources tested, status, and number of failing checks |
### Requirement Page
The requirement page is a drill-through page to view details of a single requirement. To populate the requirement page right click on a requiement from the "Benchmark Requirements" table on the benchmark page and select "Drill through" -&gt; "Requirement".
![image](https://github.com/user-attachments/assets/5c9172d9-56fe-4514-b341-7e708863fad6)
The requirement page has the following components:
| Component | Description |
| ------------------------------------------ | --------------------------------------------------------------------------------- |
| Title | Title of the requirement |
| Rationale | Rationale of the requirement |
| Remediation | Remedation guidance for the requirement |
| Region by Check Status | Bar chart showing Prowler checks by region and status |
| Resource Checks for Benchmark Requirements | Table showing Resource ID, Resource Name, Status, Description, and Prowler Checkl |
## Walkthrough Video
[![image](https://github.com/user-attachments/assets/866642c6-43ac-4aac-83d3-bb625002da0b)](https://www.youtube.com/watch?v=lfKFkTqBxjU)
@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
aux = data[
[
"REQUIREMENTS_ATTRIBUTES_CATEGORY",
"REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
"REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
"CHECKID",
"STATUS",
"REGION",
"ACCOUNTID",
"RESOURCEID",
]
]
return get_section_container_iso(
aux, "REQUIREMENTS_ATTRIBUTES_CATEGORY", "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID"
)
-43
View File
@@ -1,43 +0,0 @@
import warnings
from dashboard.common_methods import get_section_containers_3_levels
warnings.filterwarnings("ignore")
def get_table(data):
data["REQUIREMENTS_DESCRIPTION"] = (
data["REQUIREMENTS_ID"] + " - " + data["REQUIREMENTS_DESCRIPTION"]
)
data["REQUIREMENTS_DESCRIPTION"] = data["REQUIREMENTS_DESCRIPTION"].apply(
lambda x: x[:150] + "..." if len(str(x)) > 150 else x
)
data["REQUIREMENTS_ATTRIBUTES_SECTION"] = data[
"REQUIREMENTS_ATTRIBUTES_SECTION"
].apply(lambda x: x[:80] + "..." if len(str(x)) > 80 else x)
data["REQUIREMENTS_ATTRIBUTES_SUBSECTION"] = data[
"REQUIREMENTS_ATTRIBUTES_SUBSECTION"
].apply(lambda x: x[:150] + "..." if len(str(x)) > 150 else x)
aux = data[
[
"REQUIREMENTS_DESCRIPTION",
"REQUIREMENTS_ATTRIBUTES_SECTION",
"REQUIREMENTS_ATTRIBUTES_SUBSECTION",
"CHECKID",
"STATUS",
"REGION",
"ACCOUNTID",
"RESOURCEID",
]
]
return get_section_containers_3_levels(
aux,
"REQUIREMENTS_ATTRIBUTES_SECTION",
"REQUIREMENTS_ATTRIBUTES_SUBSECTION",
"REQUIREMENTS_DESCRIPTION",
)
+1 -4
View File
@@ -4,10 +4,7 @@ from dash import html
def create_provider_card(
provider: str,
provider_logo: str,
account_type: str,
filtered_data,
provider: str, provider_logo: str, account_type: str, filtered_data
) -> List[html.Div]:
"""
Card to display the provider's name and icon.
-25
View File
@@ -245,31 +245,6 @@ def create_service_dropdown(services: list) -> html.Div:
)
def create_provider_dropdown(providers: list) -> html.Div:
"""
Dropdown to select the provider.
Args:
providers (list): List of providers.
Returns:
html.Div: Dropdown to select the provider.
"""
return html.Div(
[
html.Label(
"Provider:", className="text-prowler-stone-900 font-bold text-sm"
),
dcc.Dropdown(
id="provider-filter",
options=[{"label": i, "value": i} for i in providers],
value=["All"],
clearable=False,
multi=True,
style={"color": "#000000"},
),
],
)
def create_status_dropdown(status: list) -> html.Div:
"""
Dropdown to select the status.
+2 -5
View File
@@ -9,11 +9,9 @@ def create_layout_overview(
download_button_xlsx: html.Button,
severity_dropdown: html.Div,
service_dropdown: html.Div,
provider_dropdown: html.Div,
table_row_dropdown: html.Div,
status_dropdown: html.Div,
table_div_header: html.Div,
amount_providers: int,
) -> html.Div:
"""
Create the layout of the dashboard.
@@ -49,10 +47,9 @@ def create_layout_overview(
[
html.Div([severity_dropdown], className=""),
html.Div([service_dropdown], className=""),
html.Div([provider_dropdown], className=""),
html.Div([status_dropdown], className=""),
],
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-4",
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-3",
),
html.Div(
[
@@ -62,7 +59,7 @@ def create_layout_overview(
html.Div(className="flex", id="k8s_card", n_clicks=0),
html.Div(className="flex", id="m365_card", n_clicks=0),
],
className=f"grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-{amount_providers}",
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-5",
),
html.H4(
"Count of Findings by severity",
+20 -13
View File
@@ -346,27 +346,34 @@ def display_data(
if item == "nan" or item.__class__.__name__ != "str":
region_filter_options.remove(item)
# Convert ASSESSMENTDATE to datetime
data["ASSESSMENTDATE"] = pd.to_datetime(data["ASSESSMENTDATE"], errors="coerce")
data["ASSESSMENTDAY"] = data["ASSESSMENTDATE"].dt.date
data["ASSESSMENTDATE"] = data["ASSESSMENTDATE"].dt.strftime("%Y-%m-%d %H:%M:%S")
# Find the latest timestamp per account per day
latest_per_account_day = data.groupby(["ACCOUNTID", "ASSESSMENTDAY"])[
"ASSESSMENTDATE"
].transform("max")
# Choosing the date that is the most recent
data_values = data["ASSESSMENTDATE"].unique()
data_values.sort()
data_values = data_values[::-1]
aux = []
# Keep only rows with the latest timestamp for each account and day
data = data[data["ASSESSMENTDATE"] == latest_per_account_day]
data_values = [str(i) for i in data_values]
for value in data_values:
if value.split(" ")[0] not in [aux[i].split(" ")[0] for i in range(len(aux))]:
aux.append(value)
data_values = [str(i) for i in aux]
# Prepare the date filter options (unique days, as strings)
options_date = sorted(data["ASSESSMENTDAY"].astype(str).unique(), reverse=True)
data = data[data["ASSESSMENTDATE"].isin(data_values)]
data["ASSESSMENTDATE"] = data["ASSESSMENTDATE"].apply(lambda x: x.split(" ")[0])
# Filter by selected date (as string)
options_date = data["ASSESSMENTDATE"].unique()
options_date.sort()
options_date = options_date[::-1]
# Filter DATE
if date_filter_analytics in options_date:
data = data[data["ASSESSMENTDAY"].astype(str) == date_filter_analytics]
data = data[data["ASSESSMENTDATE"] == date_filter_analytics]
else:
date_filter_analytics = options_date[0]
data = data[data["ASSESSMENTDAY"].astype(str) == date_filter_analytics]
data = data[data["ASSESSMENTDATE"] == date_filter_analytics]
if data.empty:
fig = px.pie()
+35 -87
View File
@@ -38,7 +38,6 @@ from dashboard.lib.cards import create_provider_card
from dashboard.lib.dropdowns import (
create_account_dropdown,
create_date_dropdown,
create_provider_dropdown,
create_region_dropdown,
create_service_dropdown,
create_severity_dropdown,
@@ -84,18 +83,7 @@ def load_csv_files(csv_files):
"""Load CSV files into a single pandas DataFrame."""
dfs = []
for file in csv_files:
account_columns = ["ACCOUNT_ID", "ACCOUNT_UID", "SUBSCRIPTION"]
df_sample = pd.read_csv(file, sep=";", on_bad_lines="skip", nrows=1)
dtype_dict = {}
for col in account_columns:
if col in df_sample.columns:
dtype_dict[col] = str
# Read the full file with proper dtypes
df = pd.read_csv(file, sep=";", on_bad_lines="skip", dtype=dtype_dict)
df = pd.read_csv(file, sep=";", on_bad_lines="skip")
if "CHECK_ID" in df.columns:
if "TIMESTAMP" in df.columns or df["PROVIDER"].unique() == "aws":
dfs.append(df.astype(str))
@@ -132,6 +120,7 @@ if data is None:
]
)
else:
# This handles the case where we are using v3 outputs
if "ASSESSMENT_START_TIME" in data.columns:
data["ASSESSMENT_START_TIME"] = data["ASSESSMENT_START_TIME"].str.replace(
@@ -299,13 +288,6 @@ else:
service_dropdown = create_service_dropdown(services)
# Provider Dropdown
providers = ["All"] + list(data["PROVIDER"].unique())
providers = [
x for x in providers if str(x) != "nan" and x.__class__.__name__ == "str"
]
provider_dropdown = create_provider_dropdown(providers)
# Create the download button
download_button_csv = html.Button(
"Download this table as CSV",
@@ -487,11 +469,9 @@ else:
download_button_xlsx,
severity_dropdown,
service_dropdown,
provider_dropdown,
table_row_dropdown,
status_dropdown,
table_div_header,
len(data["PROVIDER"].unique()),
)
@@ -518,8 +498,6 @@ else:
Output("severity-filter", "value"),
Output("severity-filter", "options"),
Output("service-filter", "value"),
Output("provider-filter", "value"),
Output("provider-filter", "options"),
Output("service-filter", "options"),
Output("table-rows", "value"),
Output("table-rows", "options"),
@@ -538,7 +516,6 @@ else:
Input("download_link_xlsx", "n_clicks"),
Input("severity-filter", "value"),
Input("service-filter", "value"),
Input("provider-filter", "value"),
Input("table-rows", "value"),
Input("status-filter", "value"),
Input("search-input", "value"),
@@ -562,7 +539,6 @@ def filter_data(
n_clicks_xlsx,
severity_values,
service_values,
provider_values,
table_row_values,
status_values,
search_value,
@@ -888,25 +864,6 @@ def filter_data(
filtered_data["SERVICE_NAME"].isin(updated_service_values)
]
provider_filter_options = ["All"] + list(filtered_data["PROVIDER"].unique())
# Filter Provider
if provider_values == ["All"]:
updated_provider_values = filtered_data["PROVIDER"].unique()
elif "All" in provider_values and len(provider_values) > 1:
# Remove 'All' from the list
provider_values.remove("All")
updated_provider_values = provider_values
elif len(provider_values) == 0:
updated_provider_values = filtered_data["PROVIDER"].unique()
provider_values = ["All"]
else:
updated_provider_values = provider_values
filtered_data = filtered_data[
filtered_data["PROVIDER"].isin(updated_provider_values)
]
# Filter Status
if status_values == ["All"]:
updated_status_values = filtered_data["STATUS"].unique()
@@ -1127,17 +1084,25 @@ def filter_data(
table_row_options = []
# Calculate table row options as percentages
percentages = [0.05, 0.10, 0.25, 0.50, 0.75, 1.0]
total_rows = len(filtered_data)
for pct in percentages:
value = max(1, int(total_rows * pct))
label = f"{int(pct * 100)}%"
table_row_options.append({"label": label, "value": value})
# Default to 25% if not set
# Take the values from the table_row_values
if table_row_values is None or table_row_values == -1:
table_row_values = table_row_options[0]["value"]
if len(filtered_data) < 25:
table_row_values = len(filtered_data)
else:
table_row_values = 25
if len(filtered_data) < 25:
table_row_values = len(filtered_data)
if len(filtered_data) >= 25:
table_row_options.append(25)
if len(filtered_data) >= 50:
table_row_options.append(50)
if len(filtered_data) >= 75:
table_row_options.append(75)
if len(filtered_data) >= 100:
table_row_options.append(100)
table_row_options.append(len(filtered_data))
# For the values that are nan or none, replace them with ""
filtered_data = filtered_data.replace({np.nan: ""})
@@ -1372,36 +1337,21 @@ def filter_data(
]
# Create Provider Cards
if "aws" in list(data["PROVIDER"].unique()):
aws_card = create_provider_card(
"aws", aws_provider_logo, "Accounts", full_filtered_data
)
else:
aws_card = None
if "azure" in list(data["PROVIDER"].unique()):
azure_card = create_provider_card(
"azure", azure_provider_logo, "Subscriptions", full_filtered_data
)
else:
azure_card = None
if "gcp" in list(data["PROVIDER"].unique()):
gcp_card = create_provider_card(
"gcp", gcp_provider_logo, "Projects", full_filtered_data
)
else:
gcp_card = None
if "kubernetes" in list(data["PROVIDER"].unique()):
k8s_card = create_provider_card(
"kubernetes", ks8_provider_logo, "Clusters", full_filtered_data
)
else:
k8s_card = None
if "m365" in list(data["PROVIDER"].unique()):
m365_card = create_provider_card(
"m365", m365_provider_logo, "Accounts", full_filtered_data
)
else:
m365_card = None
aws_card = create_provider_card(
"aws", aws_provider_logo, "Accounts", full_filtered_data
)
azure_card = create_provider_card(
"azure", azure_provider_logo, "Subscriptions", full_filtered_data
)
gcp_card = create_provider_card(
"gcp", gcp_provider_logo, "Projects", full_filtered_data
)
k8s_card = create_provider_card(
"kubernetes", ks8_provider_logo, "Clusters", full_filtered_data
)
m365_card = create_provider_card(
"m365", m365_provider_logo, "Accounts", full_filtered_data
)
# Subscribe to Prowler Cloud card
subscribe_card = [
@@ -1485,8 +1435,6 @@ def filter_data(
severity_values,
severity_filter_options,
service_values,
provider_values,
provider_filter_options,
service_filter_options,
table_row_values,
table_row_options,
-122
View File
@@ -1,122 +0,0 @@
# AWS Provider
In this page you can find all the details about [Amazon Web Services (AWS)](https://aws.amazon.com/) provider implementation in Prowler.
By default, Prowler will audit just one account and organization settings per scan. To configure it, follow the [getting started](../index.md#aws) page.
## AWS Provider Classes Architecture
The AWS provider implementation follows the general [Provider structure](./provider.md). This section focuses on the AWS-specific implementation, highlighting how the generic provider concepts are realized for AWS in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md). In next subsection you can find a list of the main classes of the AWS provider.
### `AwsProvider` (Main Class)
- **Location:** [`prowler/providers/aws/aws_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/aws_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for AWS-specific logic, session management, credential validation, role assumption, region and organization discovery, and configuration.
- **Key AWS Responsibilities:**
- Initializes and manages AWS sessions (with or without role assumption, MFA, etc.).
- Validates credentials and sets up the AWS identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers enabled AWS regions and organization metadata.
- Provides properties and methods for downstream AWS service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/aws/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/models.py)
- **Purpose:** Define structured data for AWS identity, session, credentials, organization info, and more.
- **Key AWS Models:**
- `AWSOrganizationsInfo`: Holds AWS Organizations metadata, to be used by the checks.
- `AWSCredentials`, `AWSAssumeRoleInfo`, `AWSAssumeRoleConfiguration`: Used for role assumption and session management.
- `AWSIdentityInfo`: Stores account, user, partition, and region context for the scan.
- `AWSSession`: Wraps the current and original [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) sessions and config.
### `AWSService` (Service Base Class)
- **Location:** [`prowler/providers/aws/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/lib/service/service.py)
- **Purpose:** Abstract base class that all AWS service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for AWS.
- **Key AWS Responsibilities:**
- Receives an `AwsProvider` instance to access session, identity, and configuration.
- Manages clients for all services by regions.
- Provides `__threading_call__` method to make boto3 calls in parallel. By default, this calls are made by region, but it can be overridden with the first parameter of the method and use by resource.
- Exposes common audit context (`audited_account`, `audited_account_arn`, `audited_partition`, `audited_resources`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/aws/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for AWS-specific error handling, such as credential and role errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/aws/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/lib/)
- **Purpose:** Helpers for session setup, ARN parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in AWS Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the right now implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/aws/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/aws/services)
- In the [Prowler Hub](https://hub.prowler.com/). For a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all AWS services.
### AWS Service Common Patterns
- Services communicate with AWS using boto3, you can find the documentation with all the services [here](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/index.html).
- Every AWS service class inherits from `AWSService`, ensuring access to session, identity, configuration, and threading utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name and provider (e.g. `super().__init__(__class__.__name__, provider))`). Ensure that the service name in boto3 is the same that you use in the constructor. Usually is used the `__class__.__name__` to get the service name because it is the same as the class name.
- Resource containers **must** be initialized in the constructor. They should be dictionaries, with the key being the resource ARN or equivalent unique identifier and the value being the resource object.
- Resource discovery and attribute collection are parallelized using `self.__threading_call__`, typically by region or resource, for performance. The first parameter of the method is the iterator, if not provided, it will be the region; but if present indicate an array of the resources to be processed.
- Resource filtering is consistently enforced using `self.audit_resources` attribute and `is_resource_filtered` function, it is used to see if user has provided some resource that is not in the audit scope, so we can skip it in the service logic. Normally it is used befor storing the resource in the service container as follows: `if not self.audit_resources or (is_resource_filtered(resource["arn"], self.audit_resources)):`.
- All AWS resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- AWS API calls are wrapped in try/except blocks, with specific handling for `ClientError` and generic exceptions, always logging errors.
- If ARN is not present for some resource, it can be constructed using string interpolation, always including partition, service, region, account, and resource ID.
- Tags and additional attributes that cannot be retrieved from the default call, should be collected and stored for each resource using dedicated methods and threading using the resource object list as iterator.
## Specific Patterns in AWS Checks
The AWS checks pattern is described in [checks page](./checks.md). You can find all the right now implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/aws/services/s3/s3_bucket_acl_prohibited/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/aws/services/s3/s3_bucket_acl_prohibited))
- In the [Prowler Hub](https://hub.prowler.com/). For a more human-readable view.
The best reference to understand how to implement a new check is following the [check creation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_AWS` class models a single finding for an AWS resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_AWS` extends the base report structure with AWS-specific fields, enabling detailed tracking of the resource, ARN, and region associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_AWS`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its AWS-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.id` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_arn`**:
- Uses `resource.arn` if present.
- Defaults to an empty string if ARN is not present in the resource object.
- **`region`**:
- Uses `resource.region` if present.
- Defaults to an empty string if region is not present in the resource object.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from that ones you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_AWS(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-121
View File
@@ -1,121 +0,0 @@
# Azure Provider
In this page you can find all the details about [Microsoft Azure](https://azure.microsoft.com/) provider implementation in Prowler.
By default, Prowler will audit all the subscriptions that it is able to list in the Microsoft Entra tenant, and tenant Entra ID service. To configure it, follow the [getting started](../index.md#azure) page.
## Azure Provider Classes Architecture
The Azure provider implementation follows the general [Provider structure](./provider.md). This section focuses on the Azure-specific implementation, highlighting how the generic provider concepts are realized for Azure in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md). In next subsection you can find a list of the main classes of the Azure provider.
### `AzureProvider` (Main Class)
- **Location:** [`prowler/providers/azure/azure_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/azure_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for Azure-specific logic, session management, credential validation, and configuration.
- **Key Azure Responsibilities:**
- Initializes and manages Azure sessions (supports Service Principal, CLI, Browser, and Managed Identity authentication).
- Validates credentials and sets up the Azure identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Retrieves subscription(s) metadata.
- Provides properties and methods for downstream Azure service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/azure/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/models.py)
- **Purpose:** Define structured data for Azure identity, session, region configuration, and subscription info.
- **Key Azure Models:**
- `AzureIdentityInfo`: Holds Azure identity metadata, including tenant ID, domain, subscription names and IDs, and locations.
- `AzureRegionConfig`: Stores the specific region that will be audited. That can be: Global, US Government or China.
- `AzureSubscription`: Represents a subscription with ID, display name, and state.
### `AzureService` (Service Base Class)
- **Location:** [`prowler/providers/azure/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/lib/service/service.py)
- **Purpose:** Abstract base class that all Azure service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for Azure.
- **Key Azure Responsibilities:**
- Receives an `AzureProvider` instance to access session, identity, and configuration.
- Manages clients for all services by subscription.
- Exposes common audit context (`subscriptions`, `locations`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/azure/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for Azure-specific error handling, such as credential, region, and session errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/azure/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/lib/)
- **Purpose:** Helpers for argument parsing, region setup, mutelist management, and other cross-cutting concerns.
## Specific Patterns in Azure Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/azure/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/azure/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all Azure services.
### Azure Service Common Patterns
- Services communicate with Azure using the Azure Python SDK, mainly using the Azure Management Client (except for the Microsoft Entra ID service, that is using the Microsoft Graph API), you can find the documentation with all the management services [here](https://learn.microsoft.com/en-us/python/api/overview/azure/?view=azure-python).
- Every Azure service class inherits from `AzureService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service Azure Management Client and Prowler provider object (e.g `super().__init__(WebSiteManagementClient, provider)`).
- Resource containers **must** be initialized in the constructor, and they should be dictionaries, with the key being the subscription ID, the value being a dictionary with the resource ID as key and the resource object as value.
- All Azure resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes. Some are represented as dataclasses due to legacy reasons, but new resources should be represented as Pydantic `BaseModel` classes.
- Azure SDK functions are wrapped in try/except blocks, with specific handling for errors, always logging errors. It is a best practice to create a custom function for every Azure SDK call, in that way we can handle the errors in a more specific way.
## Specific Patterns in Azure Checks
The Azure checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/azure/services/storage/storage_blob_public_access_level_is_disabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/azure/services/storage/storage_blob_public_access_level_is_disabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is the [Azure check implementation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_Azure` class models a single finding for an Azure resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_Azure` extends the base report structure with Azure-specific fields, enabling detailed tracking of the resource, resource ID, name, subscription, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_Azure`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its Azure-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.id` if present.
- Otherwise, uses `resource.resource_id` if present.
- Defaults to an empty string if not available.
- **`resource_name`**:
- Uses `resource.name` if present.
- Otherwise, uses `resource.resource_name` if present.
- Defaults to an empty string if not available.
- **`subscription`**:
- Defaults to an empty string, it **must** be set in the check logic.
- **`location`**:
- Uses `resource.location` if present.
- Defaults to an empty string if not available.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_Azure(
metadata=check_metadata,
resource=resource_object
)
report.subscription = subscription_id
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+278 -222
View File
@@ -1,314 +1,370 @@
# Prowler Checks
# Create a new Check for a Provider
This guide explains how to create new checks in Prowler.
Here you can find how to create new checks for Prowler.
**To create a check is required to have a Prowler provider service already created, so if the service is not present or the attribute you want to audit is not retrieved by the service, please refer to the [Service](./services.md) documentation.**
## Introduction
Checks are the core component of Prowler. A check is a piece of code designed to validate whether a configuration aligns with cybersecurity best practices. Execution of a check yields a finding, which includes the result and contextual metadata (e.g., outcome, risks, remediation).
The checks are the fundamental piece of Prowler. A check is a simply piece of code that ensures if something is configured against cybersecurity best practices. Then the check generates a finding with the result and includes the check's metadata to give the user more contextual information about the result, the risk and how to remediate it.
### Creating a Check
To create a new check for a supported Prowler provider, you will need to create a folder with the check name inside the specific service for the selected provider.
To create a new check:
- Prerequisites: A Prowler provider and service must exist. Verify support and check for pre-existing checks via [Prowler Hub](https://hub.prowler.com). If the provider or service is not present, please refer to the [Provider](./provider.md) and [Service](./services.md) documentation for creation instructions.
- Navigate to the service directory. The path should be as follows: `prowler/providers/<provider>/services/<service>`.
- Create a check-specific folder. The path should follow this pattern: `prowler/providers/<provider>/services/<service>/<check_name>`. Adhere to the [Naming Format for Checks](#naming-format-for-checks).
- Populate the folder with files as specified in [File Creation](#file-creation).
### Naming Format for Checks
Checks must be named following the format: `service_subservice_resource_action`.
The name components are:
- `service` The main service being audited (e.g., ec2, entra, iam, etc.)
- `subservice` An individual component or subset of functionality within the service that is being audited. This may correspond to a shortened version of the class attribute accessed within the check. If there is no subservice, just omit.
- `resource` The specific resource type being evaluated (e.g., instance, policy, role, etc.)
- `action` The security aspect or configuration being checked (e.g., public, encrypted, enabled, etc.)
### File Creation
Each check in Prowler follows a straightforward structure. Within the newly created folder, three files must be added to implement the check logic:
- `__init__.py` (empty file) Ensures Python treats the check folder as a package.
- `<check_name>.py` (code file) Contains the check logic, following the prescribed format. Please refer to the [prowler's check code structure](./checks.md#prowlers-check-code-structure) for more information.
- `<check_name>.metadata.json` (metadata file) Defines the check's metadata for contextual information. Please refer to the [check metadata](./checks.md#) for more information.
## Prowler's Check Code Structure
Prowler's check structure is designed for clarity and maintainability. It follows a dynamic loading approach based on predefined paths, ensuring seamless integration of new checks into a provider's service without additional manual steps.
Below the code for a generic check is presented. It is strongly recommended to consult other checks from the same provider and service to understand provider-specific details and patterns. This will help ensure consistency and proper implementation of provider-specific requirements.
Report fields are the most dependent on the provider, consult the `CheckReport<Provider>` class for more information on what can be included in the report [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py).
We are going to use the `ec2_ami_public` check from the `AWS` provider as an example. So the folder name will be `prowler/providers/aws/services/ec2/ec2_ami_public` (following the format `prowler/providers/<provider>/services/<service>/<check_name>`), with the name of check following the pattern: `service_subservice_resource_action`.
???+ note
Legacy providers (AWS, Azure, GCP, Kubernetes) follow the `Check_Report_<Provider>` naming convention. This is not recommended for current instances. Newer providers adopt the `CheckReport<Provider>` naming convention. Learn more at [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/check/models.py).
A subservice is an specific component of a service that is gonna be audited. Sometimes it could be the shortened name of the class attribute that is gonna be accessed in the check.
```python title="Generic Check Class"
# Required Imports
# Import the base Check class and the provider-specific CheckReport class
from prowler.lib.check.models import Check, CheckReport<Provider>
# Import the provider service client
from prowler.providers.<provider>.services.<service>.<service>_client import <service>_client
Inside that folder, we need to create three files:
# Defining the Check Class
# Each check must be implemented as a Python class with the same name as its corresponding file.
# The class must inherit from the Check base class.
class <check_name>(Check):
"""Short description of what is being checked"""
- An empty `__init__.py`: to make Python treat this check folder as a package.
- A `check_name.py` with the above format containing the check's logic. Refer to the [check](./checks.md#check)
- A `check_name.metadata.json` containing the check's metadata. Refer to the [check metadata](./checks.md#check-metadata)
## Check
The Prowler's check structure is very simple and following it there is nothing more to do to include a check in a provider's service because the load is done dynamically based on the paths.
The following is the code for the `ec2_ami_public` check:
```python title="Check Class"
# At the top of the file we need to import the following:
# - Check class which is in charge of the following:
# - Retrieve the check metadata and expose the `metadata()`
# to return a JSON representation of the metadata,
# read more at Check Metadata Model down below.
# - Enforce that each check requires to have the `execute()` function
from prowler.lib.check.models import Check, Check_Report_AWS
# Then you have to import the provider service client
# read more at the Service documentation.
from prowler.providers.aws.services.ec2.ec2_client import ec2_client
# For each check we need to create a python class called the same as the
# file which inherits from the Check class.
class ec2_ami_public(Check):
"""ec2_ami_public verifies if an EC2 AMI is publicly shared"""
# Then, within the check's class we need to create the "execute(self)"
# function, which is enforce by the "Check" class to implement
# the Check's interface and let Prowler to run this check.
def execute(self):
"""Execute <check short description>
Returns:
List[CheckReport<Provider>]: A list of reports containing the result of the check.
"""
# Inside the execute(self) function we need to create
# the list of findings initialised to an empty list []
findings = []
# Iterate over the target resources using the provider service client
for resource in <service>_client.<resources>:
# Initialize the provider-specific report class, passing metadata and resource
report = Check_Report_<Provider>(metadata=self.metadata(), resource=resource)
# Set required fields and implement check logic
# Then, using the service client we need to iterate by the resource we
# want to check, in this case EC2 AMIs stored in the
# "ec2_client.images" object.
for image in ec2_client.images:
# Once iterating for the images, we have to intialise
# the Check_Report_AWS class passing the check's metadata
# using the "metadata" function explained above.
report = Check_Report_AWS(self.metadata())
# For each Prowler check we MUST fill the following
# Check_Report_AWS fields:
# - region
# - resource_id
# - resource_arn
# - resource_tags
# - status
# - status_extended
report.region = image.region
report.resource_id = image.id
report.resource_arn = image.arn
# The resource_tags should be filled if the resource has the ability
# of having tags, please check the service first.
report.resource_tags = image.tags
# Then we need to create the business logic for the check
# which always should be simple because the Prowler service
# must do the heavy lifting and the check should be in charge
# of parsing the data provided
report.status = "PASS"
report.status_extended = f"<Description about why the resource is compliant>"
# If some of the information needed for the report is not inside the resource, it can be set it manually here.
# This depends on the provider and the resource that is being audited.
# report.region = resource.region
# report.resource_tags = getattr(resource, "tags", [])
# ...
# Example check logic (replace with actual logic):
if <non_compliant_condition>:
report.status_extended = f"EC2 AMI {image.id} is not public."
# In this example each "image" object has a boolean attribute
# called "public" to set if the AMI is publicly shared
if image.public:
report.status = "FAIL"
report.status_extended = f"<Description about why the resource is not compliant>"
report.status_extended = (
f"EC2 AMI {image.id} is currently public."
)
# Then at the same level as the "report"
# object we need to append it to the findings list.
findings.append(report)
# Last thing to do is to return the findings list to Prowler
return findings
```
### Data Requirements for Checks in Prowler
### Check Status
One of the most important aspects when creating a new check is ensuring that all required data is available from the service client. Often, default API calls are insufficient. Extending the service class with new methods or resource attributes may be required to fetch and store requisite data.
All the checks MUST fill the `report.status` and `report.status_extended` with the following criteria:
### Statuses for Checks in Prowler
- Status -- `report.status`
- `PASS` --> If the check is passing against the configured value.
- `FAIL` --> If the check is failing against the configured value.
- `MANUAL` --> This value cannot be used unless a manual operation is required in order to determine if the `report.status` is whether `PASS` or `FAIL`.
- Status Extended -- `report.status_extended`
- MUST end in a dot `.`
- MUST include the service audited with the resource and a brief explanation of the result generated, e.g.: `EC2 AMI ami-0123456789 is not public.`
Required Fields: status and status\_extended
### Check Region
Each check **must** populate the `report.status` and `report.status_extended` fields according to the following criteria:
All the checks MUST fill the `report.region` with the following criteria:
- Status field: `report.status`
- `PASS` Assigned when the check confirms compliance with the configured value.
- `FAIL` Assigned when the check detects non-compliance with the configured value.
- `MANUAL` This status must not be used unless manual verification is necessary to determine whether the status (`report.status`) passes (`PASS`) or fails (`FAIL`).
- If the audited resource is regional use the `region` (the name changes depending on the provider: `location` in Azure and GCP and `namespace` in K8s) attribute within the resource object.
- If the audited resource is global use the `service_client.region` within the service client object.
- Status extended field: `report.status_extended`
- It **must** end with a period (`.`).
- It **must** include the audited service, the resource, and a concise explanation of the check result, for instance: `EC2 AMI ami-0123456789 is not public.`.
### Check Severity
### Prowler's Check Severity Levels
The severity of the checks are defined in the metadata file with the `Severity` field. The severity is always in lowercase and can be one of the following values:
The severity of each check is defined in the metadata file using the `Severity` field. Severity values are always lowercase and must be one of the predefined categories below.
- `critical`
- `high`
- `medium`
- `low`
- `informational`
- `critical` Issue that must be addressed immediately.
- `high` Issue that should be addressed as soon as possible.
- `medium` Issue that should be addressed within a reasonable timeframe.
- `low` Issue that can be addressed in the future.
- `informational` Not an issue but provides valuable information.
If the check involves multiple scenarios that may alter its severity, adjustments can be made dynamically within the check's logic using the severity `report.check_metadata.Severity` attribute:
You may need to change it in the check's code if the check has different scenarios that could change the severity. This can be done by using the `report.check_metadata.Severity` attribute:
```python
if <generic_condition_1>:
if <valid for more than 6 months>:
report.status = "PASS"
report.check_metadata.Severity = "informational"
report.status_extended = f"<Resource> is compliant with <requirement>."
elif <generic_condition_2>:
report.status = "FAIL"
report.status_extended = f"RDS Instance {db_instance.id} certificate has over 6 months of validity left."
elif <valid for more than 3 months>:
report.status = "PASS"
report.check_metadata.Severity = "low"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
elif <generic_condition_3>:
report.status_extended = f"RDS Instance {db_instance.id} certificate has between 3 and 6 months of validity."
elif <valid for more than 1 month>:
report.status = "FAIL"
report.check_metadata.Severity = "medium"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
elif <generic_condition_4>:
report.status_extended = f"RDS Instance {db_instance.id} certificate less than 3 months of validity."
elif <valid for less than 1 month>:
report.status = "FAIL"
report.check_metadata.Severity = "high"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
report.status_extended = f"RDS Instance {db_instance.id} certificate less than 1 month of validity."
else:
report.status = "FAIL"
report.check_metadata.Severity = "critical"
report.status_extended = f"<Resource> is not compliant with <requirement>: <critical reason>."
report.status_extended = (
f"RDS Instance {db_instance.id} certificate has expired."
)
```
### Resource Identification in Prowler
Each check **must** populate the report with an unique identifier for the audited resource. This identifier or identifiers are going to depend on the provider and the resource that is being audited. Here are the criteria for each provider:
### Resource ID, Name and ARN
All the checks MUST fill the `report.resource_id` and `report.resource_arn` with the following criteria:
- AWS
- Amazon Resource ID — `report.resource_id`.
- The resource identifier. This is the name of the resource, the ID of the resource, or a resource path. Some resource identifiers include a parent resource (sub-resource-type/parent-resource/sub-resource) or a qualifier such as a version (resource-type:resource-name:qualifier).
- If the resource ID cannot be retrieved directly from the audited resource, it can be extracted from the ARN. It is the last part of the ARN after the last slash (`/`) or colon (`:`).
- If no actual resource to audit exists, this format can be used: `<resource_type>/unknown`
- Amazon Resource Name — `report.resource_arn`.
- The [Amazon Resource Name (ARN)](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) of the audited entity.
- If the ARN cannot be retrieved directly from the audited resource, construct a valid ARN using the `resource_id` component as the audited entity. Examples:
- Bedrock — `arn:<partition>:bedrock:<region>:<account-id>:model-invocation-logging`.
- DirectConnect — `arn:<partition>:directconnect:<region>:<account-id>:dxcon`.
- If no actual resource to audit exists, this format can be used: `arn:<partition>:<service>:<region>:<account-id>:<resource_type>/unknown`.
- Resouce ID and resource ARN:
- If the resource audited is the AWS account:
- `resource_id` -> AWS Account Number
- `resource_arn` -> AWS Account Root ARN
- If we cant get the ARN from the resource audited, we create a valid ARN with the `resource_id` part as the resource audited. Examples:
- Bedrock -> `arn:<partition>:bedrock:<region>:<account-id>:model-invocation-logging`
- DirectConnect -> `arn:<partition>:directconnect:<region>:<account-id>:dxcon`
- If there is no real resource to audit we do the following:
- resource_id -> `resource_type/unknown`
- resource_arn -> `arn:<partition>:<service>:<region>:<account-id>:<resource_type>/unknown`
- Examples:
- AWS Security Hub `arn:<partition>:security-hub:<region>:<account-id>:hub/unknown`.
- Access Analyzer `arn:<partition>:access-analyzer:<region>:<account-id>:analyzer/unknown`.
- GuardDuty `arn:<partition>:guardduty:<region>:<account-id>:detector/unknown`.
- AWS Security Hub -> `arn:<partition>:security-hub:<region>:<account-id>:hub/unknown`
- Access Analyzer -> `arn:<partition>:access-analyzer:<region>:<account-id>:analyzer/unknown`
- GuardDuty -> `arn:<partition>:guardduty:<region>:<account-id>:detector/unknown`
- GCP
- Resource ID — `report.resource_id`.
- Resource ID represents the full, [unambiguous path to a resource](https://google.aip.dev/122#full-resource-names), known as the full resource name. Typically, it follows the format: `//{api_service/resource_path}`.
- If the resource ID cannot be retrieved directly from the audited resource, by default the resource name is used.
- Resource Name — `report.resource_name`.
- Resource Name usually refers to the name of a resource within its service.
- Resource ID -- `report.resource_id`
- GCP Resource --> Resource ID
- Resource Name -- `report.resource_name`
- GCP Resource --> Resource Name
- Azure
- Resource ID -- `report.resource_id`
- Azure Resource --> Resource ID
- Resource Name -- `report.resource_name`
- Azure Resource --> Resource Name
- Resource ID — `report.resource_id`.
- Resource ID represents the full Azure Resource Manager path to a resource, which follows the format: `/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}`.
- Resource Name — `report.resource_name`.
- Resource Name usually refers to the name of a resource within its service.
- If the [resource name](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules) cannot be retrieved directly from the audited resource, the last part of the resource ID can be used.
### Python Model
The following is the Python model for the check's class.
- Kubernetes
As per April 11th 2024 the `Check_Metadata_Model` can be found [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py#L36-L82).
- Resource ID — `report.resource_id`.
- The UID of the Kubernetes object. This is a system-generated string that uniquely identifies the object within the cluster for its entire lifetime. See [Kubernetes Object Names and IDs - UIDs](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids).
- Resource Name — `report.resource_name`.
- The name of the Kubernetes object. This is a client-provided string that must be unique for the resource type within a namespace (for namespaced resources) or cluster (for cluster-scoped resources). Names typically follow DNS subdomain or label conventions. See [Kubernetes Object Names and IDs - Names](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names).
```python
class Check(ABC, Check_Metadata_Model):
"""Prowler Check"""
- M365
def __init__(self, **data):
"""Check's init function. Calls the CheckMetadataModel init."""
# Parse the Check's metadata file
metadata_file = (
os.path.abspath(sys.modules[self.__module__].__file__)[:-3]
+ ".metadata.json"
)
# Store it to validate them with Pydantic
data = Check_Metadata_Model.parse_file(metadata_file).dict()
# Calls parents init function
super().__init__(**data)
- Resource ID — `report.resource_id`.
- If the audited resource has a globally unique identifier such as a `guid`, use it as the `resource_id`.
- If no `guid` exists, use another unique and relevant identifier for the resource, such as the tenant domain, the internal policy ID, or a representative string following the format `<resource_type>/<name_or_id>`.
- Resource Name — `report.resource_name`.
- Use the visible or descriptive name of the audited resource. If no explicit name is available, use a clear description of the resource or configuration being evaluated.
- Examples:
- For an organization:
- `resource_id`: Organization GUID
- `resource_name`: Organization name
- For a policy:
- `resource_id`: Unique policy ID
- `resource_name`: Policy display name
- For global configurations:
- `resource_id`: Tenant domain or representative string (e.g., "userSettings")
- `resource_name`: Description of the configuration (e.g., "SharePoint Settings")
def metadata(self) -> dict:
"""Return the JSON representation of the check's metadata"""
return self.json()
- GitHub
@abstractmethod
def execute(self):
"""Execute the check's logic"""
```
- Resource ID — `report.resource_id`.
- The ID of the Github resource. This is a system-generated integer that uniquely identifies the resource within the Github platform.
- Resource Name — `report.resource_name`.
- The name of the Github resource. In the case of a repository, this is just the repository name. For full repository names use the resource `full_name`.
### Using the audit config
### Configurable Checks in Prowler
Prowler has a [configuration file](../tutorials/configuration_file.md) which is used to pass certain configuration values to the checks, like the following:
See [Configurable Checks](./configurable-checks.md) for detailed information on making checks configurable using the `audit_config` object and configuration file.
```python title="ec2_securitygroup_with_many_ingress_egress_rules.py"
class ec2_securitygroup_with_many_ingress_egress_rules(Check):
def execute(self):
findings = []
## Metadata Structure for Prowler Checks
# max_security_group_rules, default: 50
max_security_group_rules = ec2_client.audit_config.get(
"max_security_group_rules", 50
)
for security_group_arn, security_group in ec2_client.security_groups.items():
```
Each Prowler check must include a metadata file named `<check_name>.metadata.json` that must be located in its directory. This file supplies crucial information for execution, reporting, and context.
```yaml title="config.yaml"
# AWS Configuration
aws:
# AWS EC2 Configuration
### Example Metadata File
# aws.ec2_securitygroup_with_many_ingress_egress_rules
# The default value is 50 rules
max_security_group_rules: 50
```
Below is a generic example of a check metadata file. **Do not include comments in actual JSON files.**
As you can see in the above code, within the service client, in this case the `ec2_client`, there is an object called `audit_config` which is a Python dictionary containing the values read from the configuration file.
In order to use it, you have to check first if the value is present in the configuration file. If the value is not present, you can create it in the `config.yaml` file and then, read it from the check.
???+ note
It is mandatory to always use the `dictionary.get(value, default)` syntax to set a default value in the case the configuration value is not present.
## Check Metadata
Each Prowler check has metadata associated which is stored at the same level of the check's folder in a file called A `check_name.metadata.json` containing the check's metadata.
???+ note
We are going to include comments in this example metadata JSON but they cannot be included because the JSON format does not allow comments.
```json
{
# Provider holds the Prowler provider which the checks belongs to
"Provider": "aws",
"CheckID": "example_check_id",
"CheckTitle": "Example Check Title",
"CheckType": ["Infrastructure Security"],
# CheckID holds check name
"CheckID": "ec2_ami_public",
# CheckTitle holds the title of the check
"CheckTitle": "Ensure there are no EC2 AMIs set as Public.",
# CheckType holds Software and Configuration Checks, check more here
# https://docs.aws.amazon.com/securityhub/latest/userguide/asff-required-attributes.html#Types
"CheckType": [
"Infrastructure Security"
],
# ServiceName holds the provider service name
"ServiceName": "ec2",
# SubServiceName holds the service's subservice or resource used by the check
"SubServiceName": "ami",
# ResourceIdTemplate holds the unique ID for the resource used by the check
"ResourceIdTemplate": "arn:partition:service:region:account-id:resource-id",
# Severity holds the check's severity, always in lowercase (critical, high, medium, low or informational)
"Severity": "critical",
# ResourceType only for AWS, holds the type from here
# https://docs.aws.amazon.com/securityhub/latest/userguide/asff-resources.html
# In case of not existing, use CloudFormation type but removing the "::" and using capital letters only at the beginning of each word. Example: "AWS::EC2::Instance" -> "AwsEc2Instance"
# CloudFormation type reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html
# If the resource type does not exist in the CloudFormation types, use "Other".
"ResourceType": "Other",
"Description": "Example description of the check.",
"Risk": "Example risk if the check fails.",
"RelatedUrl": "https://example.com",
# Description holds the title of the check, for now is the same as CheckTitle
"Description": "Ensure there are no EC2 AMIs set as Public.",
# Risk holds the check risk if the result is FAIL
"Risk": "When your AMIs are publicly accessible, they are available in the Community AMIs where everyone with an AWS account can use them to launch EC2 instances. Your AMIs could contain snapshots of your applications (including their data), therefore exposing your snapshots in this manner is not advised.",
# RelatedUrl holds an URL with more information about the check purpose
"RelatedUrl": "",
# Remediation holds the information to help the practitioner to fix the issue in the case of the check raise a FAIL
"Remediation": {
# Code holds different methods to remediate the FAIL finding
"Code": {
"CLI": "example CLI command",
# CLI holds the command in the provider native CLI to remediate it
"CLI": "aws ec2 modify-image-attribute --region <REGION> --image-id <EC2_AMI_ID> --launch-permission {\"Remove\":[{\"Group\":\"all\"}]}",
# NativeIaC holds the native IaC code to remediate it, use "https://docs.bridgecrew.io/docs"
"NativeIaC": "",
"Other": "",
# Other holds the other commands, scripts or code to remediate it, use "https://www.trendmicro.com/cloudoneconformity"
"Other": "https://docs.prowler.com/checks/public_8#aws-console",
# Terraform holds the Terraform code to remediate it, use "https://docs.bridgecrew.io/docs"
"Terraform": ""
},
# Recommendation holds the recommendation for this check with a description and a related URL
"Recommendation": {
"Text": "Example recommendation text.",
"Url": "https://example.com/remediation"
"Text": "We recommend your EC2 AMIs are not publicly accessible, or generally available in the Community AMIs.",
"Url": "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/cancel-sharing-an-AMI.html"
}
},
"Categories": ["example-category"],
# Categories holds the category or categories where the check can be included, if applied
"Categories": [
"internet-exposed"
],
# DependsOn is not actively used for the moment but it will hold other
# checks wich this check is dependant to
"DependsOn": [],
# RelatedTo is not actively used for the moment but it will hold other
# checks wich this check is related to
"RelatedTo": [],
# Notes holds additional information not covered in this file
"Notes": ""
}
```
### Metadata Fields and Their Purpose
### Remediation Code
- **Provider** — The Prowler provider related to the check. The name **must** be lowercase and match the provider folder name. For supported providers refer to [Prowler Hub](https://hub.prowler.com/check) or directly to [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
For the Remediation Code we use the following knowledge base to fill it:
- **CheckID** — The unique identifier for the check inside the provider, this field **must** match the check's folder and python file and json metadata file name. For more information about the naming refer to the [Naming Format for Checks](#naming-format-for-checks) section.
- Official documentation for the provider
- https://docs.prowler.com/checks/checks-index
- https://www.trendmicro.com/cloudoneconformity
- https://github.com/cloudmatos/matos/tree/master/remediations
- **CheckTitle** — A concise, descriptive title for the check.
### RelatedURL and Recommendation
- **CheckType***For now this field is only standardized for the AWS provider*.
- For AWS this field must follow the [AWS Security Hub Types](https://docs.aws.amazon.com/securityhub/latest/userguide/asff-required-attributes.html#Types) format. So the common pattern to follow is `namespace/category/classifier`, refer to the attached documentation for the valid values for this fields.
The RelatedURL field must be filled with an URL from the provider's official documentation like https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/sharingamis-intro.html
- **ServiceName** — The name of the provider service being audited. This field **must** be in lowercase and match with the service folder name. For supported services refer to [Prowler Hub](https://hub.prowler.com/check) or directly to [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
Also, if not present you can use the Risk and Recommendation texts from the TrendMicro [CloudConformity](https://www.trendmicro.com/cloudoneconformity) guide.
- **SubServiceName** — The subservice or resource within the service, if applicable. For more information refer to the [Naming Format for Checks](#naming-format-for-checks) section.
- **ResourceIdTemplate** — A template for the unique resource identifier. For more information refer to the [Prowler's Resource Identification](#prowlers-resource-identification) section.
### Python Model
The following is the Python model for the check's metadata model. We use the Pydantic's [BaseModel](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel) as the parent class.
- **Severity** — The severity of the finding if the check fails. Must be one of: `critical`, `high`, `medium`, `low`, or `informational`, this field **must** be in lowercase. To get more information about the severity levels refer to the [Prowler's Check Severity Levels](#prowlers-check-severity-levels) section.
As per August 5th 2023 the `Check_Metadata_Model` can be found [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py#L34-L56).
```python
class Check_Metadata_Model(BaseModel):
"""Check Metadata Model"""
- **ResourceType** — The type of resource being audited. *For now this field is only standardized for the AWS provider*.
- For AWS use the [Security Hub resource types](https://docs.aws.amazon.com/securityhub/latest/userguide/asff-resources.html) or, if not available, the PascalCase version of the [CloudFormation type](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html) (e.g., `AwsEc2Instance`). Use "Other" if no match exists.
- **Description** — A short description of what the check does.
- **Risk** — The risk or impact if the check fails, explaining why the finding matters.
- **RelatedUrl** — A URL to official documentation or further reading about the check's purpose. If no official documentation is available, use the risk and recommendation text from trusted third-party sources.
- **Remediation** — Guidance for fixing a failed check, including:
- **Code** — Remediation commands or code snippets for CLI, Terraform, native IaC, or other tools like the Web Console.
- **Recommendation** — A textual human readable recommendation. Here it is not necessary to include actual steps, but rather a general recommendation about what to do to fix the check.
- **Categories** — One or more categories for grouping checks in execution (e.g., `internet-exposed`). For the current list of categories, refer to the [Prowler Hub](https://hub.prowler.com/check).
- **DependsOn** — Currently not used.
- **RelatedTo** — Currently not used.
- **Notes** — Any additional information not covered by other fields.
### Remediation Code Guidelines
When providing remediation steps, reference the following sources:
- Official provider documentation.
- [Prowler Checks Remediation Index](https://docs.prowler.com/checks/checks-index)
- [TrendMicro Cloud One Conformity](https://www.trendmicro.com/cloudoneconformity)
- [CloudMatos Remediation Repository](https://github.com/cloudmatos/matos/tree/master/remediations)
### Python Model Reference
The metadata structure is enforced in code using a Pydantic model. For reference, see the [`CheckMetadata`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py).
Provider: str
CheckID: str
CheckTitle: str
CheckType: list[str]
ServiceName: str
SubServiceName: str
ResourceIdTemplate: str
Severity: str
ResourceType: str
Description: str
Risk: str
RelatedUrl: str
Remediation: Remediation
Categories: list[str]
DependsOn: list[str]
RelatedTo: list[str]
Notes: str
# We set the compliance to None to
# store the compliance later if supplied
Compliance: list = None
```
@@ -1,46 +0,0 @@
# Configurable Checks in Prowler
Prowler empowers users to extend and adapt cloud security coverage by making checks configurable through the use of the `audit_config` object. This approach enables customization of checks to meet specific requirements through a configuration file.
## Understanding the `audit_config` Object
The `audit_config` object is a dictionary attached to each provider's service client (for example, `<service_name>_client.audit_config`). This object loads configuration values from the main configuration file (`prowler/config/config.yaml`). Use `audit_config` to make checks flexible and user-configurable.
## Using `audit_config` to Configure Checks
Retrieve configuration values in a check by using the `.get()` method on the `audit_config` object. For example, to get the minimum number of Availability Zones for Lambda from the configuration file, use the following code. If the value is not set in the configuration, the check defaults to 2:
```python
LAMBDA_MIN_AZS = awslambda_client.audit_config.get("lambda_min_azs", 2)
```
Always provide a default value in `.get()` to ensure the check works even if the configuration is missing the variable.
### Example: Security Group Rule Limit
```python title="ec2_securitygroup_with_many_ingress_egress_rules.py"
class ec2_securitygroup_with_many_ingress_egress_rules(Check):
def execute(self):
findings = []
max_security_group_rules = ec2_client.audit_config.get(
"max_security_group_rules", 50
)
for security_group_arn, security_group in ec2_client.security_groups.items():
# ... check logic ...
```
## Required File Updates for Configurable Variables
When adding a new configurable check to Prowler, update the following files:
- **Configuration File:** Add the new variable under the relevant provider or service section in `prowler/config/config.yaml`.
```yaml
# aws.awslambda_function_vpc_multi_az
lambda_min_azs: 2
```
- **Test Fixtures:** If tests depend on this configuration, add the variable to `tests/config/fixtures/config.yaml`.
- **Documentation:** Document the new variable in the list of configurable checks in `docs/tutorials/configuration_file.md`.
For a complete list of checks that already support configuration, see the [Configuration File Tutorial](../tutorials/configuration_file.md).
This approach ensures that checks are easily configurable, making Prowler highly adaptable to different environments and requirements.
+7 -9
View File
@@ -1,16 +1,14 @@
# Debugging in Prowler
# Debugging
Debugging in Prowler simplifies the development process, allowing developers to efficiently inspect and resolve unexpected issues during execution.
Debugging in Prowler make things easier!
If you are developing Prowler, it's possible that you will encounter some situations where you have to inspect the code in depth to fix some unexpected issues during the execution.
## Debugging with Visual Studio Code
## VSCode
Visual Studio Code (also referred to as VSCode) provides an integrated debugger for executing and analyzing Prowler code. Refer to the official VSCode debugger [documentation](https://code.visualstudio.com/docs/editor/debugging) for detailed instructions.
In VSCode you can run the code using the integrated debugger. Please, refer to this [documentation](https://code.visualstudio.com/docs/editor/debugging) for guidance about the debugger in VSCode.
The following file is an example of the [debugging configuration](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) file that you can add to [Virtual Studio Code](https://code.visualstudio.com/).
### Debugging Configuration Example
The following file is an example of a [debugging configuration](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) file for [Virtual Studio Code](https://code.visualstudio.com/).
This file must be placed inside the *.vscode* directory and named *launch.json*:
This file should inside the *.vscode* folder and its name has to be *launch.json*:
```json
{
+6 -26
View File
@@ -1,28 +1,8 @@
## Contributing to Documentation
## Contribute with documentation
Prowler documentation is built using `mkdocs`, allowing contributors to easily add or enhance documentation.
We use `mkdocs` to build this Prowler documentation site so you can easily contribute back with new docs or improving them. To install all necessary dependencies use `poetry install --with docs`.
### Installation and Setup
Install all necessary dependencies using: `poetry install --with docs`.
1. Install `mkdocs` using your preferred package manager.
2. Running the Documentation Locally
Navigate to the `prowler` repository folder.
Start the local documentation server by running: `mkdocs serve`.
Open `http://localhost:8000` in your browser to view live updates.
3. Making Documentation Changes
Make all needed changes to docs or add new documents. Edit existing Markdown (.md) files inside `prowler/docs`.
To add new sections or files, update the `mkdocs.yaml` file located in the root directory of Prowlers repository.
4. Submitting Changes
Once documentation updates are complete:
Submit a pull request for review.
The Prowler team will assess and merge contributions.
Your efforts help improve Prowler documentation—thank you for contributing!
1. Install `mkdocs` with your favorite package manager.
2. Inside the `prowler` repository folder run `mkdocs serve` and point your browser to `http://localhost:8000` and you will see live changes to your local copy of this documentation site.
3. Make all needed changes to docs or add new documents. To do so just edit existing md files inside `prowler/docs` and if you are adding a new section or file please make sure you add it to `mkdocs.yaml` file in the root folder of the Prowler repo.
4. Once you are done with changes, please send a pull request to us for review and merge. Thank you in advance!
-133
View File
@@ -1,133 +0,0 @@
# Google Cloud Provider
This page details the [Google Cloud Platform (GCP)](https://cloud.google.com/) provider implementation in Prowler.
By default, Prowler will audit all the GCP projects that the authenticated identity can access. To configure it, follow the [getting started](../index.md#google-cloud) page.
## GCP Provider Classes Architecture
The GCP provider implementation follows the general [Provider structure](./provider.md). This section focuses on the GCP-specific implementation, highlighting how the generic provider concepts are realized for GCP in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### Main Class
- **Location:** [`prowler/providers/gcp/gcp_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/gcp_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for GCP-specific logic, session management, credential validation, project and organization discovery, and configuration.
- **Key GCP Responsibilities:**
- Initializes and manages GCP sessions (supports Application Default Credentials, Service Account, OAuth, and impersonation).
- Validates credentials and sets up the GCP identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers accessible GCP projects and organization metadata.
- Provides properties and methods for downstream GCP service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/gcp/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/models.py)
- **Purpose:** Define structured data for GCP identity, project, and organization info.
- **Key GCP Models:**
- `GCPIdentityInfo`: Holds GCP identity metadata, such as the profile name.
- `GCPOrganization`: Represents a GCP organization with ID, name, and display name.
- `GCPProject`: Represents a GCP project with number, ID, name, organization, labels, and lifecycle state.
### `GCPService` (Service Base Class)
- **Location:** [`prowler/providers/gcp/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/lib/service/service.py)
- **Purpose:** Abstract base class that all GCP service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for GCP.
- **Key GCP Responsibilities:**
- Receives a `GcpProvider` instance to access session, identity, and configuration.
- Manages clients for all services by project.
- Filters projects to only those with the relevant API enabled.
- Provides `__threading_call__` method to make API calls in parallel by project or resource.
- Exposes common audit context (`project_ids`, `projects`, `default_project_id`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/gcp/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for GCP-specific error handling, such as credential, session, and project access errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/gcp/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in GCP Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/gcp/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/gcp/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all GCP services.
### GCP Service Common Patterns
- Services communicate with GCP using the Google Cloud Python SDK, you can find the documentation with all the services [here](https://cloud.google.com/python/docs/reference).
- Every GCP service class inherits from `GCPService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name, provider, region (default "global"), and API version (default "v1"). Usually, the service name is the class name in lowercase, so it is called like `super().__init__(__class__.__name__, provider)`.
- Resource containers **must** be initialized in the constructor, typically as dictionaries keyed by resource ID and the value is the resource object.
- Only projects with the API enabled are included in the audit scope.
- Resource discovery and attribute collection can be parallelized using `self.__threading_call__`, typically by region/zone or resource.
- All GCP resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Each GCP API calls are wrapped in try/except blocks, always logging errors.
- Tags and additional attributes that cannot be retrieved from the default call should be collected and stored for each resource using dedicated methods and threading.
## Specific Patterns in GCP Checks
The GCP checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/gcp/services/iam/iam_sa_user_managed_key_unused/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/gcp/services/iam/iam_sa_user_managed_key_unused))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [GCP check implementation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_GCP` class models a single finding for a GCP resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_GCP` extends the base report structure with GCP-specific fields, enabling detailed tracking of the resource, project, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_GCP`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its GCP-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses the explicit `resource_id` argument if provided.
- Otherwise, uses `resource.id` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_name`**:
- Uses the explicit `resource_name` argument if provided.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string.
- **`project_id`**:
- Uses the explicit `project_id` argument if provided.
- Otherwise, uses `resource.project_id` if present.
- Defaults to an empty string.
- **`location`**:
- Uses the explicit `location` argument if provided.
- Otherwise, uses `resource.location` if present.
- Otherwise, uses `resource.region` if present.
- Defaults to "global" if none are available.
All these attributes can be overridden by passing the corresponding argument to the constructor. If the resource object does not contain the required attributes, you must set them manually.
Others attributes are inherited from the `Check_Report` class, from that ones you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_GCP(
metadata=check_metadata,
resource=resource_object,
resource_id="custom-id", # Optional override
resource_name="custom-name", # Optional override
project_id="my-gcp-project", # Optional override
location="us-central1" # Optional override
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-116
View File
@@ -1,116 +0,0 @@
# GitHub Provider
This page details the [GitHub](https://github.com/) provider implementation in Prowler.
By default, Prowler will audit the GitHub account - scanning all repositories, organizations, and applications that your configured credentials can access. To configure it, follow the [getting started](../index.md#github) page.
## GitHub Provider Classes Architecture
The GitHub provider implementation follows the general [Provider structure](./provider.md). This section focuses on the GitHub-specific implementation, highlighting how the generic provider concepts are realized for GitHub in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `GithubProvider` (Main Class)
- **Location:** [`prowler/providers/github/github_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/github_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for GitHub-specific logic, session management, credential validation, and configuration.
- **Key GitHub Responsibilities:**
- Initializes and manages GitHub sessions (supports Personal Access Token, OAuth App, and GitHub App authentication).
- Validates credentials and sets up the GitHub identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Provides properties and methods for downstream GitHub service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/github/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/models.py)
- **Purpose:** Define structured data for GitHub identity, session, and output options.
- **Key GitHub Models:**
- `GithubSession`: Holds authentication tokens and keys for the session.
- `GithubIdentityInfo`, `GithubAppIdentityInfo`: Store account or app identity metadata.
### `GithubService` (Service Base Class)
- **Location:** [`prowler/providers/github/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/lib/service/service.py)
- **Purpose:** Abstract base class for all GitHub service-specific classes.
- **Key GitHub Responsibilities:**
- Receives a `GithubProvider` instance to access session, identity, and configuration.
- Manages GitHub API clients for the authenticated user or app.
- Exposes common audit context (`audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/github/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for GitHub-specific error handling, such as credential and session errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/github/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in GitHub Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/github/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/github/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and by taking other already implemented services as reference.
### GitHub Service Common Patterns
- Services communicate with GitHub using the PyGithub Python SDK. See the [official documentation](https://pygithub.readthedocs.io/).
- Every GitHub service class inherits from `GithubService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name and provider (e.g. `super().__init__(__class__.__name__, provider))`). Ensure that the service name in PyGithub is the same that you use in the constructor. Usually is used the `__class__.__name__` to get the service name because it is the same as the class name.
- Resource containers **must** be initialized in the constructor, typically as dictionaries keyed by resource ID or name.
- All GitHub resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- GitHub API calls are wrapped in try/except blocks, always logging errors.
## Specific Patterns in GitHub Checks
The GitHub checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/github/services/repository/repository_secret_scanning_enabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/github/services/repository/repository_secret_scanning_enabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is the [GitHub check implementation documentation](./checks.md#creating-a-check) and by taking other checks as reference.
### Check Report Class
The `CheckReportGithub` class models a single finding for a GitHub resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`CheckReportGithub` extends the base report structure with GitHub-specific fields, enabling detailed tracking of the resource, name, and owner associated with each finding.
#### Constructor and Attribute Population
When you instantiate `CheckReportGithub`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its GitHub-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses the explicit `resource_id` argument if provided.
- Otherwise, uses `resource.id` if present.
- Defaults to an empty string if not available.
- **`resource_name`**:
- Uses the explicit `resource_name` argument if provided.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if not available.
- **`owner`**:
- Uses the explicit `owner` argument if provided.
- Otherwise, uses `resource.owner` for repositories and `resource.name` for organizations.
- Defaults to an empty string if not available.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = CheckReportGithub(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+1 -1
View File
@@ -1,3 +1,3 @@
# Integration Tests
Coming soon ...
Coming soon ...
+43 -83
View File
@@ -2,89 +2,66 @@
## Introduction
Integrating Prowler with external tools enhances its functionality and enables seamless workflow automation. Prowler supports a variety of integrations to optimize security assessments and reporting.
Integrating Prowler with external tools enhances its functionality and seamlessly embeds it into your workflows. Prowler supports a wide range of integrations to streamline security assessments and reporting. Common integration targets include messaging platforms like Slack, project management tools like Jira, and cloud services such as AWS Security Hub.
### Supported Integration Targets
- Messaging Platforms Example: Slack
- Project Management Tools Example: Jira
- Cloud Services Example: AWS Security Hub
### Integration Guidelines
To integrate Prowler with a specific product:
Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand its architecture and integration mechanisms.
* Identify the most suitable integration method for the intended platform.
* Consult the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand how Prowler works and the way that you can integrate it with the desired product!
* Identify the best approach for the specific platform youre targeting.
## Steps to Create an Integration
### Defining the Integration Purpose
### Identify the Integration Purpose
* Before implementing an integration, clearly define its objective. Common purposes include:
* Clearly define the objective of the integration. For example:
* Sending Prowler findings to a platform for alerts, tracking, or further analysis.
* Review existing integrations in the [`prowler/lib/outputs`](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/outputs) folder for inspiration and implementation examples.
* Sending Prowler findings to a platform for alerting, tracking, or further analysis.
* For inspiration and implementation examples, please review the existing integrations in the [`prowler/lib/outputs`](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/outputs) folder.
### Developing the Integration
### Develop the Integration
* Script Development:
* Write a script to process Prowlers output and interact with the target platforms API.
* If the goal is to send findings, parse Prowlers results and use the platforms API to create entries or notifications.
* For example, to send findings, parse Prowlers results and use the platforms API to create entries or notifications.
* Configuration:
* Ensure the script supports environment-specific settings, such as:
- API endpoints
- Authentication tokens
- Any necessary configurable parameters.
* Ensure your script includes configurable options for environment-specific settings, such as API endpoints and authentication tokens.
### Fundamental Structure
* Integration Class:
* To implement an integration, create a class that encapsulates the required attributes and methods for interacting with the target platform. Example: Jira Integration
* Create a class that encapsulates attributes and methods for the integration.
Here is an example with Jira integration:
```python title="Jira Class"
class Jira:
"""
Jira class to interact with the Jira API
[Note]
This integration is limited to a single Jira Cloud instance, meaning all issues will be created under the same Jira Cloud ID. Future improvements will include the ability to specify a Jira Cloud ID for users associated with multiple accounts.
This integration is limited to a single Jira Cloud, therefore all the issues will be created for same Jira Cloud ID. We will need to work on the ability of providing a Jira Cloud ID if the user is present in more than one.
Attributes
- _redirect_uri: The redirect URI used
- _client_id: The client identifier
Attributes:
- _redirect_uri: The redirect URI
- _client_id: The client ID
- _client_secret: The client secret
- _access_token: The access token
- _refresh_token: The refresh token
- _expiration_date: The authentication expiration
- _cloud_id: The cloud identifier
- _cloud_id: The cloud ID
- _scopes: The scopes needed to authenticate, read:jira-user read:jira-work write:jira-work
- AUTH_URL: The URL to authenticate with Jira
- PARAMS_TEMPLATE: The template for the parameters to authenticate with Jira
- TOKEN_URL: The URL to get the access token from Jira
- API_TOKEN_URL: The URL to get the accessible resources from Jira
Methods
__init__: Initializes the Jira object
- input_authorization_code: Inputs the authorization code
- auth_code_url: Generates the URL to authorize the application
- get_auth: Gets the access token and refreshes it
- get_cloud_id: Gets the cloud identifier from Jira
- get_access_token: Gets the access token
- refresh_access_token: Refreshes the access token from Jira
- test_connection: Tests the connection to Jira and returns a Connection object
- get_projects: Gets the projects from Jira
- get_available_issue_types: Gets the available issue types for a project
- send_findings: Sends the findings to Jira and creates an issue
Methods:
- __init__: Initialize the Jira object
- input_authorization_code: Input the authorization code
- auth_code_url: Generate the URL to authorize the application
- get_auth: Get the access token and refresh token
- get_cloud_id: Get the cloud ID from Jira
- get_access_token: Get the access token
- refresh_access_token: Refresh the access token from Jira
- test_connection: Test the connection to Jira and return a Connection object
- get_projects: Get the projects from Jira
- get_available_issue_types: Get the available issue types for a project
- send_findings: Send the findings to Jira and create an issue
Raises:
- JiraGetAuthResponseError: Failed to get the access token and refresh token
@@ -151,17 +128,9 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
# More properties and methods
```
* Test Connection Method:
* Validating Credentials or Tokens
To ensure a successful connection to the target platform, implement a method that validates authentication credentials or tokens.
#### Method Implementation
The following example demonstrates the `test_connection` method for the `Jira` class:
* Implement a method to validate credentials or tokens, ensuring the connection to the target platform is successful.
The following is the code for the `test_connection` method for the `Jira` class:
```python title="Test connection"
@staticmethod
def test_connection(
@@ -173,8 +142,8 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
"""Test the connection to Jira
Args:
- redirect_uri: The redirect URI used
- client_id: The client identifier
- redirect_uri: The redirect URI
- client_id: The client ID
- client_secret: The client secret
- raise_on_exception: Whether to raise an exception or not
@@ -246,15 +215,9 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
)
return Connection(is_connected=False, error=error)
```
* Send Findings Method:
* Add a method to send Prowler findings to the target platform, adhering to its API specifications.
#### Method Implementation
The following example demonstrates the `send_findings` method for the `Jira` class:
The following is the code for the `send_findings` method for the `Jira` class:
```python title="Send findings method"
def send_findings(
self,
@@ -358,19 +321,16 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
)
```
### Testing the Integration
### Testing
* Conduct integration testing in a controlled environment to validate expected behavior. Ensure the following:
* Transmission Accuracy Verify that Prowler findings are correctly sent and processed by the target platform.
* Error Handling Simulate edge cases to assess robustness and failure recovery mechanisms.
* Test the integration in a controlled environment to confirm it behaves as expected.
* Verify that Prowlers findings are accurately transmitted and correctly processed by the target platform.
* Simulate edge cases to ensure robust error handling.
### Documentation
* Ensure the following elements are included:
* Setup Instructions List all necessary dependencies and installation steps.
* Configuration Details Specify required environment variables, authentication steps, etc.
* Example Use Cases Provide practical scenarios demonstrating functionality.
* Troubleshooting Guide Document common issues and resolution steps.
* Comprehensive and clear documentation improves maintainability and simplifies onboarding.
* Provide clear, detailed documentation for your integration:
* Setup instructions, including any required dependencies.
* Configuration details, such as environment variables or authentication steps.
* Example use cases and troubleshooting tips.
* Good documentation ensures maintainability and simplifies onboarding for team members.
+36 -137
View File
@@ -1,176 +1,75 @@
# Introduction to developing in Prowler
# Developer Guide
Extending Prowler
You can extend Prowler Open Source in many different ways, in most cases you will want to create your own checks and compliance security frameworks, here is where you can learn about how to get started with it. We also include how to create custom outputs, integrations and more.
Prowler can be extended in various ways, with common use cases including:
## Get the code and install all dependencies
- New security checks
- New compliance frameworks
- New output formats
- New integrations
- New proposed features
First of all, you need a version of Python 3.9 or higher and also `pip` installed to be able to install all dependencies required.
All the relevant information for these cases is included in this guide.
Then, to start working with the Prowler Github repository you need to fork it to be able to propose changes for new features, bug fixing, etc. To fork the Prowler repo please refer to [this guide](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo?tool=webui#forking-a-repository).
## Getting the Code and Installing All Dependencies
### Prerequisites
Before proceeding, ensure the following:
- Git is installed.
- Python 3.9 or higher is installed.
- `poetry` is installed to manage dependencies.
### Forking the Prowler Repository
To contribute to Prowler, fork the Prowler GitHub repository. This allows you to propose changes, submit new features, and fix bugs. For guidance on forking, refer to the [official GitHub documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo?tool=webui#forking-a-repository).
### Cloning Your Forked Repository
Once your fork is created, clone it using the following commands:
Once that is satisfied go ahead and clone your forked repo:
```
git clone https://github.com/<your-github-user>/prowler
cd prowler
```
For isolation and to avoid conflicts with other environments, we recommend using `poetry`, a Python dependency management tool. You can install it by following the instructions [here](https://python-poetry.org/docs/#installation).
### Dependency Management and Environment Isolation
To prevent conflicts between environments, we recommend using `poetry`, a Python dependency management solution. Install it by following the [instructions](https://python-poetry.org/docs/#installation).
### Installing Dependencies
To install all required dependencies, including those needed for development, run:
Then install all dependencies including the ones for developers:
```
poetry install --with dev
eval $(poetry env activate)
eval $(poetry env activate) \
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
???+ important
Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
In case you have any doubts, consult the [Poetry environment activation guide](https://python-poetry.org/docs/managing-environments/#activating-the-environment).
## Contributing to Prowler
### Ways to Contribute
Here are some ideas for collaborating with Prowler:
1. **Review Current Issues**: Check out our [GitHub Issues](https://github.com/prowler-cloud/prowler/issues) page. We often tag issues as `good first issue` - these are perfect for new contributors as they are typically well-defined and manageable in scope.
2. **Expand Prowler's Capabilities**: Prowler is constantly evolving, and you can be a part of its growth. Whether you are adding checks, supporting new services, or introducing integrations, your contributions help improve the tool for everyone. Here is how you can get involved:
- **Adding New Checks**
Want to improve Prowler's detection capabilities for your favorite cloud provider? You can contribute by writing new checks. To get started, follow the [create a new check guide](./checks.md).
- **Adding New Services**
One key service for your favorite cloud provider is missing? Add it to Prowler! To add a new service, check out the [create a new service guide](./services.md). Do not forget to include relevant checks to validate functionality.
- **Adding New Providers**
If you would like to extend Prowler to work with a new cloud provider, follow the [create a new provider guide](./provider.md). This typically involves setting up new services and checks to ensure compatibility.
- **Adding New Output Formats**
Want to tailor how results are displayed or exported? You can add custom output formats by following the [create a new output format guide](./outputs.md).
- **Adding New Integrations**
Prowler can work with other tools and platforms through integrations. If you would like to add one, see the [create a new integration guide](./integrations.md).
- **Proposing or Implementing Features**
Got an idea to make Prowler better? Whether it is a brand-new feature or an enhancement to an existing one, you are welcome to propose it or help implement community-requested improvements.
3. **Improve Documentation**: Help make Prowler more accessible by enhancing our documentation, fixing typos, or adding examples/tutorials. See the tutorial of how we write our documentation [here](./documentation.md).
4. **Bug Fixes**: If you find any issues or bugs, you can report them in the [GitHub Issues](https://github.com/prowler-cloud/prowler/issues) page and if you want you can also fix them.
Remember, our community is here to help! If you need guidance, do not hesitate to ask questions in the issues or join our [Slack workspace](https://goto.prowler.com/slack).
### Pre-Commit Hooks
This repository uses Git pre-commit hooks managed by the [pre-commit](https://pre-commit.com/) tool, it is installed with `poetry install --with dev`. Next, run the following command in the root of this repository:
## Contributing with your code or fixes to Prowler
This repo has git pre-commit hooks managed via the [pre-commit](https://pre-commit.com/) tool. [Install](https://pre-commit.com/#install) it how ever you like, then in the root of this repo run:
```shell
pre-commit install
```
Successful installation should produce the following output:
You should get an output like the following:
```shell
pre-commit installed at .git/hooks/pre-commit
```
### Code Quality and Security Checks
Before merging pull requests, several automated checks and utilities ensure code security and updated dependencies:
Before we merge any of your pull requests we pass checks to the code, we use the following tools and automation to make sure the code is secure and dependencies up-to-dated:
???+ note
These should have been already installed if `poetry install --with dev` was already run.
These should have been already installed if you ran `poetry install --with dev`
- [`bandit`](https://pypi.org/project/bandit/) for code security review.
- [`safety`](https://pypi.org/project/safety/) and [`dependabot`](https://github.com/features/security) for dependencies.
- [`hadolint`](https://github.com/hadolint/hadolint) and [`dockle`](https://github.com/goodwithtech/dockle) for container security.
- [`Snyk`](https://docs.snyk.io/integrations/snyk-container-integrations/container-security-with-docker-hub-integration) for container security in Docker Hub.
- [`clair`](https://github.com/quay/clair) for container security in Amazon ECR.
- [`vulture`](https://pypi.org/project/vulture/), [`flake8`](https://pypi.org/project/flake8/), [`black`](https://pypi.org/project/black/), and [`pylint`](https://pypi.org/project/pylint/) for formatting and best practices.
- [`hadolint`](https://github.com/hadolint/hadolint) and [`dockle`](https://github.com/goodwithtech/dockle) for our containers security.
- [`Snyk`](https://docs.snyk.io/integrations/snyk-container-integrations/container-security-with-docker-hub-integration) in Docker Hub.
- [`clair`](https://github.com/quay/clair) in Amazon ECR.
- [`vulture`](https://pypi.org/project/vulture/), [`flake8`](https://pypi.org/project/flake8/), [`black`](https://pypi.org/project/black/) and [`pylint`](https://pypi.org/project/pylint/) for formatting and best practices.
Additionally, ensure the latest version of [`TruffleHog`](https://github.com/trufflesecurity/trufflehog) is installed to scan for sensitive data in the code. Follow the official [installation guide](https://github.com/trufflesecurity/trufflehog?tab=readme-ov-file#floppy_disk-installation) for setup.
You can see all dependencies in file `pyproject.toml`.
### Dependency Management
Moreover, you would need to install [`TruffleHog`](https://github.com/trufflesecurity/trufflehog) on the latest version to check for secrets in the code. You can install it using the official installation guide [here](https://github.com/trufflesecurity/trufflehog?tab=readme-ov-file#floppy_disk-installation).
All dependencies are listed in the `pyproject.toml` file.
For proper code documentation, refer to the following and follow the code documentation practices presented there: [Google Python Style Guide - Comments and Docstrings](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings).
Additionally, please ensure to follow the code documentation practices outlined in this guide: [Google Python Style Guide - Comments and Docstrings](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings).
???+ note
If you encounter issues when committing to the Prowler repository, use the `--no-verify` flag with the `git commit` command.
### Repository Folder Structure
Understanding the layout of the Prowler codebase will help you quickly find where to add new features, checks, or integrations. The following is a high-level overview from the root of the repository:
```
prowler/
├── prowler/ # Main source code for Prowler SDK (CLI, providers, services, checks, compliances, config, etc.)
├── api/ # API server and related code
├── dashboard/ # Local Dashboard extracted from the CLI output
├── ui/ # Web UI components
├── util/ # Utility scripts and helpers
├── tests/ # Prowler SDK test suite
├── docs/ # Documentation, including this guide
├── examples/ # Example output formats for providers and scripts
├── permissions/ # Permission-related files and policies
├── contrib/ # Community-contributed scripts or modules
├── kubernetes/ # Kubernetes deployment files
├── .github/ # GitHub related files (workflows, issue templates, etc.)
├── pyproject.toml # Python project configuration (Poetry)
├── poetry.lock # Poetry lock file
├── README.md # Project overview and getting started
├── Makefile # Common development commands
├── Dockerfile # SDK Docker container
├── docker-compose.yml # Prowler App Docker compose
└── ... # Other supporting files
```
If you have any trouble when committing to the Prowler repository, add the `--no-verify` flag to the `git commit` command.
## Pull Request Checklist
When creating or reviewing a pull request in https://github.com/prowler-cloud/prowler, follow [this checklist](https://github.com/prowler-cloud/prowler/blob/master/.github/pull_request_template.md#checklist).
If you create or review a PR in https://github.com/prowler-cloud/prowler please follow this checklist:
## Contribution Appreciation
- [ ] Make sure you've read the Prowler Developer Guide at https://docs.prowler.cloud/en/latest/developer-guide/introduction/
- [ ] Are we following the style guide, hence installed all the linters and formatters? Please check https://docs.prowler.cloud/en/latest/developer-guide/introduction/#contributing-with-your-code-or-fixes-to-prowler
- [ ] Are we increasing/decreasing the test coverage? Please, review if we need to include/modify tests for the new code.
- [ ] Are we modifying outputs? Please review it carefully.
- [ ] Do we need to modify the Prowler documentation to reflect the changes introduced?
- [ ] Are we introducing possible breaking changes? Are we modifying a core feature?
If you enjoy swag, wed love to thank you for your contribution with laptop stickers or other Prowler merchandise!
To request swag: Share your pull request details in our [Slack workspace](https://goto.prowler.com/slack).
## Want some swag as appreciation for your contribution?
You can also reach out to Toni de la Fuente on [Twitter](https://twitter.com/ToniBlyx)his DMs are open!
# Testing a Pull Request from a Specific Branch
To test Prowler from a specific branch (for example, to try out changes from a pull request before it is merged), you can use `pipx` to install directly from GitHub:
```sh
pipx install "git+https://github.com/prowler-cloud/prowler.git@branch-name"
```
Replace `branch-name` with the name of the branch you want to test. This will install Prowler in an isolated environment, allowing you to try out the changes safely.
If you are like us and you love swag, we are happy to thank you for your contribution with some laptop stickers or whatever other swag we may have at that time. Please, tell us more details and your pull request link in our [Slack workspace here](https://goto.prowler.com/slack). You can also reach out to Toni de la Fuente on Twitter [here](https://twitter.com/ToniBlyx), his DMs are open.
-117
View File
@@ -1,117 +0,0 @@
# Kubernetes Provider
This page details the [Kubernetes](https://kubernetes.io/) provider implementation in Prowler.
By default, Prowler will audit all namespaces in the Kubernetes cluster accessible by the configured context. To configure it, follow the [getting started](../index.md#kubernetes) page.
## Kubernetes Provider Classes Architecture
The Kubernetes provider implementation follows the general [Provider structure](./provider.md). This section focuses on the Kubernetes-specific implementation, highlighting how the generic provider concepts are realized for Kubernetes in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `KubernetesProvider` (Main Class)
- **Location:** [`prowler/providers/kubernetes/kubernetes_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/kubernetes_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for Kubernetes-specific logic, session management, context and namespace discovery, credential validation, and configuration.
- **Key Kubernetes Responsibilities:**
- Initializes and manages Kubernetes sessions (supports kubeconfig file or content, context selection, and namespace scoping).
- Validates credentials and sets up the Kubernetes identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers accessible namespaces and cluster metadata.
- Provides properties and methods for downstream Kubernetes service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/kubernetes/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/models.py)
- **Purpose:** Define structured data for Kubernetes identity and session info.
- **Key Kubernetes Models:**
- `KubernetesIdentityInfo`: Holds Kubernetes identity metadata, such as context, cluster, and user.
- `KubernetesSession`: Stores the Kubernetes API client and context information.
### `KubernetesService` (Service Base Class)
- **Location:** [`prowler/providers/kubernetes/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/lib/service/service.py)
- **Purpose:** Abstract base class that all Kubernetes service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for Kubernetes.
- **Key Kubernetes Responsibilities:**
- Receives a `KubernetesProvider` instance to access session, identity, and configuration.
- Manages the Kubernetes API client and context.
- Provides a `__threading_call__` method to make API calls in parallel by resource.
- Exposes common audit context (`context`, `api_client`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/kubernetes/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for Kubernetes-specific error handling, such as session, API, and configuration errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/kubernetes/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in Kubernetes Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/kubernetes/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/kubernetes/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other already implemented services as reference.
### Kubernetes Service Common Patterns
- Services communicate with Kubernetes using the Kubernetes Python SDK. See the [official documentation](https://github.com/kubernetes-client/python/blob/master/kubernetes/README.md/).
- Every Kubernetes service class inherits from `KubernetesService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the provider object, and initializes resource containers (typically as dictionaries keyed by resource UID or name).
- Resource discovery and attribute collection can be parallelized using `self.__threading_call__`.
- All Kubernetes resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Kubernetes API calls are wrapped in try/except blocks, always logging errors.
- Additional attributes that cannot be retrieved from the default call should be collected and stored for each resource using dedicated methods and threading.
## Specific Patterns in Kubernetes Checks
The Kubernetes checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/kubernetes/services/rbac/rbac_minimize_wildcard_use_roles/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/kubernetes/services/rbac/rbac_minimize_wildcard_use_roles))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [Kubernetes check implementation documentation](./checks.md#creating-a-check) and taking other checks as reference.
### Check Report Class
The `Check_Report_Kubernetes` class models a single finding for a Kubernetes resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_Kubernetes` extends the base report structure with Kubernetes-specific fields, enabling detailed tracking of the resource, name, and namespace associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_Kubernetes`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its Kubernetes-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.uid` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_name`**:
- Uses `resource.name` if present.
- Defaults to an empty string if not available.
- **`namespace`**:
- Uses `resource.namespace` if present.
- Defaults to "cluster-wide" for cluster-scoped resources.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_Kubernetes(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-131
View File
@@ -1,131 +0,0 @@
# Microsoft 365 (M365) Provider
This page details the [Microsoft 365 (M365)](https://www.microsoft.com/en-us/microsoft-365) provider implementation in Prowler.
By default, Prowler will audit the Microsoft Entra ID tenant and its supported services. To configure it, follow the [getting started](../index.md#microsoft-365) page.
---
## PowerShell Requirements for M365 Checks
> **Most Microsoft 365 checks in Prowler require PowerShell, not just the Microsoft Graph API.**
- **PowerShell is essential** for retrieving data from Exchange Online, Teams, Defender, Purview, and other M365 services. Many checks cannot be performed using only the Graph API.
- **PowerShell 7.4 or higher is required** (7.5 recommended). PowerShell 5.1 and earlier versions are not supported for M365 checks.
- **Required modules:**
- [ExchangeOnlineManagement](https://www.powershellgallery.com/packages/ExchangeOnlineManagement/3.6.0) (≥ 3.6.0)
- [MicrosoftTeams](https://www.powershellgallery.com/packages/MicrosoftTeams/6.6.0) (≥ 6.6.0)
- If you use Prowler Cloud or the official containers, PowerShell is pre-installed. For local or pip installations, you must install PowerShell and the modules yourself. See [Requirements: Supported PowerShell Versions](../getting-started/requirements.md#supported-powershell-versions) and [Needed PowerShell Modules](../getting-started/requirements.md#needed-powershell-modules).
- For more details and troubleshooting, see [Use of PowerShell in M365](../tutorials/microsoft365/use-of-powershell.md).
---
## M365 Provider Classes Architecture
The M365 provider implementation follows the general [Provider structure](./provider.md). This section focuses on the M365-specific implementation, highlighting how the generic provider concepts are realized for M365 in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `M365Provider` (Main Class)
- **Location:** [`prowler/providers/m365/m365_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/m365_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for M365-specific logic, session management, credential validation, region/authority configuration, and identity context.
- **Key M365 Responsibilities:**
- Initializes and manages M365 sessions (supports Service Principal, environment variables, Azure CLI, browser, and user/password authentication).
- Validates credentials and sets up the M365 identity context.
- Manages the Microsoft Graph API client and the PowerShell client.
- Loads and manages configuration, mutelist, and fixer settings.
- Provides properties and methods for downstream M365 service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/m365/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/models.py)
- **Purpose:** Define structured data for M365 identity, session, region configuration, and credentials.
- **Key M365 Models:**
- `M365IdentityInfo`: Holds M365 identity metadata, including tenant ID, domain(s), user, and location.
- `M365RegionConfig`: Stores the specific region/authority and API base URL for the tenant.
- `M365Credentials`: Represents credentials for authentication (user, password, client ID, client secret, tenant ID, etc.).
### `M365Service` (Service Base Class)
- **Location:** [`prowler/providers/m365/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/service/service.py)
- **Purpose:** Abstract base class for all M365 service-specific classes.
- **Key M365 Responsibilities:**
- Receives an `M365Provider` instance to access session, identity, and configuration.
- Manages the Microsoft Graph API client for the service.
- Initializes a PowerShell client for most services if credentials and identity are available.
- Exposes common audit context (`audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/m365/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for M365-specific error handling, such as credential, session, region, and argument errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/m365/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/)
- **Purpose:** Helpers for argument parsing, region/authority setup, mutelist management, PowerShell integration, and other cross-cutting concerns.
> **Key File: [`m365_powershell.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/powershell/m365_powershell.py)**
>
> This is the core module for Microsoft 365 PowerShell integration. It manages authentication, session handling, and provides a comprehensive set of methods for interacting with Microsoft Teams, Exchange Online, and Defender policies via PowerShell.
>
> This module provides secure credential management and authentication using MSAL and PowerShell. It handles automated installation and initialization of required PowerShell modules. The module offers a rich set of methods for retrieving and managing Teams, Exchange, and Defender configurations. It serves as the central component for all M365 provider operations that require PowerShell automation.
## Specific Patterns in M365 Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/m365/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/m365/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is by following the [service implementation documentation](./services.md#adding-a-new-service) and by taking other already implemented services as reference.
### M365 Service Common Patterns
- Services communicate with Microsoft 365 using the Microsoft Graph API **and/or PowerShell**. See the [official documentation](https://learn.microsoft.com/en-us/graph/api/overview) and [PowerShell reference](https://learn.microsoft.com/en-us/powershell/).
- Every M365 service class inherits from `M365Service`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the provider object, and initializes the Graph client and the PowerShell client.
- Resource containers **must** be initialized in the constructor, typically as objects that represent the different settings of the service.
- All M365 resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Microsoft Graph API and PowerShell calls are wrapped in try/except blocks, always logging errors.
- To retrieve some data in the services, it is so common that you have to create a new method also in the `m365_powershell.py` file to later be called in the service.
## Specific Patterns in M365 Checks
The M365 checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/m365/services/entra/entra_users_mfa_enabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/m365/services/entra/entra_users_mfa_enabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [M365 check implementation documentation](./checks.md#creating-a-check) and by taking other checks as reference.
### Check Report Class
The `CheckReportM365` class models a single finding for a Microsoft 365 resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`CheckReportM365` extends the base report structure with M365-specific fields, enabling detailed tracking of the resource, name, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `CheckReportM365`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its M365-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**: A required field that **must** be explicitly set in the constructor to identify the resource being checked.
- **`resource_name`**: A required field that **must** be explicitly set in the constructor to provide a human-readable name for the resource.
- **`location`**: A required field that can be explicitly set in the constructor to indicate where the resource is located. If not specified, defaults to "global".
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = CheckReportM365(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+122 -162
View File
@@ -2,38 +2,21 @@
## Introduction
Prowler supports multiple output formats, allowing users to tailor findings presentation to their needs. Custom output formats are valuable when integrating Prowler with third-party tools, generating specialized reports, or adapting data for specific workflows. By defining a custom output format, users can refine how findings are structured, extracting and displaying only the most relevant information.
Prowler can generate outputs in multiple formats, allowing users to customize the way findings are presented. This is particularly useful when integrating Prowler with third-party tools, creating specialized reports, or simply tailoring the data to meet specific requirements. A custom output format gives you the flexibility to extract and display only the most relevant information in the way you need it.
- Output Organization in Prowler
Prowler outputs are managed within the `/lib/outputs` directory. Each format—such as JSON, CSV, HTML—is implemented as a Python class.
- Outputs are generated based on scan findings, which are stored as structured dictionaries containing details such as:
- Resource IDs
- Severities
- Descriptions
- Other relevant metadata
- Creation Guidelines
Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) for insights into Prowlers architecture and best practices for creating custom outputs.
- Identify the most suitable integration method for the output being targeted.
* Prowler organizes its outputs in the `/lib/outputs` directory. Each format (e.g., JSON, CSV, HTML) is implemented as a Python class.
* Outputs are generated based on findings collected during a scan. Each finding is represented as a structured dictionary containing details like resource IDs, severities, descriptions, and more.
* Consult the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand how Prowler works and the way that you can create it with the desired output!
* Identify the best approach for the specific output youre targeting.
## Steps to Create a Custom Output Format
### Schema
- Output Class:
- The class must inherit from `Output`. Review the [Output Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/outputs/output.py).
- Create a class that encapsulates the required attributes and methods for interacting with the target platform. Below the code for the `CSV` class is presented:
* Output Class:
* The class must inherit from `Output`. Review the [Output Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/outputs/output.py).
* Create a class that encapsulates attributes and methods for the output.
The following is the code for the `CSV` class:
```python title="CSV Class"
class CSV(Output):
def transform(self, findings: List[Finding]) -> None:
@@ -45,137 +28,118 @@ Prowler supports multiple output formats, allowing users to tailor findings pres
"""
...
```
* Transform Method:
* This method will transform the findings provided by Prowler to a specific format.
The following is the code for the `transform` method for the `CSV` class:
```python title="Transform"
def transform(self, findings: List[Finding]) -> None:
"""Transforms the findings into the CSV format.
Args:
findings (list[Finding]): a list of Finding objects
- Transform Method:
- This method will transform the findings provided by Prowler to a specific format.
#### Method Implementation
The following example demonstrates the `transform` method for the `CSV` class:
```python title="Transform"
def transform(self, findings: List[Finding]) -> None:
"""Transforms the findings into the CSV format.
Args:
findings (list[Finding]): a list of Finding objects
"""
try:
for finding in findings:
finding_dict = {}
finding_dict["AUTH_METHOD"] = finding.auth_method
finding_dict["TIMESTAMP"] = finding.timestamp
finding_dict["ACCOUNT_UID"] = finding.account_uid
finding_dict["ACCOUNT_NAME"] = finding.account_name
finding_dict["ACCOUNT_EMAIL"] = finding.account_email
finding_dict["ACCOUNT_ORGANIZATION_UID"] = (
finding.account_organization_uid
)
finding_dict["ACCOUNT_ORGANIZATION_NAME"] = (
finding.account_organization_name
)
finding_dict["ACCOUNT_TAGS"] = unroll_dict(
finding.account_tags, separator=":"
)
finding_dict["FINDING_UID"] = finding.uid
finding_dict["PROVIDER"] = finding.metadata.Provider
finding_dict["CHECK_ID"] = finding.metadata.CheckID
finding_dict["CHECK_TITLE"] = finding.metadata.CheckTitle
finding_dict["CHECK_TYPE"] = unroll_list(finding.metadata.CheckType)
finding_dict["STATUS"] = finding.status.value
finding_dict["STATUS_EXTENDED"] = finding.status_extended
finding_dict["MUTED"] = finding.muted
finding_dict["SERVICE_NAME"] = finding.metadata.ServiceName
finding_dict["SUBSERVICE_NAME"] = finding.metadata.SubServiceName
finding_dict["SEVERITY"] = finding.metadata.Severity.value
finding_dict["RESOURCE_TYPE"] = finding.metadata.ResourceType
finding_dict["RESOURCE_UID"] = finding.resource_uid
finding_dict["RESOURCE_NAME"] = finding.resource_name
finding_dict["RESOURCE_DETAILS"] = finding.resource_details
finding_dict["RESOURCE_TAGS"] = unroll_dict(finding.resource_tags)
finding_dict["PARTITION"] = finding.partition
finding_dict["REGION"] = finding.region
finding_dict["DESCRIPTION"] = finding.metadata.Description
finding_dict["RISK"] = finding.metadata.Risk
finding_dict["RELATED_URL"] = finding.metadata.RelatedUrl
finding_dict["REMEDIATION_RECOMMENDATION_TEXT"] = (
finding.metadata.Remediation.Recommendation.Text
)
finding_dict["REMEDIATION_RECOMMENDATION_URL"] = (
finding.metadata.Remediation.Recommendation.Url
)
finding_dict["REMEDIATION_CODE_NATIVEIAC"] = (
finding.metadata.Remediation.Code.NativeIaC
)
finding_dict["REMEDIATION_CODE_TERRAFORM"] = (
finding.metadata.Remediation.Code.Terraform
)
finding_dict["REMEDIATION_CODE_CLI"] = (
finding.metadata.Remediation.Code.CLI
)
finding_dict["REMEDIATION_CODE_OTHER"] = (
finding.metadata.Remediation.Code.Other
)
finding_dict["COMPLIANCE"] = unroll_dict(
finding.compliance, separator=": "
)
finding_dict["CATEGORIES"] = unroll_list(finding.metadata.Categories)
finding_dict["DEPENDS_ON"] = unroll_list(finding.metadata.DependsOn)
finding_dict["RELATED_TO"] = unroll_list(finding.metadata.RelatedTo)
finding_dict["NOTES"] = finding.metadata.Notes
finding_dict["PROWLER_VERSION"] = finding.prowler_version
self._data.append(finding_dict)
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
"""
try:
for finding in findings:
finding_dict = {}
finding_dict["AUTH_METHOD"] = finding.auth_method
finding_dict["TIMESTAMP"] = finding.timestamp
finding_dict["ACCOUNT_UID"] = finding.account_uid
finding_dict["ACCOUNT_NAME"] = finding.account_name
finding_dict["ACCOUNT_EMAIL"] = finding.account_email
finding_dict["ACCOUNT_ORGANIZATION_UID"] = (
finding.account_organization_uid
)
```
- Batch Write Data To File Method:
- This method will write the modeled object to a file.
#### Method Implementation
The following example demonstrates the `batch_write_data_to_file` method for the `CSV` class:
```python title="Batch Write Data To File"
def batch_write_data_to_file(self) -> None:
"""Writes the findings to a file using the CSV format using the `Output._file_descriptor`."""
try:
if (
getattr(self, "_file_descriptor", None)
and not self._file_descriptor.closed
and self._data
):
csv_writer = DictWriter(
self._file_descriptor,
fieldnames=self._data[0].keys(),
delimiter=";",
)
csv_writer.writeheader()
for finding in self._data:
csv_writer.writerow(finding)
self._file_descriptor.close()
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
finding_dict["ACCOUNT_ORGANIZATION_NAME"] = (
finding.account_organization_name
)
```
finding_dict["ACCOUNT_TAGS"] = unroll_dict(
finding.account_tags, separator=":"
)
finding_dict["FINDING_UID"] = finding.uid
finding_dict["PROVIDER"] = finding.metadata.Provider
finding_dict["CHECK_ID"] = finding.metadata.CheckID
finding_dict["CHECK_TITLE"] = finding.metadata.CheckTitle
finding_dict["CHECK_TYPE"] = unroll_list(finding.metadata.CheckType)
finding_dict["STATUS"] = finding.status.value
finding_dict["STATUS_EXTENDED"] = finding.status_extended
finding_dict["MUTED"] = finding.muted
finding_dict["SERVICE_NAME"] = finding.metadata.ServiceName
finding_dict["SUBSERVICE_NAME"] = finding.metadata.SubServiceName
finding_dict["SEVERITY"] = finding.metadata.Severity.value
finding_dict["RESOURCE_TYPE"] = finding.metadata.ResourceType
finding_dict["RESOURCE_UID"] = finding.resource_uid
finding_dict["RESOURCE_NAME"] = finding.resource_name
finding_dict["RESOURCE_DETAILS"] = finding.resource_details
finding_dict["RESOURCE_TAGS"] = unroll_dict(finding.resource_tags)
finding_dict["PARTITION"] = finding.partition
finding_dict["REGION"] = finding.region
finding_dict["DESCRIPTION"] = finding.metadata.Description
finding_dict["RISK"] = finding.metadata.Risk
finding_dict["RELATED_URL"] = finding.metadata.RelatedUrl
finding_dict["REMEDIATION_RECOMMENDATION_TEXT"] = (
finding.metadata.Remediation.Recommendation.Text
)
finding_dict["REMEDIATION_RECOMMENDATION_URL"] = (
finding.metadata.Remediation.Recommendation.Url
)
finding_dict["REMEDIATION_CODE_NATIVEIAC"] = (
finding.metadata.Remediation.Code.NativeIaC
)
finding_dict["REMEDIATION_CODE_TERRAFORM"] = (
finding.metadata.Remediation.Code.Terraform
)
finding_dict["REMEDIATION_CODE_CLI"] = (
finding.metadata.Remediation.Code.CLI
)
finding_dict["REMEDIATION_CODE_OTHER"] = (
finding.metadata.Remediation.Code.Other
)
finding_dict["COMPLIANCE"] = unroll_dict(
finding.compliance, separator=": "
)
finding_dict["CATEGORIES"] = unroll_list(finding.metadata.Categories)
finding_dict["DEPENDS_ON"] = unroll_list(finding.metadata.DependsOn)
finding_dict["RELATED_TO"] = unroll_list(finding.metadata.RelatedTo)
finding_dict["NOTES"] = finding.metadata.Notes
finding_dict["PROWLER_VERSION"] = finding.prowler_version
self._data.append(finding_dict)
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
```
* Batch Write Data To File Method:
* This method will write the modeled object to a file.
The following is the code for the `batch_write_data_to_file` method for the `CSV` class:
```python title="Batch Write Data To File"
def batch_write_data_to_file(self) -> None:
"""Writes the findings to a file using the CSV format using the `Output._file_descriptor`."""
try:
if (
getattr(self, "_file_descriptor", None)
and not self._file_descriptor.closed
and self._data
):
csv_writer = DictWriter(
self._file_descriptor,
fieldnames=self._data[0].keys(),
delimiter=";",
)
csv_writer.writeheader()
for finding in self._data:
csv_writer.writerow(finding)
self._file_descriptor.close()
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
```
### Integrating the Custom Output Format into Prowler
Once the custom output format is created, it must be integrated into Prowler to ensure compatibility with the existing architecture.
#### Reviewing Current Supported Outputs
Before implementing the new output format, examine the usage of currently supported formats to understand their structure and integration approach. Example: CSV Output Creation in Prowler
Below is an example of how Prowler generates and processes CSV output within its [codebase](https://github.com/prowler-cloud/prowler/blob/master/prowler/__main__.py):
### Integration With The Current Code
Once that the desired output format is created it has to be integrated with Prowler. Take a look at the the usage from the current supported output in order to add the new one.
Here is an example of the CSV output creation inside [prowler code](https://github.com/prowler-cloud/prowler/blob/master/prowler/__main__.py):
```python title="CSV creation"
if mode == "csv":
csv_output = CSV(
@@ -184,23 +148,19 @@ if mode == "csv":
file_path=f"{filename}{csv_file_suffix}",
)
generated_outputs["regular"].append(csv_output)
# Write CSV Finding Object to file.
# Write CSV Finding Object to file
csv_output.batch_write_data_to_file()
```
### Testing
* Verify that Prowlers findings are accurately typed in the desired output format.
* Error Handling Simulate edge cases to assess robustness and failure recovery mechanisms.
* Verify that Prowlers findings are accurately writed in the desired output format.
* Simulate edge cases to ensure robust error handling.
### Documentation
* Ensure the following elements are included:
* Setup Instructions List all necessary dependencies and installation steps.
* Provide clear, detailed documentation for your output:
* Setup instructions, including any required dependencies.
* Configuration details.
* Example Use Cases Provide practical scenarios demonstrating functionality.
* Troubleshooting Guide Document common issues and resolution steps.
* Comprehensive and clear documentation improves maintainability and simplifies onboarding of new users.
* Example use cases and troubleshooting tips.
* Good documentation ensures maintainability and simplifies onboarding for new users.
+173 -71
View File
@@ -1,78 +1,187 @@
# Prowler Providers
# Create a new Provider for Prowler
Here you can find how to create a new Provider in Prowler to give support for making all security checks needed and make your cloud safer!
## Introduction
Providers form the backbone of Prowler, enabling security assessments across various cloud environments.
Providers are the foundation on which Prowler is built, a simple definition for a cloud provider could be "third-party company that offers a platform where any IT resource you need is available at any time upon request". The most well-known cloud providers are Amazon Web Services, Azure from Microsoft and Google Cloud which are already supported by Prowler.
A provider is any platform or service that offers resources, data, or functionality that can be audited for security and compliance. This includes:
To create a new provider that is not supported now by Prowler and add your security checks you must create a new folder to store all the related files within it (services, checks, etc.). It must be store in route `prowler/providers/<new_provider_name>/`.
- Cloud Infrastructure Providers (like Amazon Web Services, Microsoft Azure, and Google Cloud)
- Software as a Service (SaaS) Platforms (like Microsoft 365)
- Development Platforms (like GitHub)
- Container Orchestration Platforms (like Kubernetes)
Inside that folder, you MUST create the following files and folders:
For providers supported by Prowler, refer to [Prowler Hub](https://hub.prowler.com/).
- A `lib` folder: to store all extra functions.
- A `services` folder: to store all [services](./services.md) to audit.
- An empty `__init__.py`: to make Python treat this service folder as a package.
- A `<new_provider_name>_provider.py`, containing all the provider's logic necessary to get authenticated in the provider, configurations and extra data useful for final report.
- A `models.py`, containing all the models necessary for the new provider.
???+ important
There are some custom providers added by the community, like [NHN Cloud](https://www.nhncloud.com/), that are not maintained by the Prowler team, but can be used in the Prowler CLI. They can be checked directly at the [Prowler GitHub repository](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
## Provider
## Adding a New Provider
To integrate an unsupported Prowler provider and implement its security checks, create a dedicated folder for all related files (e.g., services, checks)."
This folder must be placed within [`prowler/providers/<new_provider_name>/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
Within this folder the following folders are also to be created:
- `lib` Stores additional utility functions and core files required by every provider. The following files and subfolders are commonly found in every provider's `lib` folder:
- `service/service.py` Provides a generic service class to be inherited by all services.
- `arguments/arguments.py` Handles provider-specific argument parsing.
- `mutelist/mutelist.py` Manages the mutelist functionality for the provider.
- `services` Stores all [services](./services.md) that the provider offers and want to be audited by [Prowler checks](./checks.md).
- `__init__.py` (empty) Ensures Python recognizes this folder as a package.
- `<new_provider_name>_provider.py` Defines authentication logic, configurations, and other provider-specific data.
- `models.py` Contains necessary models for the new provider.
By adhering to this structure, Prowler can effectively support services and security checks for additional providers.
???+ important
If your new provider requires a Python library (such as an official SDK or API client) to connect to its services, make sure to add it as a dependency in the `pyproject.toml` file. This ensures that all contributors and users have the necessary packages installed when working with your provider.
## Provider Structure in Prowler
Prowler's provider architecture is designed to facilitate security audits through a generic service tailored to each provider. This is accomplished by passing the necessary parameters to the constructor, which initializes all required session values.
The structure for Prowler's providers is set up in such a way that they can be utilized through a generic service specific to each provider. This is achieved by passing the required parameters to the constructor, which in turn initializes all the necessary session values.
### Base Class
All Prowler providers inherit from the same base class located in [`prowler/providers/common/provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py). It is an [abstract base class](https://docs.python.org/3/library/abc.html) that defines the interface for all provider classes.
All the providers in Prowler inherits from the same [base class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py). It is an [abstract base class](https://docs.python.org/3/library/abc.html) that defines the interface for all provider classes. The code of the class is the next:
```python title="Provider Base Class"
from abc import ABC, abstractmethod
from typing import Any
class Provider(ABC):
"""
The Provider class is an abstract base class that defines the interface for all provider classes in the auditing system.
Attributes:
type (property): The type of the provider.
identity (property): The identity of the provider for auditing.
session (property): The session of the provider for auditing.
audit_config (property): The audit configuration of the provider.
output_options (property): The output configuration of the provider for auditing.
Methods:
print_credentials(): Displays the provider's credentials used for auditing in the command-line interface.
setup_session(): Sets up the session for the provider.
validate_arguments(): Validates the arguments for the provider.
get_checks_to_execute_by_audit_resources(): Returns a set of checks based on the input resources to scan.
Note:
This is an abstract base class and should not be instantiated directly. Each provider should implement its own
version of the Provider class by inheriting from this base class and implementing the required methods and properties.
"""
@property
@abstractmethod
def type(self) -> str:
"""
type method stores the provider's type.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def identity(self) -> str:
"""
identity method stores the provider's identity to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@abstractmethod
def setup_session(self) -> Any:
"""
setup_session sets up the session for the provider.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def session(self) -> str:
"""
session method stores the provider's session to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def audit_config(self) -> str:
"""
audit_config method stores the provider's audit configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@abstractmethod
def print_credentials(self) -> None:
"""
print_credentials is used to display in the CLI the provider's credentials used to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def output_options(self) -> str:
"""
output_options method returns the provider's audit output configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@output_options.setter
@abstractmethod
def output_options(self, value: str) -> Any:
"""
output_options.setter sets the provider's audit output configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
def validate_arguments(self) -> None:
"""
validate_arguments validates the arguments for the provider.
This method can be overridden in each provider if needed.
"""
raise NotImplementedError()
def get_checks_to_execute_by_audit_resources(self) -> set:
"""
get_checks_to_execute_by_audit_resources returns a set of checks based on the input resources to scan.
This is a fallback that returns None if the service has not implemented this function.
"""
return set()
@property
@abstractmethod
def mutelist(self):
"""
mutelist method returns the provider's mutelist.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@mutelist.setter
@abstractmethod
def mutelist(self, path: str):
"""
mutelist.setter sets the provider's mutelist.
This method needs to be created in each provider.
"""
raise NotImplementedError()
```
### Provider Class
#### Provider Implementation Guidance
Given the complexity and variability of providers, use existing provider implementations as templates when developing new integrations.
Due to the complexity and differences of each provider use the rest of the providers as a template for the implementation.
- [AWS](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/aws_provider.py)
- [GCP](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/gcp_provider.py)
- [Azure](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/azure_provider.py)
- [Kubernetes](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/kubernetes_provider.py)
- [Microsoft365](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/microsoft365/microsoft365_provider.py)
- [GitHub](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/github_provider.py)
- [M365](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/m365_provider.py)
### Basic Provider Implementation: Pseudocode Example
To simplify understanding, the following pseudocode outlines the fundamental structure of a provider, including library imports necessary for authentication.
To facilitate understanding here is a pseudocode of how the most basic provider could be with examples.
```python title="Provider Example Class"
# Library Imports for Authentication
# When implementing authentication for a provider, import the required libraries.
# Library imports to authenticate in the Provider
from prowler.config.config import load_and_validate_config_file
from prowler.lib.logger import logger
@@ -81,14 +190,14 @@ from prowler.lib.utils.utils import print_boxes
from prowler.providers.common.models import Audit_Metadata
from prowler.providers.common.provider import Provider
from prowler.providers.<new_provider_name>.models import (
# All provider models needed.
# All providers models needed
ProviderSessionModel,
ProviderIdentityModel,
ProviderOutputOptionsModel
)
class NewProvider(Provider):
# All properties from the class, some of which are properties in the base class.
# All properties from the class, some of this are properties in the base class
_type: str = "<provider_name>"
_session: <ProviderSessionModel>
_identity: <ProviderIdentityModel>
@@ -104,30 +213,20 @@ class NewProvider(Provider):
arguments (dict): A dictionary containing configuration arguments.
"""
logger.info("Setting <NewProviderName> provider ...")
# First get from arguments the necessary from the cloud account (subscriptions or projects or whatever the provider use for storing services)
# Initializing the Provider Session
# Steps:
# - Retrieve Account Information
# - Extract relevant account identifiers (subscriptions, projects, or other service references) from the provided arguments.
# Establish a Session
# Use the method enforced by the parent class to set up the session:
# Set the session with the method enforced by parent class
self._session = self.setup_session(credentials_file)
# Define Provider Identity
# Assign the identity class, typically provided by the Python provider library:
# Set the Identity class normaly the provider class give by Python provider library
self._identity = <ProviderIdentityModel>()
# Configure the Provider
# Set the provider-specific configuration.
# Set the provider configuration
self._audit_config = load_and_validate_config_file(
self._type, arguments.config_file
)
# All the enforced properties by the parent class.
# All enforced properties by the parent class
@property
def identity(self):
return self._identity
@@ -153,7 +252,7 @@ class NewProvider(Provider):
Sets up the Provider session.
Args:
<all_needed_for_auth> Can include all necessary arguments to set up the session
<all_needed_for_auth> Can include all necessary arguments to setup the session
Returns:
Credentials necessary to communicate with the provider.
@@ -163,8 +262,11 @@ class NewProvider(Provider):
"""
This method is enforced by parent class and is used to print all relevant
information during the prowler execution as a header of execution.
Displaying Account Information with Color Formatting. In Prowler, Account IDs, usernames, and other identifiers are typically displayed using color formatting provided by the colorama module (Fore).
Normally the Account ID, User name or stuff like this is displayed in colors using the colorama module (Fore).
"""
def print_credentials(self):
pass
```
@@ -1,25 +1,20 @@
# Creating a New Security Compliance Framework in Prowler
# Create a new security compliance framework
## Introduction
To create or contribute a custom security framework for Prowler—or to integrate a public framework—you must ensure the necessary checks are available. If they are missing, they must be implemented before proceeding.
Each framework is defined in a compliance file per provider. The file should follow the structure used in `prowler/compliance/<provider>/` and be named `<framework>_<version>_<provider>.json`. Follow the format below to create your own.
If you want to create or contribute with your own security frameworks or add public ones to Prowler you need to make sure the checks are available if not you have to create your own. Then create a compliance file per provider like in `prowler/compliance/<provider>/` and name it as `<framework>_<version>_<provider>.json` then follow the following format to create yours.
## Compliance Framework
Each file version of a framework will have the following structure at high level with the case that each framework needs to be generally identified, one requirement can be also called one control but one requirement can be linked to multiple prowler checks.:
### Compliance Framework Structure
Each compliance framework file consists of structured metadata that identifies the framework and maps security checks to requirements or controls. Please note that a single requirement can be linked to multiple Prowler checks:
- `Framework`: string The distinguished name of the framework (e.g., CIS).
- `Provider`: string The cloud provider where the framework applies (AWS, Azure, OCI).
- `Version`: string The framework version (e.g., 1.4 for CIS).
- `Requirements`: array of objects. Defines security requirements and their mapping to Prowler checks. All requirements or controls are to be included with the mapping to Prowler.
- `Requirements_Id`: string A unique identifier for each requirement within the framework
- `Requirements_Description`: string The requirement description as specified in the framework.
- `Requirements_Attributes`: array of objects. Contains relevant metadata such as security levels, sections, and any additional data needed for reporting with the result of the findings. Attributes should be derived directly from the frameworks own terminology, ensuring consistency with its established definitions.
- `Requirements_Checks`: array. The Prowler checks that are needed to prove this requirement. It can be one or multiple checks. In case automation is not feasible, this can be empty.
- `Framework`: string. Distinguish name of the framework, like CIS
- `Provider`: string. Provider where the framework applies, such as AWS, Azure, OCI,...
- `Version`: string. Version of the framework itself, like 1.4 for CIS.
- `Requirements`: array of objects. Include all requirements or controls with the mapping to Prowler.
- `Requirements_Id`: string. Unique identifier per each requirement in the specific framework
- `Requirements_Description`: string. Description as in the framework.
- `Requirements_Attributes`: array of objects. Includes all needed attributes per each requirement, like levels, sections, etc. Whatever helps to create a dedicated report with the result of the findings. Attributes would be taken as closely as possible from the framework's own terminology directly.
- `Requirements_Checks`: array. Prowler checks that are needed to prove this requirement. It can be one or multiple checks. In case of no automation possible this can be empty.
```
{
@@ -28,9 +23,9 @@ Each compliance framework file consists of structured metadata that identifies t
"Requirements": [
{
"Id": "<unique-id>",
"Description": "Full description of the requirement",
"Description": "Requirement full description",
"Checks": [
"Here is the prowler check or checks that will be executed"
"Here is the prowler check or checks that is going to be executed"
],
"Attributes": [
{
@@ -43,4 +38,4 @@ Each compliance framework file consists of structured metadata that identifies t
}
```
Finally, to have a proper output file for your reports, your framework data model has to be created in `prowler/lib/outputs/models.py` and also the CLI table output in `prowler/lib/outputs/compliance.py`. Also, you need to add a new conditional in `prowler/lib/outputs/file_descriptors.py` if creating a new CSV model.
Finally, to have a proper output file for your reports, your framework data model has to be created in `prowler/lib/outputs/models.py` and also the CLI table output in `prowler/lib/outputs/compliance.py`. Also, you need to add a new conditional in `prowler/lib/outputs/file_descriptors.py` if you create a new CSV model.
+127 -124
View File
@@ -1,184 +1,197 @@
# Prowler Services
# Create a new Provider Service
Here you can find how to create a new service, or to complement an existing one, for a [Prowler Provider](./provider.md).
???+note
First ensure that the provider you want to add the service is already created. It can be checked [here](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers). If the provider is not present, please refer to the [Provider](./provider.md) documentation to create it from scratch.
Here you can find how to create a new service, or to complement an existing one, for a Prowler Provider.
## Introduction
In Prowler, a **service** represents a specific solution or resource offered by one of the supported [Prowler Providers](./provider.md), for example, [EC2](https://aws.amazon.com/ec2/) in AWS, or [Microsoft Exchange](https://www.microsoft.com/en-us/microsoft-365/exchange/exchange-online) in M365. Services are the building blocks that allow Prowler interact directly with the various resources exposed by each provider.
In Prowler, a service is basically a solution that is offered by a cloud provider i.e. [ec2](https://aws.amazon.com/ec2/). Essentially it is a class that stores all the necessary stuff that we will need later in the checks to audit some aspects of our Cloud account.
Each service is implemented as a class that encapsulates all the logic, data models, and API interactions required to gather and store information about that service's resources. All of this data is used by the [Prowler checks](./checks.md) to generate the security findings.
To create a new service, you will need to create a folder inside the specific provider, i.e. `prowler/providers/<provider>/services/<new_service_name>/`.
## Adding a New Service
Inside that folder, you MUST create the following files:
To create a new service, a new folder must be created inside the specific provider following this pattern: `prowler/providers/<provider>/services/<new_service_name>/`.
- An empty `__init__.py`: to make Python treat this service folder as a package.
- A `<new_service_name>_service.py`, containing all the service's logic and API calls.
- A `<new_service_name>_client_.py`, containing the initialization of the service's class we have just created so the checks's checks can use it.
Within this folder the following files are also to be created:
## Service
- `__init__.py` (empty) Ensures Python recognizes this folder as a package.
- `<new_service_name>_service.py` Contains all the logic and API calls of the service.
- `<new_service_name>_client_.py` Contains the initialization of the freshly created service's class so that the checks can use it.
## Service Structure and Initialisation
The Prowler's service structure is as outlined below. To initialise it, just import the service client in a check.
The Prowler's service structure is the following and the way to initialise it is just by importing the service client in a check.
### Service Base Class
All Prowler provider service should inherit from a common base class to avoid code duplication. This base class handles initialization and storage of functions and objects needed across services. The exact implementation depends on the provider's API requirements, but the following are the most common responsibilities:
- Initialize/store clients to interact with the provider's API.
- Store the audit and fixer configuration.
- Implement threading logic where applicable.
For reference, the base classes for each provider can be checked here:
All the Prowler provider's services inherits from a base class depending on the provider used.
- [AWS Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/lib/service/service.py)
- [GCP Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/lib/service/service.py)
- [Azure Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/lib/service/service.py)
- [Kubernetes Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/lib/service/service.py)
- [M365 Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/service/service.py)
- [GitHub Service Base Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/lib/service/service.py)
Each class is used to initialize the credentials and the API's clients to be used in the service. If some threading is used it must be coded there.
### Service Class
Due to the complexity and differences across provider APIs, the following example demonstrates best practices for structuring a service in Prowler.
Due to the complexity and differences of each provider API we are going to use an example service to guide you in how can it be created.
File `<new_service_name>_service.py`:
The following is the `<new_service_name>_service.py` file:
```python title="Example Service Class"
```python title="Service Class"
from datetime import datetime
from typing import Optional
# To use the Pydantic's BaseModel.
# The following is just for the AWS provider
from botocore.client import ClientError
# To use the Pydantic's BaseModel
from pydantic import BaseModel
# Prowler logging library.
# Prowler logging library
from prowler.lib.logger import logger
# Provider parent class.
# Prowler resource filter, only for the AWS provider
from prowler.lib.scan_filters.scan_filters import is_resource_filtered
# Provider parent class
from prowler.providers.<provider>.lib.service.service import ServiceParentClass
# Create a class for the Service.
class <Service>(ServiceParentClass):
def __init__(self, provider: Provider):
"""Initialize the Service Class
Args:
provider: Prowler Provider object.
"""
# Call Service Parent Class __init__.
# The __class__.__name__ is used to obtain it automatically.
# From the Service Class name, but a custom one can be passed.
# String in case the provider's API service name is different.
# Create a class for the Service
class <Service>(ServiceParentClass):
def __init__(self, provider):
# Call Service Parent Class __init__
# We use the __class__.__name__ to get it automatically
# from the Service Class name but you can pass a custom
# string if the provider's API service name is different
super().__init__(__class__.__name__, provider)
# Create an empty dictionary of items to be gathered, using the unique ID as the dictionarys key, e.g., instances.
# Create an empty dictionary of items to be gathered,
# using the unique ID as the dictionary key
# e.g., instances
self.<items> = {}
# If parallelization can be carried out by regions or locations, the function __threading_call__ to be used must be implemented in the Service Parent Class.
# If it is not implemented, you can make it in a sequential way, just calling the function.
# If you can parallelize by regions or locations
# you can use the __threading_call__ function
# available in the Service Parent Class
self.__threading_call__(self.__describe_<items>__)
# If it is needed you can create another function to retrieve more data from the items.
# Here we are using the second parameter of the __threading_call__ function to create one thread per item.
# You can also make it sequential without using the __threading_call__ function iterating over the items inside the function.
self.__threading_call__(self.__describe_<item>__, self.<items>.values())
# Optionally you can create another function to retrieve
# more data about each item without parallel
self.__describe_<item>__()
# In case of use the __threading_call__ function, you have to pass the regional_client to the function, as a parameter.
def __describe_<items>__(self, regional_client):
"""Get all <items> and store in the self.<items> dictionary
Args:
regional_client: Regional client object.
"""
"""Get ALL <Service> <Items>"""
logger.info("<Service> - Describing <Items>...")
# A try-except block must be created in each function.
# We MUST include a try/except block in each function
try:
# If pagination is supported by the provider, is always better to use it, call to the provider API to retrieve the desired data.
# Call to the provider API to retrieve the data we want
describe_<items>_paginator = regional_client.get_paginator("describe_<items>")
# Paginator to get every item.
# Paginator to get every item
for page in describe_<items>_paginator.paginate():
# Another try-except within the for loop to continue iterating in case something unexpected happens.
# Another try/except within the loop for to continue looping
# if something unexpected happens
try:
for <item> in page["<Items>"]:
# Adding Retrieved Resources to the Object
# Once the resource has been retrieved, it must be included in the previously created object to ensure proper data handling within the service.
self.<items>[<item_unique_id>] =
<Item>(
arn=stack["<item_arn>"],
name=stack["<item_name>"],
tags=stack.get("Tags", []),
region=regional_client.region,
)
# For the AWS provider we MUST include the following lines to retrieve
# or not data for the resource passed as argument using the --resource-arn
if not self.audit_resources or (
is_resource_filtered(<item>["<item_arn>"], self.audit_resources)
):
# Then we have to include the retrieved resource in the object
# previously created
self.<items>[<item_unique_id>] =
<Item>(
arn=stack["<item_arn>"],
name=stack["<item_name>"],
tags=stack.get("Tags", []),
region=regional_client.region,
)
except Exception as error:
logger.error(
f"{<provider_specific_field>} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
# Logging Errors in Exception Handling
# When handling exceptions, use the following approach to log errors appropriately based on the cloud provider being used:
# In the except part we have to use the following code to log the errors
except Exception as error:
# Depending on each provider we can must use different fields in the logger, e.g.: AWS: regional_client.region or self.region, GCP: project_id and location, Azure: subscription
# Depending on each provider we can use the following fields in the logger:
# - AWS: regional_client.region or self.region
# - GCP: project_id and location
# - Azure: subscription
logger.error(
f"{<provider_specific_field>} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
def __describe_<item>__(self, item: <Item>):
"""Get details for a <item>
Args:
item: Item object.
"""
def __describe_<item>__(self):
"""Get Details for a <Service> <Item>"""
logger.info("<Service> - Describing <Item> to get specific details...")
# A try-except block must be created in each function.
# We MUST include a try/except block in each function
try:
<item>_details = self.regional_clients[<item>.region].describe_<item>(
<Attribute>=<item>.name
)
# Loop over the items retrieved in the previous function
for <item> in self.<items>:
# E.g., check if item is Public. This case is important: if values are being retrieved from a dictionary, the function "dict.get()" must be used with a default value in case this value is not present.
<item>.public = <item>_details.get("Public", False)
# When we perform calls to the Provider API within a for loop we have
# to include another try/except block because in the cloud there are
# ephemeral resources that can be deleted at the time we are checking them
try:
<item>_details = self.regional_clients[<item>.region].describe_<item>(
<Attribute>=<item>.name
)
# For example, check if item is Public. Here is important if we are
# getting values from a dictionary we have to use the "dict.get()"
# function with a default value in the case this value is not present
<item>.public = <item>_details.get("Public", False)
# In this except block, for example for the AWS Provider we can use
# the botocore.ClientError exception and check for a specific error code
# to raise a WARNING instead of an ERROR if some resource is not present.
except ClientError as error:
if error.response["Error"]["Code"] == "InvalidInstanceID.NotFound":
logger.warning(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
else:
logger.error(
f"{<provider_specific_field>} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
continue
# In the except part we have to use the following code to log the errors
except Exception as error:
# Fields for logging errors with relevant item information, e.g.: AWS: <item>.region, GCP: <item>.project_id, Azure: <item>.region
# Depending on each provider we can use the following fields in the logger:
# - AWS: regional_client.region or self.region
# - GCP: project_id and location
# - Azure: subscription
logger.error(
f"{<item>.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
```
???+note
To prevent false findings, when Prowler fails to retrieve items due to Access Denied or similar errors, the affected item's value is set to `None`.
To avoid fake findings, when Prowler can't retrieve the items, because an Access Denied or similar error, we set that items value as `None`.
#### Service Models
Service models define structured classes used within services to store and process data extracted from API calls.
Using Pydantic for Data Validation
Prowler leverages Pydantic's [BaseModel](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel) to enforce data validation.
Service models are classes that are used in the service to design all that we need to store in each class object extrated from API calls. We use the Pydantic's [BaseModel](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel) to take advantage of the data validation.
```python title="Service Model"
# Implementation Approach
# Each service class should include custom model classes using Pydantic's BaseModel for the resources being audited.
# In each service class we have to create some classes using
# the Pydantic's Basemodel for the resources we want to audit.
class <Item>(BaseModel):
"""<Item> holds a <Service> <Item>"""
id: str
"""<Items>[].id"""
arn: str
"""<Items>[].arn"""
name: str
"""<Items>[].name"""
@@ -189,34 +202,26 @@ class <Item>(BaseModel):
public: bool
"""<Items>[].public"""
# Optional attributes can be created set to None by default.
# We can create Optional attributes set to None by default
tags: Optional[list]
"""<Items>[].tags"""
```
#### Service Objects
In the service each group of resources should be created as a Python [dictionary](https://docs.python.org/3/tutorial/datastructures.html#dictionaries). This is because we are performing lookups all the time and the Python dictionary lookup has [O(1) complexity](https://en.wikipedia.org/wiki/Big_O_notation#Orders_of_common_functions).
#### Service Attributes
*Optimized Data Storage with Python Dictionaries*
Each group of resources within a service should be structured as a Python [dictionary](https://docs.python.org/3/tutorial/datastructures.html#dictionaries) to enable efficient lookups. The dictionary lookup operation has [O(1) complexity](https://en.wikipedia.org/wiki/Big_O_notation#Orders_of_common_functions), and lookups are constantly executed.
*Assigning Unique Identifiers*
Each dictionary key must be a unique ID to identify the resource in a univocal way.
We MUST set as the dictionary key a unique ID, like the resource Unique ID or ARN.
Example:
```python
self.virtual_machines = {}
self.virtual_machines["vm-01234567890abcdef"] = VirtualMachine()
self.vpcs = {}
self.vpcs["vpc-01234567890abcdef"] = VPC_Object_Class()
```
### Service Client
Each Prowler service requires a service client to use the service in the checks.
The following is the `<new_service_name>_client.py` file, which contains the initialization of the freshly created service's class so that service checks can use it. This file is almost the same for all the services among the providers:
The following is the `<new_service_name>_client.py` containing the initialization of the service's class we have just created so the service's checks can use them:
```python
from prowler.providers.common.provider import Provider
@@ -225,13 +230,11 @@ from prowler.providers.<provider>.services.<new_service_name>.<new_service_name>
<new_service_name>_client = <Service>(Provider.get_global_provider())
```
## Provider Permissions in Prowler
## Permissions
Before implementing a new service, verify that Prowlers existing permissions for each provider are sufficient. If additional permissions are required, refer to the relevant documentation and update accordingly.
It is really important to check if the current Prowler's permissions for each provider are enough to implement a new service. If we need to include more please refer to the following documentaion and update it:
Provider-Specific Permissions Documentation:
- [AWS](../getting-started/requirements.md#authentication)
- [Azure](../getting-started/requirements.md#needed-permissions)
- [GCP](../getting-started/requirements.md#needed-permissions_1)
- [M365](../getting-started/requirements.md#needed-permissions_2)
- AWS: https://docs.prowler.cloud/en/latest/getting-started/requirements/#aws-authentication
- Azure: https://docs.prowler.cloud/en/latest/getting-started/requirements/#permissions
- GCP: https://docs.prowler.cloud/en/latest/getting-started/requirements/#gcp-authentication
- M365: https://docs.prowler.cloud/en/latest/getting-started/requirements/#m365-authentication
+244 -414
View File
@@ -1,20 +1,20 @@
# Unit Tests for Prowler Checks
# Unit Tests
Unit tests for Prowler checks vary based on the provider being evaluated.
The unit tests for the Prowler checks varies between each provider supported.
Below are key resources and insights gained throughout the testing process.
Here we left some good reads about unit testing and things we've learnt through all the process.
**Python Testing**
- https://docs.python-guide.org/writing/tests/
**Where to Patch**
**Where to patch**
- https://docs.python.org/3/library/unittest.mock.html#where-to-patch
- https://stackoverflow.com/questions/893333/multiple-variables-in-a-with-statement
- https://docs.python.org/3/reference/compound_stmts.html#the-with-statement
**Utilities for Tracing Mocking and Test Execution**
**Utils to trace mocking and test execution**
- https://news.ycombinator.com/item?id=36054868
- https://docs.python.org/3/library/sys.html#sys.settrace
@@ -22,283 +22,122 @@ Below are key resources and insights gained throughout the testing process.
## General Recommendations
When writing tests for Prowler provider checks, follow these guidelines to maximize coverage across test scenarios:
When creating tests for some provider's checks we follow these guidelines trying to cover as much test scenarios as possible:
1. Zero Findings Scenario:
Develop tests where no resources exist. Prowler returns zero findings if the audited service lacks the required resources.
1. Create a test without resource to generate 0 findings, because Prowler will generate 0 findings if a service does not contain the resources the check is looking for audit.
2. Create test to generate both a `PASS` and a `FAIL` result.
3. Create tests with more than 1 resource to evaluate how the check behaves and if the number of findings is right.
2. Positive and Negative Outcomes:
Create tests that generate both a passing (`PASS`) and a failing (`FAIL`) result.
## How to run Prowler tests
3. Multi-Resource Evaluations:
Design tests with multiple resources to verify check behavior and ensure the correct number of findings.
To run the Prowler test suite you need to install the testing dependencies already included in the `pyproject.toml` file. If you didn't install it yet please read the developer guide introduction [here](./introduction.md#get-the-code-and-install-all-dependencies).
## Running Prowler Tests
Then in the project's root path execute `pytest -n auto -vvv -s -x` or use the `Makefile` with `make test`.
To execute the Prowler test suite, install the necessary dependencies listed in the `pyproject.toml` file.
Other commands to run tests:
### Prerequisites
If you have not installed Prowler yet, refer to the [developer guide introduction](./introduction.md#get-the-code-and-install-all-dependencies).
### Executing Tests
Navigate to the project's root directory and execute: `pytest -n auto -vvv -s -x`
Alternatively, use:
`Makefile` with `make test`.
Other Commands for Running Tests
- Running tests for a provider:
`pytest -n auto -vvv -s -x tests/providers/<provider>/services`
- Running tests for a provider service:
`pytest -n auto -vvv -s -x tests/providers/<provider>/services/<service>`
- Running tests for a provider check:
`pytest -n auto -vvv -s -x tests/providers/<provider>/services/<service>/<check>`
- Run tests for a provider: `pytest -n auto -vvv -s -x tests/providers/<provider>/services`
- Run tests for a provider service: `pytest -n auto -vvv -s -x tests/providers/<provider>/services/<service>`
- Run tests for a provider check: `pytest -n auto -vvv -s -x tests/providers/<provider>/services/<service>/<check>`
???+ note
Refer to the [pytest documentation](https://docs.pytest.org/en/7.1.x/getting-started.html) for more details.
Refer to the [pytest documentation](https://docs.pytest.org/en/7.1.x/getting-started.html) documentation for more information.
## AWS Testing Approaches
## AWS
For AWS provider, different testing approaches apply based on API coverage based on several criteria.
For the AWS provider we have ways to test a Prowler check based on the following criteria:
???+ note
Prowler leverages and contributes to the[Moto](https://github.com/getmoto/moto) library for mocking AWS infrastructure in tests.
We use and contribute to the [Moto](https://github.com/getmoto/moto) library which allows us to easily mock out tests based on AWS infrastructure. **It's awesome!**
- AWS API Calls Covered by [Moto](https://github.com/getmoto/moto):
- Service Tests: `@mock_aws`
- Checks Tests: `@mock_aws`
- AWS API calls covered by [Moto](https://github.com/getmoto/moto):
- Service tests with `@mock_aws`
- Checks tests with `@mock_aws`
- AWS API calls not covered by Moto:
- Service test with `mock_make_api_call`
- Checks tests with [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock)
- AWS API calls partially covered by Moto:
- Service test with `@mock_aws` and `mock_make_api_call`
- Checks tests with `@mock_aws` and `mock_make_api_call`
- AWS API Calls Not Covered by Moto:
- Service Tests: `mock_make_api_call`
- Checks Tests: [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock)
In the following section we are going to explain all of the above scenarios with examples. The main difference between those scenarios comes from if the [Moto](https://github.com/getmoto/moto) library covers the AWS API calls made by the service. You can check the covered API calls [here](https://github.com/getmoto/moto/blob/master/IMPLEMENTATION_COVERAGE.md).
- AWS API Calls Partially Covered by Moto:
- Service Tests: `@mock_aws` and `mock_make_api_call`
- Check Tests: `@mock_aws` and `mock_make_api_call`
### Checks
#### AWS Check Testing Scenarios
For the AWS tests examples we are going to use the tests for the `iam_password_policy_uppercase` check.
The following section provides examples for each testing scenario. The primary distinction between these scenarios depends on whether the [Moto](https://github.com/getmoto/moto) library covers the AWS API calls made by the service. You can review the supported API calls [here](https://github.com/getmoto/moto/blob/master/IMPLEMENTATION_COVERAGE.md).
This section is going to be divided based on the API coverage of the [Moto](https://github.com/getmoto/moto) library.
### AWS Check Testing Approach
#### API calls covered
For AWS test examples, we reference tests for the `iam_password_policy_uppercase` check.
This section is categorized based on [Moto](https://github.com/getmoto/moto) API coverage.
#### API Calls Covered by Moto
When the [Moto](https://github.com/getmoto/moto) library supports the API calls required for testing, use the `@mock_aws` decorator. This ensures that all AWS API calls within the decorated function are properly mocked while maintaining state within the test.
If the [Moto](https://github.com/getmoto/moto) library covers the API calls we want to test, we can use the `@mock_aws` decorator. This will mocked out all the API calls made to AWS keeping the state within the code decorated, in this case the test function.
```python
# Import unittest.mock to enable object patching
# This prevents shared objects between tests, ensuring test isolation
# We need to import the unittest.mock to allow us to patch some objects
# not to use shared ones between test, hence to isolate the test
from unittest import mock
# Import Boto3 client and session for AWS API calls
# Boto3 client and session to call the AWS APIs
from boto3 import client, session
# Import Moto decorator for mocking AWS services
# Moto decorator
from moto import mock_aws
# Define constants for test execution
# Constants used
AWS_ACCOUNT_NUMBER = "123456789012"
AWS_REGION = "us-east-1"
# Test class naming convention: Test_<check_name>
# We always name the test classes like Test_<check_name>
class Test_iam_password_policy_uppercase:
# Apply the Moto decorator for AWS service mocking
# We include the Moto decorator
@mock_aws
# Test naming convention: test_<service>_<check_name>_<test_action>
# We name the tests with test_<service>_<check_name>_<test_action>
def test_iam_password_policy_no_uppercase_flag(self):
Steps
# Step 1: Create an IAM client for API calls in the specified region
# First, we have to create an IAM client
iam_client = client("iam", region_name=AWS_REGION)
# Step 2: Modify the account password policy to disable uppercase character enforcement
# Action: Setting RequireUppercaseCharacters to False
# Then, since all the AWS accounts have a password
# policy we want to set to False the RequireUppercaseCharacters
iam_client.update_account_password_policy(RequireUppercaseCharacters=False)
# Step 3: Mock the AWS provider to ensure isolated testing
# Using 'set_mocked_aws_provider' allows overriding the provider response
# This mocked provider is defined in test fixtures
# The aws_provider is mocked using set_mocked_aws_provider to use it as the return of the get_global_provider method.
# this mocked provider is defined in fixtures
aws_provider = set_mocked_aws_provider([AWS_REGION_US_EAST_1])
# Step 4: Ensure Prowler service imports occur within the decorated function
# This prevents accidental real API calls to AWS during test execution
# The Prowler service import MUST be made within the decorated
# code not to make real API calls to the AWS service.
from prowler.providers.aws.services.iam.iam_service import IAM
# Mocking AWS Provider and IAM Client for Prowler Tests
#Prowler for AWS relies on a shared object, aws_provider, which stores provider-related information.
# To ensure proper test isolation and prevent shared objects between tests, we apply mocking techniques.
# Mocking Global AWS Provider
#To mock the global provider, we use mock.patch() to override the get_global_provider() method, ensuring aws_provider is the return value.
# Prowler for AWS uses a shared object called aws_provider where it stores
# the info related with the provider
with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=aws_provider,
),
# Mocking IAM Client for Test Isolation
#In addition to mocking the provider, we must also mock the iam_client from the check. This ensures that the IAM client used in the test is the one explicitly created within the test.
# ⚠️ Important:
# patch != import—simply importing does not ensure proper isolation.
# Running tests in parallel may cause unintended object initialization, impacting test integrity.
with mock.patch(
# We have to mock also the iam_client from the check to enforce that the iam_client used is the one
# created within this check because patch != import, and if you execute tests in parallel some objects
# can be already initialised hence the check won't be isolated
mock.patch(
"prowler.providers.aws.services.iam.iam_password_policy_uppercase.iam_password_policy_uppercase.iam_client",
new=IAM(aws_provider),
):
# Importing the IAM Check
# To prevent initialization issues, import the check inside the two-mock context.
# This ensures the IAM client does not retain shared data from aws_provider or the IAM service.
# We import the check within the two mocks not to initialise the iam_client with some shared information from
# the aws_provider or the IAM service.
from prowler.providers.aws.services.iam.iam_password_policy_uppercase.iam_password_policy_uppercase import (
iam_password_policy_uppercase,
)
# Executing the IAM Check
# Once imported, instantiate the checks class.
# Once imported, we only need to instantiate the check's class
check = iam_password_policy_uppercase()
# Then run the execute function()
# against the set up IAM client.
# And then, call the execute() function to run the check
# against the IAM client we've set up.
result = check.execute()
# Validating the Check Results
# Finally, assert all fields to verify expected results.
assert len(results) == 1
assert result[0].status == "FAIL"
assert result[0].status_extended == "IAM password policy does not srequire at least one uppercase letter."
assert result[0].resource_arn == f"arn:aws:iam::{AWS_ACCOUNT_NUMBER}:root"
assert result[0].resource_id == AWS_ACCOUNT_NUMBER
assert result[0].resource_tags == []
assert result[0].region == AWS_REGION
```
#### Handling API Calls Not Covered by Moto
If the IAM service required for testing is not supported by the Moto library, use [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock) to inject objects into the service client.
???+ warning
As stated above, direct service instantiation must be avoided to prevent actual AWS API calls.
???+ note
The example below demonstrates the IAM GetAccountPasswordPolicy API, which is covered by Moto, but is used for instructional purposes only.
#### Mocking Service Objects Using MagicMock
The following code demonstrates how to use MagicMock to create service objects.
```python
# Import unittest.mock to enable object patching
# This prevents shared objects between tests, ensuring test isolation
from unittest import mock
# Define constants for test execution
AWS_ACCOUNT_NUMBER = "123456789012"
AWS_REGION = "us-east-1"
# Test class naming convention: Test_<check_name>
class Test_iam_password_policy_uppercase:
# Test naming convention: test_<service>_<check_name>_<test_action>
def test_iam_password_policy_no_uppercase_flag(self):
# Mock IAM client with MagicMock
mocked_iam_client = mock.MagicMock
# Import IAM PasswordPolicy model, as it has its own model
from prowler.providers.aws.services.iam.iam_service import PasswordPolicy
# Create a mock PasswordPolicy object with predefined attributes
mocked_iam_client.password_policy = PasswordPolicy(
length=5,
symbols=True,
numbers=True,
# The value must be set to False to trigger a failure scenario
uppercase=False,
lowercase=True,
allow_change=False,
expiration=True,
)
# In this scenario, both the IAM service and the iam_client from the check must be mocked to ensure test isolation. This guarantees that the iam_client used in the test is the one explicitly instantiated within the test itself.
# Note: Simply applying a patch does not modify imports (patch != import).
# If tests are executed in parallel, objects may already be initialized,
# leading to unintended shared state and breaking test isolation.
# Unlike other cases, we do not use the Moto decorator here.
# Instead, we mock the IAM client for both objects to prevent real AWS API interactions.
with mock.patch(
"prowler.providers.aws.services.iam.iam_service.IAM",
new=mocked_iam_client,
), mock.patch(
"prowler.providers.aws.services.iam.iam_client.iam_client",
new=mocked_iam_client,
):
# Importing the IAM Check
# To prevent initialization issues, import the check inside the two-mock context.
# This ensures the IAM client does not retain shared data from aws_provider or the IAM service.
from prowler.providers.aws.services.iam.iam_password_policy_uppercase.iam_password_policy_uppercase import (
iam_password_policy_uppercase,
)
# Executing the IAM Check
# Once imported, instantiate the checks class.
check = iam_password_policy_uppercase()
# Then run the execute function()
# against the set up IAM client.
result = check.execute()
# Validating the Check Results
# Finally, assert all fields to verify expected results.
# Last but not least, we need to assert all the fields
# from the check's results
assert len(results) == 1
assert result[0].status == "FAIL"
assert result[0].status_extended == "IAM password policy does not require at least one uppercase letter."
@@ -308,15 +147,89 @@ class Test_iam_password_policy_uppercase:
assert result[0].region == AWS_REGION
```
#### Ensuring Test Isolation with Mocked/Patched Objects
#### API calls not covered
In all above scenarios, check execution must occur within the context of mocked or patched objects. This guarantees that the test only evaluates objects explicitly created within its scope, preventing interference from shared state or external dependencies.
If the IAM service for the check's we want to test is not covered by Moto, we have to inject the objects in the service client using [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock). As we have pointed above, we cannot instantiate the service since it will make real calls to the AWS APIs.
#### Handling Partially Covered API Calls
???+ note
The following example uses the IAM GetAccountPasswordPolicy which is covered by Moto but this is only for demonstration purposes.
When a service requires API calls that are partially covered by the Moto decorator, additional mocking is necessary. In such cases, custom mocked API calls must be implemented alongside Moto to ensure full coverage.
The following code shows how to use MagicMock to create the service objects.
```python
# We need to import the unittest.mock to allow us to patch some objects
# not to use shared ones between test, hence to isolate the test
from unittest import mock
# Constants used
AWS_ACCOUNT_NUMBER = "123456789012"
AWS_REGION = "us-east-1"
# We always name the test classes like Test_<check_name>
class Test_iam_password_policy_uppercase:
# We name the tests with test_<service>_<check_name>_<test_action>
def test_iam_password_policy_no_uppercase_flag(self):
# Mocked client with MagicMock
mocked_iam_client = mock.MagicMock
# Since the IAM Password Policy has their own model we have to import it
from prowler.providers.aws.services.iam.iam_service import PasswordPolicy
# Create the mock PasswordPolicy object
mocked_iam_client.password_policy = PasswordPolicy(
length=5,
symbols=True,
numbers=True,
# We set the value to False to test the check
uppercase=False,
lowercase=True,
allow_change=False,
expiration=True,
)
# In this scenario we have to mock also the IAM service and the iam_client from the check to enforce # that the iam_client used is the one created within this check because patch != import, and if you # execute tests in parallel some objects can be already initialised hence the check won't be isolated.
# In this case we don't use the Moto decorator, we use the mocked IAM client for both objects
with mock.patch(
"prowler.providers.aws.services.iam.iam_service.IAM",
new=mocked_iam_client,
), mock.patch(
"prowler.providers.aws.services.iam.iam_client.iam_client",
new=mocked_iam_client,
):
# We import the check within the two mocks not to initialise the iam_client with some shared information from
# the aws_provider or the IAM service.
from prowler.providers.aws.services.iam.iam_password_policy_uppercase.iam_password_policy_uppercase import (
iam_password_policy_uppercase,
)
# Once imported, we only need to instantiate the check's class
check = iam_password_policy_uppercase()
# And then, call the execute() function to run the check
# against the IAM client we've set up.
result = check.execute()
# Last but not least, we need to assert all the fields
# from the check's results
assert len(results) == 1
assert result[0].status == "FAIL"
assert result[0].status_extended == "IAM password policy does not require at least one uppercase letter."
assert result[0].resource_arn == f"arn:aws:iam::{AWS_ACCOUNT_NUMBER}:root"
assert result[0].resource_id == AWS_ACCOUNT_NUMBER
assert result[0].resource_tags == []
assert result[0].region == AWS_REGION
```
As it can be seen in the above scenarios, the check execution should always be into the context of mocked/patched objects. This way we ensure it reviews only the objects created under the scope the test.
#### API calls partially covered
If the API calls we want to use in the service are partially covered by the Moto decorator we have to create our own mocked API calls to use it in combination.
To do so, you need to mock the `botocore.client.BaseClient._make_api_call` function, which is the Boto3 function in charge of making the real API call to the AWS APIs, using `mock.patch <https://docs.python.org/3/library/unittest.mock.html#patch>`:
To achieve this, mock the `botocore.client.BaseClient._make_api_call` function—the method responsible for making actual API requests to AWS—using `mock.patch <https://docs.python.org/3/library/unittest.mock.html#patch>`:
```python
@@ -326,18 +239,13 @@ from unittest.mock import patch
from moto import mock_aws
# Original botocore _make_api_call function
orig = botocore.client.BaseClient._make_api_call
# Mocked botocore _make_api_call function
def mock_make_api_call(self, operation_name, kwarg):
# The 'operation_name' follows the snake_case format (get_account_password_policy),
# but we use the PascalCase form (GetAccountPasswordPolicy) for consistency with Boto3 conventions.
# Reference: https://github.com/boto/botocore/blob/develop/botocore/client.py#L810:L816
# As you can see the operation_name has the get_account_password_policy snake_case form but
# we are using the GetAccountPasswordPolicy form.
# Rationale -> https://github.com/boto/botocore/blob/develop/botocore/client.py#L810:L816
if operation_name == 'GetAccountPasswordPolicy':
return {
'PasswordPolicy': {
@@ -353,41 +261,29 @@ def mock_make_api_call(self, operation_name, kwarg):
'HardExpiry': True|False
}
}
# If API call patching is not required, return the original method execution.
# If we don't want to patch the API call
return orig(self, operation_name, kwarg)
# Test class naming convention: Test_<check_name>
# We always name the test classes like Test_<check_name>
class Test_iam_password_policy_uppercase:
# Apply custom API call mock decorator for the required service
# We include the custom API call mock decorator for the service we want to use
@patch("botocore.client.BaseClient._make_api_call", new=mock_make_api_call)
# Also include IAM Moto decorator for supported API calls
# We include also the IAM Moto decorator for the API calls supported
@mock_iam
# Test naming convention: test_<service>_<check_name>_<test_action>
# We name the tests with test_<service>_<check_name>_<test_action>
def test_iam_password_policy_no_uppercase_flag(self):
# Refer to the previous section for the check test, as the implementation remains unchanged.
# Check the previous section to see the check test since is the same
```
???+ note
This example does not use Moto to simplify the setup.
However, if additional `moto` decorators are applied alongside the patch, Moto will automatically intercept the call to `orig(self, operation_name, kwarg)`.
Note that this does not use Moto, to keep it simple, but if you use any `moto`-decorators in addition to the patch, the call to `orig(self, operation_name, kwarg)` will be intercepted by Moto.
???+ note
The source of the above implementation can be found here:[Patch Other Services with Moto](https://docs.getmoto.org/en/latest/docs/services/patching\_other\_services.html)
The above code comes from here https://docs.getmoto.org/en/latest/docs/services/patching_other_services.html
#### Mocking Several Services
Since the provider is being mocked, multiple attributes can be configured to customize its behavior:
#### Mocking more than one service
Since we are mocking the provider, it can be customized setting multiple attributes to the provider:
```python
def set_mocked_aws_provider(
audited_regions: list[str] = [],
@@ -410,7 +306,8 @@ def set_mocked_aws_provider(
) -> AwsProvider:
```
If a test is designed for a check that interacts with multiple provider services, each service used must be individually mocked. For instance, if the check `cloudtrail_logs_s3_bucket_access_logging_enabled` relies on both the CloudTrail and S3 clients, the test's service mocking section should be structured as follows:
If the test your are creating belongs to a check that uses more than one provider service, you should mock each of the services used. For example, the check `cloudtrail_logs_s3_bucket_access_logging_enabled` requires the CloudTrail and the S3 client, hence the service's mock part of the test will be as follows:
```python
with mock.patch(
@@ -431,45 +328,42 @@ with mock.patch(
):
```
As demonstrated in the code above, mocking both the AWS audit information and all utilized services is mandatory for proper test execution.
As you can see in the above code, it is required to mock the AWS audit info and both services used.
#### Patching vs. Importing
Properly understanding patching versus importing is critical for unit testing with Prowler checks. Given the dynamic nature of the check-loading mechanism, the process for importing a service client within a check follows this structured approach:
This is an important topic within the Prowler check's unit testing. Due to the dynamic nature of the check's load, the process of importing the service client from a check is the following:
1. `<check>.py`:
```python
from prowler.providers.<provider>.services.<service>.<service>_client import <service>_client
```
```python
from prowler.providers.<provider>.services.<service>.<service>_client import <service>_client
```
2. `<service>_client.py`:
```python
from prowler.providers.common.provider import Provider
from prowler.providers.<provider>.services.<service>.<service>_service import <SERVICE>
```python
from prowler.providers.common.provider import Provider
from prowler.providers.<provider>.services.<service>.<service>_service import <SERVICE>
<service>_client = <SERVICE>(Provider.get_global_provider())
```
<service>_client = <SERVICE>(Provider.get_global_provider())
```
Due to the import path structure, patching certain objects does not always ensure full isolation. If multiple tests—executed sequentially or in parallel—reuse service clients, some instances may already be initialized by another check. This can lead to unintended shared state, affecting test accuracy:
Due to the above import path it's not the same to patch the following objects because if you run a bunch of tests, either in parallel or not, some clients can be already instantiated by another check, hence your test execution will be using another test's service instance:
- `<service>_client` imported at `<check>.py`
- `<service>_client` initialised at `<service>_client.py`
- `<SERVICE>` imported at `<service>_client.py`
#### Additional Resources on Mocking Imports
A useful read about this topic can be found in the following article: https://stackoverflow.com/questions/8658043/how-to-mock-an-import
For a deeper understanding of mocking imports in Python, refer to the following article: https://stackoverflow.com/questions/8658043/how-to-mock-an-import
#### Approaches to Mocking a Service Client
#### Different ways to mock the service client
1\. Mocking the Service Client at the Service Client Level
##### Mocking the service client at the service client level
2\. Mocking a Service Client via Below Code Implementation
Once all required attributes are configured for the mocked provider, it can be used as the service client for test execution:
Mocking a service client using the following code ...
Once the needed attributes are set for the mocked provider, you can use the mocked provider:
```python title="Mocking the service_client"
with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
@@ -479,20 +373,18 @@ with mock.patch(
new=<SERVICE>(set_mocked_aws_provider([<region>])),
):
```
will cause that the service will be initialised twice:
1. When `<SERVICE>(set_mocked_aws_provider([<region>]))` is mocked out using `mock.patch`, it must be properly prepared before patching to ensure test consistency.
1. When the `<SERVICE>(set_mocked_aws_provider([<region>]))` is mocked out using `mock.patch` to have the object ready for the patching.
2. At the `<service>_client.py` when we are patching it since the `mock.patch` needs to go to that object an initialise it, hence the `<SERVICE>(set_mocked_aws_provider([<region>]))` will be called again.
2. At the point of patching, in `<service>_client.py`, and since `mock.patch` needs to access said object and initialise it, `<SERVICE>(set_mocked_aws_provider([<region>]))` will be called again.
Then, when we import the `<service>_client.py` at `<check>.py`, since we are mocking where the object is used, Python will use the mocked one.
Later, when importing `<service>_client.py` at `<check>.py`, Python uses the mocked instance since the patch was applied at the correct reference point.
In the [next section](./unit-testing.md#mocking-the-service-and-the-service-client-at-the-service-client-level) you will see an improved version to mock objects.
In the [next section](./unit-testing.md#mocking-the-service-and-the-service-client-at-the-service-client-level) we will explore an improved approach to mock objects.
##### Mocking the Service and the Service Client at the Service Client Level
##### Mocking a Service Client via Below Code Implementation
##### Mocking the service and the service client at the service client level
Mocking a service client using the following code ...
```python title="Mocking the service and the service_client"
with mock.patch(
@@ -506,42 +398,35 @@ with mock.patch(
new=service_client,
):
```
will cause that the service will be initialised once, just when the `set_mocked_aws_provider([<region>])` is mocked out using `mock.patch`.
will cause that the service is initialized only once—at the moment of mocking out `set_mocked_aws_provider([<region>])` using `mock.patch`.
Then, at the check_level when Python tries to import the client with `from prowler.providers.<provider>.services.<service>.<service>_client`, since it is already mocked out, the execution will continue using the `service_client` without getting into the `<service>_client.py`.
Later, when Python attempts to import the client at the check level, the execution continues using`from prowler.providers.<provider>.services.<service>.<service>_client`. As a result of it being already mocked out, the execution will continue using `service_client` without getting into `<service>_client.py`.
### Testing AWS Services
### Services
AWS service testing follows the same methodology as AWS checks:
Verify whether the AWS API calls made by the service are covered by Moto.
For testing the AWS services we have to follow the same logic as with the AWS checks, we have to check if the AWS API calls made by the service are covered by Moto and we have to test the service `__init__` to verify that the information is being correctly retrieved.
Execute tests on the service `__init__` to ensure correct information retrieval.
The service tests could act as *Integration Tests* since we test how the service retrieves the information from the provider, but since Moto or the custom mock objects mocks that calls this test will fall into *Unit Tests*.
While service tests resemble *Integration Tests*, as they assess how the service interacts with the provider, they ultimately fall under *Unit Tests*, due to the use of Moto or custom mock objects.
For detailed guidance on test creation and existing service tests, refer to the [AWS checks test](./unit-testing.md#checks) [documentation](https://github.com/prowler-cloud/prowler/tree/master/tests/providers/aws/services).
Please refer to the [AWS checks tests](./unit-testing.md#checks) for more information on how to create tests and check the existing services tests [here](https://github.com/prowler-cloud/prowler/tree/master/tests/providers/aws/services).
## GCP
### GCP Check Testing Approach
### Checks
Currently the GCP Provider does not have a dedicated library for mocking API calls. To ensure proper test isolation, objects must be manually injected into the service client using [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock).
For the GCP Provider we don't have any library to mock out the API calls we use. So in this scenario we inject the objects in the service client using [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock).
Mocking Service Objects Using MagicMock
The following code demonstrates how to use MagicMock to create service objects for a GCP check test. This is a real-world implementation, adapted for instructional clarity.
The following code shows how to use MagicMock to create the service objects for a GCP check test. It is a real example adapted for informative purposes.
```python
from re import search
from unittest import mock
# Import constant values needed in every check
# Import some constant values needed in every check
from tests.providers.gcp.gcp_fixtures import GCP_PROJECT_ID, set_mocked_gcp_provider
# Create a test for the compute_project_os_login_enabled check
# We are going to create a test for the compute_project_os_login_enabled check
class Test_compute_project_os_login_enabled:
def test_one_compliant_project(self):
@@ -552,15 +437,13 @@ class Test_compute_project_os_login_enabled:
id=GCP_PROJECT_ID,
enable_oslogin=True,
)
# Mock IAM client with MagicMock
# Mocked client with MagicMock
compute_client = mock.MagicMock
compute_client.project_ids = [GCP_PROJECT_ID]
compute_client.projects = [project]
# In this scenario, the app_client from the check must be mocked to ensure that the compute_client used in the test is the explicitly created instance.
# Additionally, the return value of the get_global_provider function is mocked to return the predefined GCP mocked provider from the test fixtures.
# In this scenario we have to mock the app_client from the check to enforce that the compute_client used is the one created above
# And also is mocked the return value of get_global_provider function to return our GCP mocked provider defined in fixtures
with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_gcp_provider(),
@@ -568,24 +451,16 @@ class Test_compute_project_os_login_enabled:
"prowler.providers.gcp.services.compute.compute_project_os_login_enabled.compute_project_os_login_enabled.compute_client",
new=compute_client,
):
# Import the check within the two mocks
# We import the check within the two mocks
from prowler.providers.gcp.services.compute.compute_project_os_login_enabled.compute_project_os_login_enabled import (
compute_project_os_login_enabled,
)
# Executing the IAM Check
# Once imported, instantiate the checks class.
# Once imported, we only need to instantiate the check's class
check = compute_project_os_login_enabled()
# Then run the execute function()
# against the set up Compute client.
# And then, call the execute() function to run the check
# against the Compute client we've set up.
result = check.execute()
# Assert the expected results
assert len(result) == 1
assert result[0].status == "PASS"
assert search(
@@ -596,10 +471,7 @@ class Test_compute_project_os_login_enabled:
assert result[0].location == "global"
assert result[0].project_id == GCP_PROJECT_ID
# Complementary Test
# The following is an additional test for a wider scenario coverage
# Complementary test to make more coverage for different scenarios
def test_one_non_compliant_project(self):
from prowler.providers.gcp.services.compute.compute_service import Project
@@ -638,28 +510,19 @@ class Test_compute_project_os_login_enabled:
```
### Testing GCP Services
### Services
The testing of Google Cloud Services follows the same principles as the one of Google Cloud checks. While all API calls must be mocked, attribute setup for API calls in this scenario is defined in the fixtures file, specifically within the [fixtures file](https://github.com/prowler-cloud/prowler/blob/master/tests/providers/gcp/gcp_fixtures.py) in the `mock_api_client` function.
For testing Google Cloud Services, we have to follow the same logic as with the Google Cloud checks. We still mocking all API calls, but in this case, every API call to set up an attribute is defined in [fixtures file](https://github.com/prowler-cloud/prowler/blob/master/tests/providers/gcp/gcp_fixtures.py) in `mock_api_client` function. Remember that EVERY method of a service must be tested.
???+ important
Every method within a service must be tested to ensure full coverage and accurate validation.
The following example presents a real testing class, but includes additional comments for educational purposes, explaining key concepts and implementation details.
The following code shows a real example of a testing class, but it has more comments than usual for educational purposes.
```python title="BigQuery Service Test"
# Import unittest.mock.patch to enable object patching
# This prevents shared objects between tests, ensuring test isolation
# We need to import the unittest.mock.patch to allow us to patch some objects
# not to use shared ones between test, hence to isolate the test
from unittest.mock import patch
# Import the class needed from the service file
from prowler.providers.gcp.services.bigquery.bigquery_service import BigQuery
# Use necessary constants and functions from fixtures file
# Necessary constans and functions from fixtures file
from tests.providers.gcp.gcp_fixtures import (
GCP_PROJECT_ID,
mock_api_client,
@@ -669,10 +532,10 @@ from tests.providers.gcp.gcp_fixtures import (
class TestBigQueryService:
# The only method needed to test full service
# Only method needed to test full service
def test_service(self):
# Mocking '__is_api_active__' ensures that the test utilizes the predefined mocked project instead of a real instance.
# Additionally, all client interactions are patched to use the mocked API calls.
# In this case we are mocking the __is_api_active__ to ensure our mocked project is used
# And all the client to use our mocked API calls
with patch(
"prowler.providers.gcp.lib.service.service.GCPService.__is_api_active__",
new=mock_is_api_active,
@@ -684,7 +547,7 @@ class TestBigQueryService:
bigquery_client = BigQuery(
set_mocked_gcp_provider(project_ids=[GCP_PROJECT_ID])
)
# Verify that all attributes of the tested class are correctly initialized based on the API calls mocked from the GCP fixture file.
# Check all attributes of the tested class is well set up according API calls mocked from GCP fixture file
assert bigquery_client.service == "bigquery"
assert bigquery_client.project_ids == [GCP_PROJECT_ID]
@@ -718,28 +581,16 @@ class TestBigQueryService:
assert not bigquery_client.tables[1].cmk_encryption
assert bigquery_client.tables[1].project_id == GCP_PROJECT_ID
```
As it can be confusing where all these values come from, I'll give an example to make this clearer. First we need to check
what is the API call used to obtain the datasets. In this case if we check the service the call is
`self.client.datasets().list(projectId=project_id)`.
Clarifying Value Origins with an Example
Now in the fixture file we have to mock this call in our `MagicMock` client in the function `mock_api_client`. The best way to mock
is following the actual format, add one function where the client is passed to be changed, the format of this function name must be
`mock_api_<endpoint>_calls` (*endpoint* refers to the first attribute pointed after *client*).
Understanding where specific values originate can be challenging, so the following example provides clarity.
- Step 1: Identify the API Call for Dataset Retrieval
To determine how datasets are obtained, examine the API call used by the service. In this case, the relevant service call is: `self.client.datasets().list(projectId=project_id)`.
- Step 2: Mocking the API Call in the Fixture File
In the fixture file, mock this call in the `MagicMock` client, in the function `mock_api_client`.
- Step 3: Structuring the Mock Function
The best approach for mocking is to adhere to the services existing format:
Define a dedicated function that modifies the client.
Follow the naming convention: `mock_api_<endpoint>_calls` (*endpoint* refers to the first attribute pointed after *client*).
For BigQuery, the mock function is called `mock_api_dataset_calls`. Within this function, an assignment is made for use in the `_get_datasets` method of the BigQuery class:
In the example of BigQuery the function is called `mock_api_dataset_calls`. And inside of this function we found an assignation to
be used in the `_get_datasets` method in BigQuery class:
```python
# Mocking datasets
@@ -768,46 +619,39 @@ client.datasets().list().execute.return_value = {
}
```
## Azure
### Azure Check Testing Approach
### Checks
Currently the Azure Provider does not have a dedicated library for mocking API calls. To ensure proper test isolation, objects must be manually injected into the service client using [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock).
For the Azure Provider we don't have any library to mock out the API calls we use. So in this scenario we inject the objects in the service client using [MagicMock](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.MagicMock).
Mocking Service Objects Using MagicMock
The following code demonstrates how to use MagicMock to create service objects for an Azure check test. This is a real-world implementation, adapted for instructional clarity.
The following code shows how to use MagicMock to create the service objects for a Azure check test. It is a real example adapted for informative purposes.
```python title="app_ensure_http_is_redirected_to_https_test.py"
# Import unittest.mock to enable object patching
# This prevents shared objects between tests, ensuring test isolation
# We need to import the unittest.mock to allow us to patch some objects
# not to use shared ones between test, hence to isolate the test
from unittest import mock
from uuid import uuid4
# Import some constans values needed in almost every check
from tests.providers.azure.azure_fixtures import (
AZURE_SUBSCRIPTION_ID,
set_mocked_azure_provider,
)
# Create a test for the app_ensure_http_is_redirected_to_https check
# We are going to create a test for the app_ensure_http_is_redirected_to_https check
class Test_app_ensure_http_is_redirected_to_https:
# Test naming convention: test_<service>_<check_name>_<test_action>
# We name the tests with test_<service>_<check_name>_<test_action>
def test_app_http_to_https_disabled(self):
resource_id = f"/subscriptions/{uuid4()}"
# Mock IAM client with MagicMock
# Mocked client with MagicMock
app_client = mock.MagicMock
# In this scenario, the app_client from the check must be mocked to ensure that the app_client used in the test is the explicitly created instance.
# Additionally, the return value of the get_global_provider function is mocked to return the predefined Azure mocked provider from the test fixtures.
# In this scenario we have to mock the app_client from the check to enforce that the app_client used is the one created above
# And also is mocked the return value of get_global_provider function to return our Azure mocked provider defined in fixtures
with mock.patch(
"prowler.providers.common.provider.Provider.get_global_provider",
return_value=set_mocked_azure_provider(),
@@ -815,7 +659,7 @@ class Test_app_ensure_http_is_redirected_to_https:
"prowler.providers.azure.services.app.app_ensure_http_is_redirected_to_https.app_ensure_http_is_redirected_to_https.app_client",
new=app_client,
):
# Import the check within the two mocks
# We import the check within the two mocks
from prowler.providers.azure.services.app.app_ensure_http_is_redirected_to_https.app_ensure_http_is_redirected_to_https import (
app_ensure_http_is_redirected_to_https,
)
@@ -837,11 +681,10 @@ class Test_app_ensure_http_is_redirected_to_https:
)
}
}
# Executing the IAM Check
# Once imported, instantiate the checks class.
# Once imported, we only need to instantiate the check's class
check = app_ensure_http_is_redirected_to_https()
# Then run the execute function()
# against the set up App client.
# And then, call the execute() function to run the check
# against the App client we've set up.
result = check.execute()
# Assert the expected results
assert len(result) == 1
@@ -855,9 +698,7 @@ class Test_app_ensure_http_is_redirected_to_https:
assert result[0].subscription == AZURE_SUBSCRIPTION_ID
assert result[0].location == "West Europe"
# Complementary Test
# The following is an additional test for a wider scenario coverage
# Complementary test to make more coverage for different scenarios
def test_app_http_to_https_enabled(self):
resource_id = f"/subscriptions/{uuid4()}"
app_client = mock.MagicMock
@@ -903,38 +744,28 @@ class Test_app_ensure_http_is_redirected_to_https:
```
### Testing Azure Services
### Services
The testing of Azure Services follows the same principles as the one of Google Cloud checks. All API calls are still mocked, but for methods that initialize attributes via an API call, use the [patch](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.patch) decorator at the beginning of the class to ensure proper mocking.
For testing Azure services, we have to follow the same logic as with the Azure checks. We still mock all the API calls, but in this case, every method that uses an API call to set up an attribute is mocked with the [patch](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.patch) decorator at the beginning of the class. Remember that every method of a service MUST be tested.
???+ important "Remember"
Every method within a service must be tested to ensure full coverage and accurate validation.
The following example presents a real testing class, but includes additional comments for educational purposes, explaining key concepts and implementation details.
The following code shows a real example of a testing class, but it has more comments than usual for educational purposes.
```python title="AppInsights Service Test"
# Import unittest.mock.patch to enable object patching
# This prevents shared objects between tests, ensuring test isolation
# We need to import the unittest.mock.patch to allow us to patch some objects
# not to use shared ones between test, hence to isolate the test
from unittest.mock import patch
# Import the models needed from the service file
from prowler.providers.azure.services.appinsights.appinsights_service import (
AppInsights,
Component,
)
# Import some constans values needed in almost every check
from tests.providers.azure.azure_fixtures import (
AZURE_SUBSCRIPTION_ID,
set_mocked_azure_provider,
)
# Function to mock the service function _get_components; the aim of this function is to return a possible value that a real function could return.
# Function to mock the service function _get_components, this function task is to return a possible value that real function could returns
def mock_appinsights_get_components(_):
return {
AZURE_SUBSCRIPTION_ID: {
@@ -946,25 +777,24 @@ def mock_appinsights_get_components(_):
}
}
# Patch decorator to use the mocked function instead of the function with the real API call
# Patch decorator to use the mocked function instead the function with the real API call
@patch(
"prowler.providers.azure.services.appinsights.appinsights_service.AppInsights._get_components",
new=mock_appinsights_get_components,
)
class Test_AppInsights_Service:
# Mandatory test for every service; this method tests if the instance of the client is correct.
# Mandatory test for every service, this method test the instance of the client is correct
def test_get_client(self):
app_insights = AppInsights(set_mocked_azure_provider())
assert (
app_insights.clients[AZURE_SUBSCRIPTION_ID].__class__.__name__
== "ApplicationInsightsManagementClient"
)
# Second typical method that tests if subscriptions are defined inside the client object.
# Second typical method that test if subscriptions is defined inside the client object
def test__get_subscriptions__(self):
app_insights = AppInsights(set_mocked_azure_provider())
assert app_insights.subscriptions.__class__.__name__ == "dict"
# Test for the function _get_components; the mocked function is used within this client.
# Test for the function _get_components, inside this client is used the mocked function
def test_get_components(self):
appinsights = AppInsights(set_mocked_azure_provider())
assert len(appinsights.components) == 1
+8 -81
View File
@@ -70,13 +70,9 @@ The other three cases does not need additional configuration, `--az-cli-auth` an
Prowler for Azure needs two types of permission scopes to be set:
- **Microsoft Entra ID permissions**: used to retrieve metadata from the identity assumed by Prowler and specific Entra checks (not mandatory to have access to execute the tool). The permissions required by the tool are the following:
- `Directory.Read.All`
- `Domain.Read.All`
- `Policy.Read.All`
- `UserAuthenticationMethod.Read.All` (used only for the Entra checks related with multifactor authentication)
???+ note
You can replace `Directory.Read.All` with `Domain.Read.All` that is a more restrictive permission but you won't be able to run the Entra checks related with DirectoryRoles and GetUsers.
- **Subscription scope permissions**: required to launch the checks against your resources, mandatory to launch the tool. It is required to add the following RBAC builtin roles per subscription to the entity that is going to be assumed by the tool:
- `Reader`
- `ProwlerRole` (custom role with minimal permissions defined in [prowler-azure-custom-role](https://github.com/prowler-cloud/prowler/blob/master/permissions/prowler-azure-custom-role.json))
@@ -139,7 +135,7 @@ Prowler for M365 currently supports the following authentication types:
???+ warning
For Prowler App only the Service Principal with User Credentials authentication method is supported.
### Service Principal authentication (recommended)
### Service Principal authentication
Authentication flag: `--sp-env-auth`
@@ -154,11 +150,9 @@ export AZURE_TENANT_ID="XXXXXXXXX"
If you try to execute Prowler with the `--sp-env-auth` flag and those variables are empty or not exported, the execution is going to fail.
Follow the instructions in the [Create Prowler Service Principal](../tutorials/microsoft365/getting-started-m365.md#create-the-service-principal-app) section to create a service principal.
If you don't add the external API permissions described in the mentioned section above you will only be able to run the checks that work through MS Graph. This means that you won't run all the provider.
With this credentials you will only be able to run the checks that work through MS Graph, this means that you won't run all the provider. If you want to scan all the checks from M365 you will need to use the recommended authentication method.
If you want to scan all the checks from M365 you will need to use the recommended authentication method or add the external API permissions.
### Service Principal and User Credentials authentication
### Service Principal and User Credentials authentication (recommended)
Authentication flag: `--env-auth`
@@ -175,11 +169,6 @@ export M365_PASSWORD="examplepassword"
These two new environment variables are **required** to execute the PowerShell modules needed to retrieve information from M365 services. Prowler uses Service Principal authentication to access Microsoft Graph and user credentials to authenticate to Microsoft PowerShell modules.
- `M365_USER` should be your Microsoft account email using the **assigned domain in the tenant**. This means it must look like `example@YourCompany.onmicrosoft.com` or `example@YourCompany.com`, but it must be the exact domain assigned to that user in the tenant.
???+ warning
If the user is newly created, you need to sign in with that account first, as Microsoft will prompt you to change the password. If you dont complete this step, user authentication will fail because Microsoft marks the initial password as expired.
???+ warning
The user must not be MFA capable. Microsoft does not allow MFA capable users to authenticate programmatically. See [Microsoft documentation](https://learn.microsoft.com/en-us/entra/identity-platform/scenario-desktop-acquire-token-username-password?tabs=dotnet) for more information.
???+ warning
Using a tenant domain other than the one assigned — even if it belongs to the same tenant — will cause Prowler to fail, as Microsoft authentication will not succeed.
@@ -210,23 +199,13 @@ Since this is a delegated permission authentication method, necessary permission
Prowler for M365 requires two types of permission scopes to be set (if you want to run the full provider including PowerShell checks). Both must be configured using Microsoft Entra ID:
- **Service Principal Application Permissions**: These are set at the **application** level and are used to retrieve data from the identity being assessed:
- `AuditLog.Read.All`: Required for Entra service.
- `Directory.Read.All`: Required for all services.
- `Domain.Read.All`: Required for all services.
- `Policy.Read.All`: Required for all services.
- `SharePointTenantSettings.Read.All`: Required for SharePoint service.
- `User.Read` (IMPORTANT: this must be set as **delegated**): Required for the sign-in.
- `Exchange.ManageAsApp` from external API `Office 365 Exchange Online`: Required for Exchange PowerShell module app authentication. You also need to assign the `Exchange Administrator` role to the app.
- `application_access` from external API `Skype and Teams Tenant Admin API`: Required for Teams PowerShell module app authentication.
- `SharePointTenantSettings.Read.All`: Required for SharePoint service.
- `AuditLog.Read.All`: Required for Entra service.
???+ note
You can replace `Directory.Read.All` with `Domain.Read.All` is a more restrictive permission but you won't be able to run the Entra checks related with DirectoryRoles and GetUsers.
> If you do this you will need to add also the `Organization.Read.All` permission to the service principal application in order to authenticate.
- **Powershell Modules Permissions** (if using user credentials): These are set at the `M365_USER` level, so the user used to run Prowler must have one of the following roles:
- **Powershell Modules Permissions**: These are set at the `M365_USER` level, so the user used to run Prowler must have one of the following roles:
- `Global Reader` (recommended): this allows you to read all roles needed.
- `Exchange Administrator` and `Teams Administrator`: user needs both roles but with this [roles](https://learn.microsoft.com/en-us/exchange/permissions-exo/permissions-exo#microsoft-365-permissions-in-exchange-online) you can access to the same information as a Global Reader (since only read access is needed, Global Reader is recommended).
@@ -444,7 +423,6 @@ The required modules are:
- [ExchangeOnlineManagement](https://www.powershellgallery.com/packages/ExchangeOnlineManagement/3.6.0): Minimum version 3.6.0. Required for several checks across Exchange, Defender, and Purview.
- [MicrosoftTeams](https://www.powershellgallery.com/packages/MicrosoftTeams/6.6.0): Minimum version 6.6.0. Required for all Teams checks.
- [MSAL.PS](https://www.powershellgallery.com/packages/MSAL.PS/4.32.0): Required for Exchange module via application authentication.
## GitHub
### Authentication
@@ -461,54 +439,3 @@ The provided credentials must have the appropriate permissions to perform all th
???+ note
GitHub App Credentials support less checks than other authentication methods.
## Infrastructure as Code (IaC)
Prowler's Infrastructure as Code (IaC) provider enables you to scan local infrastructure code for security and compliance issues using [Checkov](https://www.checkov.io/). This provider supports a wide range of IaC frameworks and requires no cloud authentication.
### Authentication
The IaC provider does not require any authentication or credentials since it scans local files directly. This makes it ideal for CI/CD pipelines and local development environments.
### Supported Frameworks
The IaC provider leverages Checkov to support multiple frameworks, including:
- Terraform
- CloudFormation
- Kubernetes
- ARM (Azure Resource Manager)
- Serverless
- Dockerfile
- YAML/JSON (generic IaC)
- Bicep
- Helm
- GitHub Actions, GitLab CI, Bitbucket Pipelines, Azure Pipelines, CircleCI, Argo Workflows
- Ansible
- Kustomize
- OpenAPI
- SAST, SCA (Software Composition Analysis)
### Usage
To run Prowler with the IaC provider, use the `iac` flag. You can specify the directory to scan, frameworks to include, and paths to exclude.
#### Basic Example
```console
prowler iac --scan-path ./my-iac-directory
```
#### Specify Frameworks
Scan only Terraform and Kubernetes files:
```console
prowler iac --scan-path ./my-iac-directory --frameworks terraform kubernetes
```
#### Exclude Paths
```console
prowler iac --scan-path ./my-iac-directory --exclude-path ./my-iac-directory/test,./my-iac-directory/examples
```
+3 -25
View File
@@ -572,12 +572,12 @@ With M365 you need to specify which auth method is going to be used:
```console
# To use service principal authentication for MSGraph and PowerShell modules
prowler m365 --sp-env-auth
# To use both service principal (for MSGraph) and user credentials (for PowerShell modules)
prowler m365 --env-auth
# To use service principal authentication
prowler m365 --sp-env-auth
# To use az cli authentication
prowler m365 --az-cli-auth
@@ -612,27 +612,5 @@ prowler github --github-app-id app_id --github-app-key app_key
2. `OAUTH_APP_TOKEN`
3. `GITHUB_APP_ID` and `GITHUB_APP_KEY`
#### Infrastructure as Code (IaC)
Prowler's Infrastructure as Code (IaC) provider enables you to scan local infrastructure code for security and compliance issues using [Checkov](https://www.checkov.io/). This provider supports a wide range of IaC frameworks, allowing you to assess your code before deployment.
```console
# Scan a directory for IaC files
prowler iac --scan-path ./my-iac-directory
# Specify frameworks to scan (default: all)
prowler iac --scan-path ./my-iac-directory --frameworks terraform kubernetes
# Exclude specific paths
prowler iac --scan-path ./my-iac-directory --exclude-path ./my-iac-directory/test,./my-iac-directory/examples
```
???+ note
- The IaC provider does not require cloud authentication
- It is ideal for CI/CD pipelines and local development environments
- For more details on supported frameworks and rules, see the [Checkov documentation](https://www.checkov.io/1.Welcome/Quick%20Start.html)
See more details about IaC scanning in the [IaC Tutorial](tutorials/iac/getting-started-iac.md) section.
## Prowler v2 Documentation
For **Prowler v2 Documentation**, please check it out [here](https://github.com/prowler-cloud/prowler/blob/8818f47333a0c1c1a457453c87af0ea5b89a385f/README.md).
-117
View File
@@ -1,117 +0,0 @@
# Prowler Multicloud CIS Benchmarks PowerBI Template
![Prowler Report](https://github.com/user-attachments/assets/560f7f83-1616-4836-811a-16963223c72f)
## Getting Started
1. Install Microsoft PowerBI Desktop
This report requires the Microsoft PowerBI Desktop software which can be downloaded for free from Microsoft.
2. Run compliance scans in Prowler
The report uses compliance csv outputs from Prowler. Compliance scans be run using either [Prowler CLI](https://docs.prowler.com/projects/prowler-open-source/en/latest/#prowler-cli) or [Prowler Cloud/App](https://cloud.prowler.com/sign-in)
1. Prowler CLI -&gt; Run a Prowler scan using the --compliance option
2. Prowler Cloud/App -&gt; Navigate to the compliance section to download csv outputs
![Download Compliance Scan](https://github.com/user-attachments/assets/42c11a60-8ce8-4c60-a663-2371199c052b)
The template supports the following CIS Benchmarks only:
| Compliance Framework | Version |
| ---------------------------------------------- | ------- |
| CIS Amazon Web Services Foundations Benchmark | v4.0.1 |
| CIS Google Cloud Platform Foundation Benchmark | v3.0.0 |
| CIS Microsoft Azure Foundations Benchmark | v3.0.0 |
| CIS Kubernetes Benchmark | v1.10.0 |
Ensure you run or download the correct benchmark versions.
3. Create a local directory to store Prowler csvoutputs
Once downloaded, place your csv outputs in a directory on your local machine. If you rename the files, they must maintain the provider in the filename.
To use time-series capabilities such as "compliance percent over time" you'll need scans from multiple dates.
4. Download and run the PowerBI template file (.pbit)
Running the .pbit file will open PowerBI Desktop and prompt you for the full filepath to the local directory
5. Enter the full filepath to the directory created in step 3
Provide the full filepath from the root directory.
Ensure that the filepath is not wrapped in quotation marks (""). If you use Window's "copy as path" feature, it will automatically include quotation marks.
6. Save the report as a PowerBI file (.pbix)
Once the filepath is entered, the template will automatically ingest and populate the report. You can then save this file as a new PowerBI report. If you'd like to generate another report, simply re-run the template file (.pbit) from step 4.
## Validation
After setting up your dashboard, you may want to validate the Prowler csv files were ingested correctly. To do this, navigate to the "Configuration" tab.
The "loaded CIS Benchmarks" table shows the supported benchmarks and versions. This is defined by the template file and not editable by the user. All benchmarks will be loaded regardless of which providers you provided csv outputs for.
The "Prowler CSV Folder" shows the path to the local directory you provided.
The "Loaded Prowler Exports" table shows the ingested csv files from the local directory. It will mark files that are treated as the latest assessment with a green checkmark.
![Prowler Validation](https://github.com/user-attachments/assets/a543ca9b-6cbe-4ad1-b32a-d4ac2163d447)
## Report Sections
The PowerBI Report is broken into three main report pages
| Report Page | Description |
| ----------- | ----------------------------------------------------------------------------------- |
| Overview | Provides general CIS Benchmark overview across both AWS, Azure, GCP, and Kubernetes |
| Benchmark | Provides overview of a single CIS Benchmark |
| Requirement | Drill-through page to view details of a single requirement |
### Overview Page
The overview page is a general CIS Benchmark overview across both AWS, Azure, GCP, and Kubernetes.
![image](https://github.com/user-attachments/assets/94164fa9-36a4-4bb9-890d-e9a9a63a3e7d)
The page has the following components:
| Component | Description |
| ---------------------------------------- | ------------------------------------------------------------------------ |
| CIS Benchmark Overview | Table with benchmark name, Version, and overall compliance percentage |
| Provider by Requirement Status | Bar chart showing benchmark requirements by status by provider |
| Compliance Percent Heatmap | Heatmap showing compliance percent by benchmark and profile level |
| Profile level by Requirement Status | Bar chart showing requirements by status and profile level |
| Compliance Percent Over Time by Provider | Line chart showing overall compliance perecentage over time by provider. |
### Benchmark Page
The benchmark page provides an overview of a single CIS Benchmark. You can select the benchmark from the dropdown as well as scope down to specific profile levels or regions.
![image](https://github.com/user-attachments/assets/34498ee8-317b-4b81-b241-c561451d8def)
The page has the following components:
| Component | Description |
| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
| Compliance Percent Heatmap | Heatmap showing compliance percent by region and profile level |
| Benchmark Section by Requirement Status | Bar chart showing benchmark requirements by bennchmark section and status |
| Compliance percent Over Time by Region | Line chart showing overall compliance percentage over time by region |
| Benchmark Requirements | Table showing requirement section, requirement number, reuqirement title, number of resources tested, status, and number of failing checks |
### Requirement Page
The requirement page is a drill-through page to view details of a single requirement. To populate the requirement page right click on a requiement from the "Benchmark Requirements" table on the benchmark page and select "Drill through" -&gt; "Requirement".
![image](https://github.com/user-attachments/assets/5c9172d9-56fe-4514-b341-7e708863fad6)
The requirement page has the following components:
| Component | Description |
| ------------------------------------------ | --------------------------------------------------------------------------------- |
| Title | Title of the requirement |
| Rationale | Rationale of the requirement |
| Remediation | Remedation guidance for the requirement |
| Region by Check Status | Bar chart showing Prowler checks by region and status |
| Resource Checks for Benchmark Requirements | Table showing Resource ID, Resource Name, Status, Description, and Prowler Checkl |
## Walkthrough Video
[![image](https://github.com/user-attachments/assets/866642c6-43ac-4aac-83d3-bb625002da0b)](https://www.youtube.com/watch?v=lfKFkTqBxjU)
@@ -90,14 +90,11 @@ A Service Principal is required to grant Prowler the necessary privileges.
Assign the following Microsoft Graph permissions:
- Directory.Read.All
- Domain.Read.All
- Policy.Read.All
- Policy.Read.All
- UserAuthenticationMethod.Read.All (optional, for MFA checks)
???+ note
You can replace `Directory.Read.All` with `Domain.Read.All` that is a more restrictive permission but you won't be able to run the Entra checks related with DirectoryRoles and GetUsers.
- UserAuthenticationMethod.Read.All (optional, for MFA checks)
1. Go to your App Registration > `API permissions`
@@ -110,7 +107,7 @@ Assign the following Microsoft Graph permissions:
3. Search and select:
- `Directory.Read.All`
- `Domain.Read.All`
- `Policy.Read.All`
- `UserAuthenticationMethod.Read.All`
Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 160 KiB

-1
View File
@@ -31,7 +31,6 @@ The following list includes all the AWS checks with configurable variables that
| `cloudtrail_threat_detection_privilege_escalation` | `threat_detection_privilege_escalation_minutes` | Integer |
| `cloudwatch_log_group_no_secrets_in_logs` | `secrets_ignore_patterns` | List of Strings |
| `cloudwatch_log_group_retention_policy_specific_days_enabled` | `log_group_retention_days` | Integer |
| `codebuild_github_allowed_organizations` | `github_allowed_organizations` | List of Strings |
| `codebuild_project_no_secrets_in_variables` | `excluded_sensitive_environment_variables` | List of Strings |
| `codebuild_project_no_secrets_in_variables` | `secrets_ignore_patterns` | List of Strings |
| `config_recorder_all_regions_enabled` | `mute_non_default_regions` | Boolean |
@@ -1,5 +1,7 @@
# Getting Started with GCP on Prowler Cloud/App
<iframe width="560" height="380" src="https://www.youtube-nocookie.com/embed/v1as8vTFlMg" title="Prowler Cloud Onboarding GCP" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen="1"></iframe>
Set up your GCP project to enable security scanning using Prowler Cloud/App.
## Requirements
-67
View File
@@ -1,67 +0,0 @@
# Getting Started with the IaC Provider
Prowler's Infrastructure as Code (IaC) provider enables you to scan local infrastructure code for security and compliance issues using [Checkov](https://www.checkov.io/). This provider supports a wide range of IaC frameworks, allowing you to assess your code before deployment.
## Supported Frameworks
The IaC provider leverages Checkov to support multiple frameworks, including:
- Terraform
- CloudFormation
- Kubernetes
- ARM (Azure Resource Manager)
- Serverless
- Dockerfile
- YAML/JSON (generic IaC)
- Bicep
- Helm
- GitHub Actions, GitLab CI, Bitbucket Pipelines, Azure Pipelines, CircleCI, Argo Workflows
- Ansible
- Kustomize
- OpenAPI
- SAST, SCA (Software Composition Analysis)
## How It Works
- The IaC provider scans your local directory (or a specified path) for supported IaC files.
- No cloud credentials or authentication are required.
- Mutelist logic is handled by Checkov, not Prowler.
- Results are output in the same formats as other Prowler providers (CSV, JSON, HTML, etc.).
## Usage
To run Prowler with the IaC provider, use the `iac` argument. You can specify the directory to scan, frameworks to include, and paths to exclude.
### Basic Example
```sh
prowler iac --scan-path ./my-iac-directory
```
### Specify Frameworks
Scan only Terraform and Kubernetes files:
```sh
prowler iac --scan-path ./my-iac-directory --frameworks terraform kubernetes
```
### Exclude Paths
```sh
prowler iac --scan-path ./my-iac-directory --exclude-path ./my-iac-directory/test,./my-iac-directory/examples
```
## Output
You can use the standard Prowler output options, for example:
```sh
prowler iac --scan-path ./iac --output-formats csv json html
```
## Notes
- The IaC provider does not require cloud authentication.
- It is ideal for CI/CD pipelines and local development environments.
- For more details on supported frameworks and rules, see the [Checkov documentation](https://www.checkov.io/1.Welcome/Quick%20Start.html).

Some files were not shown because too many files have changed in this diff Show More