Compare commits

..

7 Commits

Author SHA1 Message Date
sumit_chaturvedi d187b4da0a Merge branch 'master' into PRWLR-7134-api-performance-tests-for-resources 2025-06-17 09:20:12 +05:30
sumit_chaturvedi 2bdafe7e8c srn Reformatted the files 2025-06-17 08:56:23 +05:30
sumit_chaturvedi d365205e06 srn Removed unused imports 2025-06-17 08:44:16 +05:30
sumit_chaturvedi 75da655059 test(performance): Refactor resource tests, and address PR feedback 2025-06-16 19:06:13 +05:30
sumit_chaturvedi 5050345beb Merge branch 'master' into PRWLR-7134-api-performance-tests-for-resources 2025-06-16 18:50:48 +05:30
sumit_chaturvedi a069042304 test(performance): formatted the files 2025-06-03 11:35:23 +05:30
sumit_chaturvedi 4156033183 test(performance): Added resources performance tests 2025-06-03 10:21:06 +05:30
715 changed files with 12585 additions and 37912 deletions
+3 -6
View File
@@ -6,15 +6,11 @@
PROWLER_UI_VERSION="stable"
AUTH_URL=http://localhost:3000
API_BASE_URL=http://prowler-api:8080/api/v1
NEXT_PUBLIC_API_BASE_URL=${API_BASE_URL}
NEXT_PUBLIC_API_DOCS_URL=http://prowler-api:8080/api/v1/docs
AUTH_TRUST_HOST=true
UI_PORT=3000
# openssl rand -base64 32
AUTH_SECRET="N/c6mnaS5+SWq81+819OrzQZlmx1Vxtp/orjttJSmw8="
# Google Tag Manager ID
NEXT_PUBLIC_GOOGLE_TAG_MANAGER_ID=""
#### Prowler API Configuration ####
PROWLER_API_VERSION="stable"
@@ -131,7 +127,7 @@ SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.7.5
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.6.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
@@ -143,7 +139,8 @@ SOCIAL_GITHUB_OAUTH_CLIENT_ID=""
SOCIAL_GITHUB_OAUTH_CLIENT_SECRET=""
# Single Sign-On (SSO)
SAML_SSO_CALLBACK_URL="${AUTH_URL}/api/auth/callback/saml"
SAML_PUBLIC_CERT=""
SAML_PRIVATE_KEY=""
# Lighthouse tracing
LANGSMITH_TRACING=false
-5
View File
@@ -27,11 +27,6 @@ provider/github:
- any-glob-to-any-file: "prowler/providers/github/**"
- any-glob-to-any-file: "tests/providers/github/**"
provider/iac:
- changed-files:
- any-glob-to-any-file: "prowler/providers/iac/**"
- any-glob-to-any-file: "tests/providers/iac/**"
github_actions:
- changed-files:
- any-glob-to-any-file: ".github/workflows/*"
@@ -6,7 +6,6 @@ on:
- "master"
paths:
- "api/**"
- "prowler/**"
- ".github/workflows/api-build-lint-push-containers.yml"
# Uncomment the code below to test this action on PRs
@@ -77,7 +76,7 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
# Comment the following line for testing
+2 -2
View File
@@ -48,12 +48,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/api-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
category: "/language:${{matrix.language}}"
+8 -3
View File
@@ -136,6 +136,12 @@ jobs:
run: |
poetry check --lock
- name: Prevents known compatibility error between lxml and libxml2/libxmlsec versions - https://github.com/xmlsec/python-xmlsec/issues/320
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
poetry run pip install --force-reinstall --no-binary lxml lxml
- name: Lint with ruff
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
@@ -163,9 +169,8 @@ jobs:
- name: Safety
working-directory: ./api
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
# 76352, 76353, 77323 come from SDK, but they cannot upgrade it yet. It does not affect API
run: |
poetry run safety check --ignore 70612,66963,74429,76352,76353,77323
poetry run safety check --ignore 70612,66963,74429
- name: Vulture
working-directory: ./api
@@ -205,7 +210,7 @@ jobs:
files_ignore: ${{ env.IGNORE_FILES }}
- name: Set up Docker Buildx
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
+1 -1
View File
@@ -11,7 +11,7 @@ jobs:
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@6641d4ba5b684fffe195b9820345de1bf19f3181 # v3.89.2
uses: trufflesecurity/trufflehog@90694bf9af66e7536abc5824e7a87246dbf933cb # v3.88.35
with:
path: ./
base: ${{ github.event.repository.default_branch }}
@@ -1,215 +0,0 @@
name: Prowler Release Preparation
run-name: Prowler Release Preparation for ${{ inputs.prowler_version }}
on:
workflow_dispatch:
inputs:
prowler_version:
description: 'Prowler version to release (e.g., 5.9.0)'
required: true
type: string
env:
PROWLER_VERSION: ${{ github.event.inputs.prowler_version }}
jobs:
prepare-release:
if: github.repository == 'prowler-cloud/prowler'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Parse version and determine branch
run: |
# Validate version format (reusing pattern from sdk-bump-version.yml)
if [[ $PROWLER_VERSION =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
MAJOR_VERSION=${BASH_REMATCH[1]}
MINOR_VERSION=${BASH_REMATCH[2]}
PATCH_VERSION=${BASH_REMATCH[3]}
# Export version components to environment
echo "MAJOR_VERSION=${MAJOR_VERSION}" >> "${GITHUB_ENV}"
echo "MINOR_VERSION=${MINOR_VERSION}" >> "${GITHUB_ENV}"
echo "PATCH_VERSION=${PATCH_VERSION}" >> "${GITHUB_ENV}"
# Determine branch name (format: v5.9)
BRANCH_NAME="v${MAJOR_VERSION}.${MINOR_VERSION}"
echo "BRANCH_NAME=${BRANCH_NAME}" >> "${GITHUB_ENV}"
# Calculate UI version (1.X.X format - matches Prowler minor version)
UI_VERSION="1.${MINOR_VERSION}.${PATCH_VERSION}"
echo "UI_VERSION=${UI_VERSION}" >> "${GITHUB_ENV}"
# Calculate API version (1.X.X format - one minor version ahead)
API_MINOR_VERSION=$((MINOR_VERSION + 1))
API_VERSION="1.${API_MINOR_VERSION}.${PATCH_VERSION}"
echo "API_VERSION=${API_VERSION}" >> "${GITHUB_ENV}"
echo "Prowler version: $PROWLER_VERSION"
echo "Branch name: $BRANCH_NAME"
echo "UI version: $UI_VERSION"
echo "API version: $API_VERSION"
echo "Is minor release: $([ $PATCH_VERSION -eq 0 ] && echo 'true' || echo 'false')"
else
echo "Invalid version syntax: '$PROWLER_VERSION' (must be N.N.N)" >&2
exit 1
fi
- name: Checkout existing branch for patch release
if: ${{ env.PATCH_VERSION != '0' }}
run: |
echo "Patch release detected, checking out existing branch $BRANCH_NAME..."
if git show-ref --verify --quiet "refs/heads/$BRANCH_NAME"; then
echo "Branch $BRANCH_NAME exists locally, checking out..."
git checkout "$BRANCH_NAME"
elif git show-ref --verify --quiet "refs/remotes/origin/$BRANCH_NAME"; then
echo "Branch $BRANCH_NAME exists remotely, checking out..."
git checkout -b "$BRANCH_NAME" "origin/$BRANCH_NAME"
else
echo "ERROR: Branch $BRANCH_NAME should exist for patch release $PROWLER_VERSION"
exit 1
fi
- name: Verify version in pyproject.toml
run: |
CURRENT_VERSION=$(grep '^version = ' pyproject.toml | sed -E 's/version = "([^"]+)"/\1/' | tr -d '[:space:]')
PROWLER_VERSION_TRIMMED=$(echo "$PROWLER_VERSION" | tr -d '[:space:]')
if [ "$CURRENT_VERSION" != "$PROWLER_VERSION_TRIMMED" ]; then
echo "ERROR: Version mismatch in pyproject.toml (expected: '$PROWLER_VERSION_TRIMMED', found: '$CURRENT_VERSION')"
exit 1
fi
echo "✓ pyproject.toml version: $CURRENT_VERSION"
- name: Verify version in prowler/config/config.py
run: |
CURRENT_VERSION=$(grep '^prowler_version = ' prowler/config/config.py | sed -E 's/prowler_version = "([^"]+)"/\1/' | tr -d '[:space:]')
PROWLER_VERSION_TRIMMED=$(echo "$PROWLER_VERSION" | tr -d '[:space:]')
if [ "$CURRENT_VERSION" != "$PROWLER_VERSION_TRIMMED" ]; then
echo "ERROR: Version mismatch in prowler/config/config.py (expected: '$PROWLER_VERSION_TRIMMED', found: '$CURRENT_VERSION')"
exit 1
fi
echo "✓ prowler/config/config.py version: $CURRENT_VERSION"
- name: Verify version in api/pyproject.toml
run: |
CURRENT_API_VERSION=$(grep '^version = ' api/pyproject.toml | sed -E 's/version = "([^"]+)"/\1/' | tr -d '[:space:]')
API_VERSION_TRIMMED=$(echo "$API_VERSION" | tr -d '[:space:]')
if [ "$CURRENT_API_VERSION" != "$API_VERSION_TRIMMED" ]; then
echo "ERROR: API version mismatch in api/pyproject.toml (expected: '$API_VERSION_TRIMMED', found: '$CURRENT_API_VERSION')"
exit 1
fi
echo "✓ api/pyproject.toml version: $CURRENT_API_VERSION"
- name: Verify prowler dependency in api/pyproject.toml
run: |
CURRENT_PROWLER_REF=$(grep 'prowler @ git+https://github.com/prowler-cloud/prowler.git@' api/pyproject.toml | sed -E 's/.*@([^"]+)".*/\1/' | tr -d '[:space:]')
PROWLER_VERSION_TRIMMED=$(echo "$PROWLER_VERSION" | tr -d '[:space:]')
if [ "$CURRENT_PROWLER_REF" != "$PROWLER_VERSION_TRIMMED" ]; then
echo "ERROR: Prowler dependency mismatch in api/pyproject.toml (expected: '$PROWLER_VERSION_TRIMMED', found: '$CURRENT_PROWLER_REF')"
exit 1
fi
echo "✓ api/pyproject.toml prowler dependency: $CURRENT_PROWLER_REF"
- name: Verify version in api/src/backend/api/v1/views.py
run: |
CURRENT_API_VERSION=$(grep 'spectacular_settings.VERSION = ' api/src/backend/api/v1/views.py | sed -E 's/.*spectacular_settings.VERSION = "([^"]+)".*/\1/' | tr -d '[:space:]')
API_VERSION_TRIMMED=$(echo "$API_VERSION" | tr -d '[:space:]')
if [ "$CURRENT_API_VERSION" != "$API_VERSION_TRIMMED" ]; then
echo "ERROR: API version mismatch in views.py (expected: '$API_VERSION_TRIMMED', found: '$CURRENT_API_VERSION')"
exit 1
fi
echo "✓ api/src/backend/api/v1/views.py version: $CURRENT_API_VERSION"
- name: Create release branch for minor release
if: ${{ env.PATCH_VERSION == '0' }}
run: |
echo "Minor release detected (patch = 0), creating new branch $BRANCH_NAME..."
if git show-ref --verify --quiet "refs/heads/$BRANCH_NAME" || git show-ref --verify --quiet "refs/remotes/origin/$BRANCH_NAME"; then
echo "ERROR: Branch $BRANCH_NAME already exists for minor release $PROWLER_VERSION"
exit 1
fi
git checkout -b "$BRANCH_NAME"
- name: Extract changelog entries
run: |
set -e
# Function to extract changelog for a specific version
extract_changelog() {
local file="$1"
local version="$2"
local output_file="$3"
if [ ! -f "$file" ]; then
echo "Warning: $file not found, skipping..."
touch "$output_file"
return
fi
# Extract changelog section for this version
awk -v version="$version" '
/^## \[v?'"$version"'\]/ { found=1; next }
found && /^## \[v?[0-9]+\.[0-9]+\.[0-9]+\]/ { found=0 }
found && !/^## \[v?'"$version"'\]/ { print }
' "$file" > "$output_file"
# Remove --- separators
sed -i '/^---$/d' "$output_file"
# Remove trailing empty lines
sed -i '/^$/d' "$output_file"
}
# Extract changelogs
echo "Extracting changelog entries..."
extract_changelog "prowler/CHANGELOG.md" "$PROWLER_VERSION" "prowler_changelog.md"
extract_changelog "api/CHANGELOG.md" "$API_VERSION" "api_changelog.md"
extract_changelog "ui/CHANGELOG.md" "$UI_VERSION" "ui_changelog.md"
# Combine changelogs in order: UI, API, SDK
> combined_changelog.md
if [ -s "ui_changelog.md" ]; then
echo "## UI" >> combined_changelog.md
echo "" >> combined_changelog.md
cat ui_changelog.md >> combined_changelog.md
echo "" >> combined_changelog.md
fi
if [ -s "api_changelog.md" ]; then
echo "## API" >> combined_changelog.md
echo "" >> combined_changelog.md
cat api_changelog.md >> combined_changelog.md
echo "" >> combined_changelog.md
fi
if [ -s "prowler_changelog.md" ]; then
echo "## SDK" >> combined_changelog.md
echo "" >> combined_changelog.md
cat prowler_changelog.md >> combined_changelog.md
echo "" >> combined_changelog.md
fi
echo "Combined changelog preview:"
cat combined_changelog.md
- name: Create draft release
uses: softprops/action-gh-release@72f2c25fcb47643c292f7107632f7a47c1df5cd8 # v2.3.2
with:
tag_name: ${{ env.PROWLER_VERSION }}
name: Prowler ${{ env.PROWLER_VERSION }}
body_path: combined_changelog.md
draft: true
target_commitish: ${{ env.PATCH_VERSION == '0' && 'master' || env.BRANCH_NAME }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Clean up temporary files
run: |
rm -f prowler_changelog.md api_changelog.md ui_changelog.md combined_changelog.md
@@ -1,86 +0,0 @@
name: Check Changelog
on:
pull_request:
types: [opened, synchronize, reopened, labeled, unlabeled]
jobs:
check-changelog:
if: contains(github.event.pull_request.labels.*.name, 'no-changelog') == false
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
pull-requests: write
env:
MONITORED_FOLDERS: "api ui prowler"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Get list of changed files
id: changed_files
run: |
git fetch origin ${{ github.base_ref }}
git diff --name-only origin/${{ github.base_ref }}...HEAD > changed_files.txt
cat changed_files.txt
- name: Check for folder changes and changelog presence
id: check_folders
run: |
missing_changelogs=""
for folder in $MONITORED_FOLDERS; do
if grep -q "^${folder}/" changed_files.txt; then
echo "Detected changes in ${folder}/"
if ! grep -q "^${folder}/CHANGELOG.md$" changed_files.txt; then
echo "No changelog update found for ${folder}/"
missing_changelogs="${missing_changelogs}- \`${folder}\`\n"
fi
fi
done
echo "missing_changelogs<<EOF" >> $GITHUB_OUTPUT
echo -e "${missing_changelogs}" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- name: Find existing changelog comment
if: github.event.pull_request.head.repo.full_name == github.repository
id: find_comment
uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e #v3.1.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-author: 'github-actions[bot]'
body-includes: '<!-- changelog-check -->'
- name: Comment on PR if changelog is missing
if: github.event.pull_request.head.repo.full_name == github.repository && steps.check_folders.outputs.missing_changelogs != ''
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-id: ${{ steps.find_comment.outputs.comment-id }}
body: |
<!-- changelog-check -->
⚠️ **Changes detected in the following folders without a corresponding update to the `CHANGELOG.md`:**
${{ steps.check_folders.outputs.missing_changelogs }}
Please add an entry to the corresponding `CHANGELOG.md` file to maintain a clear history of changes.
- name: Comment on PR if all changelogs are present
if: github.event.pull_request.head.repo.full_name == github.repository && steps.check_folders.outputs.missing_changelogs == ''
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
issue-number: ${{ github.event.pull_request.number }}
comment-id: ${{ steps.find_comment.outputs.comment-id }}
body: |
<!-- changelog-check -->
✅ All necessary `CHANGELOG.md` files have been updated. Great job! 🎉
- name: Fail if changelog is missing
if: steps.check_folders.outputs.missing_changelogs != ''
run: |
echo "ERROR: Missing changelog updates in some folders."
exit 1
@@ -123,7 +123,7 @@ jobs:
AWS_REGION: ${{ env.AWS_REGION }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
if: github.event_name == 'push'
-2
View File
@@ -97,7 +97,6 @@ jobs:
commit-message: "chore(release): Bump version to v${{ env.BUMP_VERSION_TO }}"
branch: "version-bump-to-v${{ env.BUMP_VERSION_TO }}"
title: "chore(release): Bump version to v${{ env.BUMP_VERSION_TO }}"
labels: no-changelog
body: |
### Description
@@ -136,7 +135,6 @@ jobs:
commit-message: "chore(release): Bump version to v${{ env.PATCH_VERSION_TO }}"
branch: "version-bump-to-v${{ env.PATCH_VERSION_TO }}"
title: "chore(release): Bump version to v${{ env.PATCH_VERSION_TO }}"
labels: no-changelog
body: |
### Description
+2 -2
View File
@@ -56,12 +56,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/sdk-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
category: "/language:${{matrix.language}}"
+1 -23
View File
@@ -102,15 +102,8 @@ jobs:
run: |
poetry run vulture --exclude "contrib,api,ui" --min-confidence 100 .
- name: Dockerfile - Check if Dockerfile has changed
id: dockerfile-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
Dockerfile
- name: Hadolint
if: steps.dockerfile-changed-files.outputs.any_changed == 'true'
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
run: |
/tmp/hadolint Dockerfile --ignore=DL3013
@@ -219,21 +212,6 @@ jobs:
run: |
poetry run pytest -n auto --cov=./prowler/providers/m365 --cov-report=xml:m365_coverage.xml tests/providers/m365
# Test IaC
- name: IaC - Check if any file has changed
id: iac-changed-files
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
with:
files: |
./prowler/providers/iac/**
./tests/providers/iac/**
.poetry.lock
- name: IaC - Test
if: steps.iac-changed-files.outputs.any_changed == 'true'
run: |
poetry run pytest -n auto --cov=./prowler/providers/iac --cov-report=xml:iac_coverage.xml tests/providers/iac
# Common Tests
- name: Lib - Test
if: steps.are-non-ignored-files-changed.outputs.any_changed == 'true'
@@ -30,7 +30,6 @@ env:
# Container Registries
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-ui
NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
jobs:
repository-check:
@@ -77,7 +76,7 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push container image (latest)
# Comment the following line for testing
@@ -87,7 +86,6 @@ jobs:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=${{ env.SHORT_SHA }}
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
# Set push: false for testing
push: true
tags: |
@@ -103,7 +101,6 @@ jobs:
context: ${{ env.WORKING_DIRECTORY }}
build-args: |
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v${{ env.RELEASE_TAG }}
NEXT_PUBLIC_API_BASE_URL=${{ env.NEXT_PUBLIC_API_BASE_URL }}
push: true
tags: |
${{ env.PROWLERCLOUD_DOCKERHUB_REPOSITORY }}/${{ env.PROWLERCLOUD_DOCKERHUB_IMAGE }}:${{ env.RELEASE_TAG }}
+2 -2
View File
@@ -48,12 +48,12 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql/ui-codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
category: "/language:${{matrix.language}}"
+2 -51
View File
@@ -34,70 +34,21 @@ jobs:
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: './ui/package-lock.json'
- name: Install dependencies
working-directory: ./ui
run: npm ci
run: npm install
- name: Run Healthcheck
working-directory: ./ui
run: npm run healthcheck
- name: Build the application
working-directory: ./ui
run: npm run build
e2e-tests:
runs-on: ubuntu-latest
env:
AUTH_SECRET: 'fallback-ci-secret-for-testing'
AUTH_TRUST_HOST: true
NEXTAUTH_URL: http://localhost:3000
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '20.x'
cache: 'npm'
cache-dependency-path: './ui/package-lock.json'
- name: Install dependencies
working-directory: ./ui
run: npm ci
- name: Cache Playwright browsers
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
id: playwright-cache
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('ui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-
- name: Install Playwright browsers
working-directory: ./ui
if: steps.playwright-cache.outputs.cache-hit != 'true'
run: npm run test:e2e:install
- name: Build the application
working-directory: ./ui
run: npm run build
- name: Run Playwright tests
working-directory: ./ui
run: npm run test:e2e
- name: Upload Playwright report
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
if: failure()
with:
name: playwright-report
path: ui/playwright-report/
retention-days: 30
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build Container
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
with:
-10
View File
@@ -44,16 +44,6 @@ junit-reports/
# Cursor files
.cursorignore
.cursor/
# RooCode files
.roo/
.rooignore
.roomodes
# Cline files
.cline/
.clineignore
# Terraform
.terraform*
+1 -1
View File
@@ -115,7 +115,7 @@ repos:
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'safety check --ignore 70612,66963,74429,76352,76353'
entry: bash -c 'safety check --ignore 70612,66963,74429'
language: system
- id: vulture
+6 -3
View File
@@ -1,4 +1,4 @@
FROM python:3.12.11-slim-bookworm AS build
FROM python:3.12.10-slim-bookworm AS build
LABEL maintainer="https://github.com/prowler-cloud/prowler"
LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
@@ -6,8 +6,7 @@ LABEL org.opencontainers.image.source="https://github.com/prowler-cloud/prowler"
ARG POWERSHELL_VERSION=7.5.0
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y --no-install-recommends \
wget libicu72 libunwind8 libssl3 libcurl4 ca-certificates apt-transport-https gnupg \
RUN apt-get update && apt-get install -y --no-install-recommends wget libicu72 \
&& rm -rf /var/lib/apt/lists/*
# Install PowerShell
@@ -47,6 +46,10 @@ ENV PATH="${HOME}/.local/bin:${PATH}"
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry
# By default poetry does not compile Python source files to bytecode during installation.
# This speeds up the installation process, but the first execution may take a little more
# time because Python then compiles source files to bytecode automatically. If you want to
# compile source files to bytecode during installation, you can use the --compile option
RUN poetry install --compile && \
rm -rf ~/.cache/pip
+1 -9
View File
@@ -91,7 +91,7 @@ prowler dashboard
| Azure | 142 | 18 | 10 | 3 |
| Kubernetes | 83 | 7 | 5 | 7 |
| GitHub | 16 | 2 | 1 | 0 |
| M365 | 69 | 7 | 3 | 2 |
| M365 | 69 | 7 | 2 | 2 |
| NHN (Unofficial) | 6 | 2 | 1 | 0 |
> [!Note]
@@ -136,14 +136,6 @@ If your workstation's architecture is incompatible, you can resolve this by:
> Once configured, access the Prowler App at http://localhost:3000. Sign up using your email and password to get started.
### Common Issues with Docker Pull Installation
> [!Note]
If you want to use AWS role assumption (e.g., with the "Connect assuming IAM Role" option), you may need to mount your local `.aws` directory into the container as a volume (e.g., `- "${HOME}/.aws:/home/prowler/.aws:ro"`). There are several ways to configure credentials for Docker containers. See the [Troubleshooting](./docs/troubleshooting.md) section for more details and examples.
You can find more information in the [Troubleshooting](./docs/troubleshooting.md) section.
### From GitHub
**Requirements**
+168
View File
@@ -0,0 +1,168 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
/_data/
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
*.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# VSCode
.vscode/
+91
View File
@@ -0,0 +1,91 @@
repos:
## GENERAL
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-merge-conflict
- id: check-yaml
args: ["--unsafe"]
- id: check-json
- id: end-of-file-fixer
- id: trailing-whitespace
- id: no-commit-to-branch
- id: pretty-format-json
args: ["--autofix", "--no-sort-keys", "--no-ensure-ascii"]
exclude: 'src/backend/api/fixtures/dev/.*\.json$'
## TOML
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.13.0
hooks:
- id: pretty-format-toml
args: [--autofix]
files: pyproject.toml
## BASH
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck
exclude: contrib
## PYTHON
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.0
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check
args: ["--directory=src"]
- id: poetry-lock
args: ["--no-update", "--directory=src"]
- repo: https://github.com/hadolint/hadolint
rev: v2.13.0-beta
hooks:
- id: hadolint
args: ["--ignore=DL3013", "Dockerfile"]
- repo: local
hooks:
- id: pylint
name: pylint
entry: bash -c 'poetry run pylint --disable=W,C,R,E -j 0 -rn -sn src/'
language: system
files: '.*\.py'
- id: trufflehog
name: TruffleHog
description: Detect secrets in your data.
entry: bash -c 'trufflehog --no-update git file://. --only-verified --fail'
# For running trufflehog in docker, use the following entry instead:
# entry: bash -c 'docker run -v "$(pwd):/workdir" -i --rm trufflesecurity/trufflehog:latest git file:///workdir --only-verified --fail'
language: system
stages: ["commit", "push"]
- id: bandit
name: bandit
description: "Bandit is a tool for finding common security issues in Python code"
entry: bash -c 'poetry run bandit -q -lll -x '*_test.py,./contrib/,./.venv/' -r .'
language: system
files: '.*\.py'
- id: safety
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
entry: bash -c 'poetry run safety check --ignore 70612,66963,74429'
language: system
- id: vulture
name: vulture
description: "Vulture finds unused code in Python programs."
entry: bash -c 'poetry run vulture --exclude "contrib,.venv,tests,conftest.py" --min-confidence 100 .'
language: system
files: '.*\.py'
+2 -46
View File
@@ -2,60 +2,16 @@
All notable changes to the **Prowler API** are documented in this file.
## [v1.10.0] (Prowler v5.9.0)
### Added
- SSO with SAML support [(#8175)](https://github.com/prowler-cloud/prowler/pull/8175)
- `GET /resources/metadata`, `GET /resources/metadata/latest` and `GET /resources/latest` to expose resource metadata and latest scan results [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
### Changed
- `/processors` endpoints to post-process findings. Currently, only the Mutelist processor is supported to allow to mute findings.
- Optimized the underlying queries for resources endpoints [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
- Optimized include parameters for resources view [(#8229)](https://github.com/prowler-cloud/prowler/pull/8229)
- Optimized overview background tasks [(#8300)](https://github.com/prowler-cloud/prowler/pull/8300)
### Fixed
- Search filter for findings and resources [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
- RBAC is now applied to `GET /overviews/providers` [(#8277)](https://github.com/prowler-cloud/prowler/pull/8277)
### Changed
- `POST /schedules/daily` returns a `409 CONFLICT` if already created [(#8258)](https://github.com/prowler-cloud/prowler/pull/8258)
### Security
- Enhanced password validation to enforce 12+ character passwords with special characters, uppercase, lowercase, and numbers [(#8225)](https://github.com/prowler-cloud/prowler/pull/8225)
---
## [v1.9.1] (Prowler v5.8.1)
### Added
- Custom exception for provider connection errors during scans [(#8234)](https://github.com/prowler-cloud/prowler/pull/8234)
### Changed
- Summary and overview tasks now use a dedicated queue and no longer propagate errors to compliance tasks [(#8214)](https://github.com/prowler-cloud/prowler/pull/8214)
### Fixed
- Scan with no resources will not trigger legacy code for findings metadata [(#8183)](https://github.com/prowler-cloud/prowler/pull/8183)
- Invitation email comparison case-insensitive [(#8206)](https://github.com/prowler-cloud/prowler/pull/8206)
### Removed
- Validation of the provider's secret type during updates [(#8197)](https://github.com/prowler-cloud/prowler/pull/8197)
---
## [v1.9.0] (Prowler v5.8.0)
## [v1.9.0] (Prowler UNRELEASED)
### Added
- SSO with SAML support [(#7822)](https://github.com/prowler-cloud/prowler/pull/7822)
- Support GCP Service Account key [(#7824)](https://github.com/prowler-cloud/prowler/pull/7824)
- `GET /compliance-overviews` endpoints to retrieve compliance metadata and specific requirements statuses [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Lighthouse configuration support [(#7848)](https://github.com/prowler-cloud/prowler/pull/7848)
### Changed
- Reworked `GET /compliance-overviews` to return proper requirement metrics [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Optional `user` and `password` for M365 provider [(#7992)](https://github.com/prowler-cloud/prowler/pull/7992)
### Fixed
- Scheduled scans are no longer deleted when their daily schedule run is disabled [(#8082)](https://github.com/prowler-cloud/prowler/pull/8082)
---
+4
View File
@@ -57,6 +57,10 @@ RUN poetry install --no-root && \
RUN poetry run python "$(poetry env info --path)/src/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py"
# Prevents known compatibility error between lxml and libxml2/libxmlsec versions.
# See: https://github.com/xmlsec/python-xmlsec/issues/320
RUN poetry run pip install --force-reinstall --no-binary lxml lxml
COPY src/backend/ ./backend/
COPY docker-entrypoint.sh ./docker-entrypoint.sh
+1 -1
View File
@@ -257,7 +257,7 @@ cd src/backend
python manage.py loaddata api/fixtures/0_dev_users.json --database admin
```
> The default credentials are `dev@prowler.com:Thisisapassword123@` or `dev2@prowler.com:Thisisapassword123@`
> The default credentials are `dev@prowler.com:thisisapassword123` or `dev2@prowler.com:thisisapassword123`
## Run tests
+1 -1
View File
@@ -32,7 +32,7 @@ start_prod_server() {
start_worker() {
echo "Starting the worker..."
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion,backfill,overview -E --max-tasks-per-child 1
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion,backfill -E --max-tasks-per-child 1
}
start_worker_beat() {
+419 -1772
View File
File diff suppressed because it is too large Load Diff
+3 -5
View File
@@ -23,14 +23,12 @@ dependencies = [
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.9",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
"uuid6==2024.7.10",
"openai (>=1.82.0,<2.0.0)",
"xmlsec==1.3.14"
"openai (>=1.82.0,<2.0.0)"
]
description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
@@ -38,7 +36,7 @@ name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.10.0"
version = "1.9.0"
[project.scripts]
celery = "src.backend.config.settings.celery"
+58 -6
View File
@@ -3,7 +3,14 @@ from django.db import transaction
from api.db_router import MainRouter
from api.db_utils import rls_transaction
from api.models import Membership, Role, Tenant, User, UserRoleRelationship
from api.models import (
Membership,
Role,
SAMLConfiguration,
Tenant,
User,
UserRoleRelationship,
)
class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
@@ -17,7 +24,7 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
def pre_social_login(self, request, sociallogin):
# Link existing accounts with the same email address
email = sociallogin.account.extra_data.get("email")
if sociallogin.provider.id == "saml":
if sociallogin.account.provider == "saml":
email = sociallogin.user.email
if email:
existing_user = self.get_user_by_email(email)
@@ -31,10 +38,57 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
"""
with transaction.atomic(using=MainRouter.admin_db):
user = super().save_user(request, sociallogin, form)
provider = sociallogin.provider.id
provider = sociallogin.account.provider
extra = sociallogin.account.extra_data
if provider != "saml":
if provider == "saml":
# Handle SAML-specific logic
user.first_name = extra.get("firstName", [""])[0]
user.last_name = extra.get("lastName", [""])[0]
user.company_name = extra.get("organization", [""])[0]
user.name = f"{user.first_name} {user.last_name}".strip()
user.save(using=MainRouter.admin_db)
email_domain = user.email.split("@")[-1]
tenant = (
SAMLConfiguration.objects.using(MainRouter.admin_db)
.get(email_domain=email_domain)
.tenant
)
with rls_transaction(str(tenant.id)):
role_name = extra.get("userType", ["saml_default_role"])[0].strip()
try:
role = Role.objects.using(MainRouter.admin_db).get(
name=role_name, tenant_id=tenant.id
)
except Role.DoesNotExist:
role = Role.objects.using(MainRouter.admin_db).create(
name=role_name,
tenant_id=tenant.id,
manage_users=False,
manage_account=False,
manage_billing=False,
manage_providers=False,
manage_integrations=False,
manage_scans=False,
unlimited_visibility=False,
)
Membership.objects.using(MainRouter.admin_db).create(
user=user,
tenant=tenant,
role=Membership.RoleChoices.MEMBER,
)
UserRoleRelationship.objects.using(MainRouter.admin_db).create(
user=user,
role=role,
tenant_id=tenant.id,
)
else:
# Handle other providers (e.g., GitHub, Google)
user.save(using=MainRouter.admin_db)
social_account_name = extra.get("name")
@@ -65,7 +119,5 @@ class ProwlerSocialAccountAdapter(DefaultSocialAccountAdapter):
role=role,
tenant_id=tenant.id,
)
else:
request.session["saml_user_created"] = str(user.id)
return user
-35
View File
@@ -175,29 +175,6 @@ def create_objects_in_batches(
model.objects.bulk_create(chunk, batch_size)
def update_objects_in_batches(
tenant_id: str, model, objects: list, fields: list, batch_size: int = 500
):
"""
Bulk-update model instances in repeated, per-tenant RLS transactions.
All chunks execute in their own transaction, so no single transaction
grows too large.
Args:
tenant_id (str): UUID string of the tenant under which to set RLS.
model: Django model class whose `.objects.bulk_update()` will be called.
objects (list): List of model instances (saved) to bulk-update.
fields (list): List of field names to update.
batch_size (int): Maximum number of objects per bulk_update call.
"""
total = len(objects)
for start in range(0, total, batch_size):
chunk = objects[start : start + batch_size]
with rls_transaction(value=tenant_id, parameter=POSTGRES_TENANT_VAR):
model.objects.bulk_update(chunk, fields, batch_size)
# Postgres Enums
@@ -552,15 +529,3 @@ class IntegrationTypeEnum(EnumType):
class IntegrationTypeEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("integration_type", *args, **kwargs)
# Postgres enum definition for Processor type
class ProcessorTypeEnum(EnumType):
enum_type_name = "processor_type"
class ProcessorTypeEnumField(PostgresEnumField):
def __init__(self, *args, **kwargs):
super().__init__("processor_type", *args, **kwargs)
-23
View File
@@ -57,11 +57,6 @@ class TaskInProgressException(TaskManagementError):
super().__init__()
# Provider connection errors
class ProviderConnectionError(Exception):
"""Base exception for provider connection errors."""
def custom_exception_handler(exc, context):
if isinstance(exc, django_validation_error):
if hasattr(exc, "error_dict"):
@@ -78,21 +73,3 @@ def custom_exception_handler(exc, context):
message_item["message"] for message_item in exc.detail["messages"]
]
return exception_handler(exc, context)
class ConflictException(APIException):
status_code = status.HTTP_409_CONFLICT
default_detail = "A conflict occurred. The resource already exists."
default_code = "conflict"
def __init__(self, detail=None, code=None, pointer=None):
error_detail = {
"detail": detail or self.default_detail,
"status": self.status_code,
"code": self.default_code,
}
if pointer:
error_detail["source"] = {"pointer": pointer}
super().__init__(detail=[error_detail])
-89
View File
@@ -1,6 +1,5 @@
from datetime import date, datetime, timedelta, timezone
from dateutil.parser import parse
from django.conf import settings
from django.db.models import Q
from django_filters.rest_framework import (
@@ -29,7 +28,6 @@ from api.models import (
Invitation,
Membership,
PermissionChoices,
Processor,
Provider,
ProviderGroup,
ProviderSecret,
@@ -340,8 +338,6 @@ class ResourceFilter(ProviderRelationshipFilterSet):
tags = CharFilter(method="filter_tag")
inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
updated_at = DateFilter(field_name="updated_at", lookup_expr="date")
scan = UUIDFilter(field_name="provider__scan", lookup_expr="exact")
scan__in = UUIDInFilter(field_name="provider__scan", lookup_expr="in")
class Meta:
model = Resource
@@ -356,82 +352,6 @@ class ResourceFilter(ProviderRelationshipFilterSet):
"updated_at": ["gte", "lte"],
}
def filter_queryset(self, queryset):
if not (self.data.get("scan") or self.data.get("scan__in")) and not (
self.data.get("updated_at")
or self.data.get("updated_at__date")
or self.data.get("updated_at__gte")
or self.data.get("updated_at__lte")
):
raise ValidationError(
[
{
"detail": "At least one date filter is required: filter[updated_at], filter[updated_at.gte], "
"or filter[updated_at.lte].",
"status": 400,
"source": {"pointer": "/data/attributes/updated_at"},
"code": "required",
}
]
)
gte_date = (
parse(self.data.get("updated_at__gte")).date()
if self.data.get("updated_at__gte")
else datetime.now(timezone.utc).date()
)
lte_date = (
parse(self.data.get("updated_at__lte")).date()
if self.data.get("updated_at__lte")
else datetime.now(timezone.utc).date()
)
if abs(lte_date - gte_date) > timedelta(
days=settings.FINDINGS_MAX_DAYS_IN_RANGE
):
raise ValidationError(
[
{
"detail": f"The date range cannot exceed {settings.FINDINGS_MAX_DAYS_IN_RANGE} days.",
"status": 400,
"source": {"pointer": "/data/attributes/updated_at"},
"code": "invalid",
}
]
)
return super().filter_queryset(queryset)
def filter_tag_key(self, queryset, name, value):
return queryset.filter(Q(tags__key=value) | Q(tags__key__icontains=value))
def filter_tag_value(self, queryset, name, value):
return queryset.filter(Q(tags__value=value) | Q(tags__value__icontains=value))
def filter_tag(self, queryset, name, value):
# We won't know what the user wants to filter on just based on the value,
# and we don't want to build special filtering logic for every possible
# provider tag spec, so we'll just do a full text search
return queryset.filter(tags__text_search=value)
class LatestResourceFilter(ProviderRelationshipFilterSet):
tag_key = CharFilter(method="filter_tag_key")
tag_value = CharFilter(method="filter_tag_value")
tag = CharFilter(method="filter_tag")
tags = CharFilter(method="filter_tag")
class Meta:
model = Resource
fields = {
"provider": ["exact", "in"],
"uid": ["exact", "icontains"],
"name": ["exact", "icontains"],
"region": ["exact", "icontains", "in"],
"service": ["exact", "icontains", "in"],
"type": ["exact", "icontains", "in"],
}
def filter_tag_key(self, queryset, name, value):
return queryset.filter(Q(tags__key=value) | Q(tags__key__icontains=value))
@@ -784,12 +704,3 @@ class IntegrationFilter(FilterSet):
fields = {
"inserted_at": ["date", "gte", "lte"],
}
class ProcessorFilter(FilterSet):
processor_type = ChoiceFilter(choices=Processor.ProcessorChoices.choices)
processor_type__in = ChoiceInFilter(
choices=Processor.ProcessorChoices.choices,
field_name="processor_type",
lookup_expr="in",
)
@@ -3,7 +3,7 @@
"model": "api.user",
"pk": "8b38e2eb-6689-4f1e-a4ba-95b275130200",
"fields": {
"password": "pbkdf2_sha256$870000$Z63pGJ7nre48hfcGbk5S0O$rQpKczAmijs96xa+gPVJifpT3Fetb8DOusl5Eq6gxac=",
"password": "pbkdf2_sha256$720000$vA62S78kog2c2ytycVQdke$Fp35GVLLMyy5fUq3krSL9I02A+ocQ+RVa4S22LIAO5s=",
"last_login": null,
"name": "Devie Prowlerson",
"email": "dev@prowler.com",
@@ -16,7 +16,7 @@
"model": "api.user",
"pk": "b6493a3a-c997-489b-8b99-278bf74de9f6",
"fields": {
"password": "pbkdf2_sha256$870000$Z63pGJ7nre48hfcGbk5S0O$rQpKczAmijs96xa+gPVJifpT3Fetb8DOusl5Eq6gxac=",
"password": "pbkdf2_sha256$720000$vA62S78kog2c2ytycVQdke$Fp35GVLLMyy5fUq3krSL9I02A+ocQ+RVa4S22LIAO5s=",
"last_login": null,
"name": "Devietoo Prowlerson",
"email": "dev2@prowler.com",
@@ -1,61 +1,57 @@
# Generated by Django 5.1.10 on 2025-07-02 15:47
# Generated by Django 5.1.8 on 2025-05-15 09:54
import uuid
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
import api.db_utils
import api.rls
class Migration(migrations.Migration):
dependencies = [
("api", "0031_scan_disable_on_cascade_periodic_tasks"),
("api", "0029_findings_check_index_parent"),
]
operations = [
migrations.AlterField(
model_name="integration",
name="integration_type",
field=api.db_utils.IntegrationTypeEnumField(
choices=[
("amazon_s3", "Amazon S3"),
("aws_security_hub", "AWS Security Hub"),
("jira", "JIRA"),
("slack", "Slack"),
]
),
),
migrations.CreateModel(
name="SAMLToken",
name="SAMLDomainIndex",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
("expires_at", models.DateTimeField(editable=False)),
("token", models.JSONField(unique=True)),
("email_domain", models.CharField(max_length=254, unique=True)),
(
"user",
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "saml_tokens",
"db_table": "saml_domain_index",
},
),
migrations.AddConstraint(
model_name="samldomainindex",
constraint=models.UniqueConstraint(
fields=("email_domain", "tenant"),
name="unique_resources_by_email_domain",
),
),
migrations.AddConstraint(
model_name="samldomainindex",
constraint=api.rls.BaseSecurityConstraint(
name="statements_on_samldomainindex",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
migrations.CreateModel(
name="SAMLConfiguration",
fields=[
@@ -109,42 +105,16 @@ class Migration(migrations.Migration):
fields=("tenant",), name="unique_samlconfig_per_tenant"
),
),
migrations.CreateModel(
name="SAMLDomainIndex",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("email_domain", models.CharField(max_length=254, unique=True)),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "saml_domain_index",
},
),
migrations.AddConstraint(
model_name="samldomainindex",
constraint=models.UniqueConstraint(
fields=("email_domain", "tenant"),
name="unique_resources_by_email_domain",
),
),
migrations.AddConstraint(
model_name="samldomainindex",
constraint=api.rls.BaseSecurityConstraint(
name="statements_on_samldomainindex",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
migrations.AlterField(
model_name="integration",
name="integration_type",
field=api.db_utils.IntegrationTypeEnumField(
choices=[
("amazon_s3", "Amazon S3"),
("aws_security_hub", "AWS Security Hub"),
("jira", "JIRA"),
("slack", "Slack"),
]
),
),
]
@@ -11,7 +11,7 @@ import api.rls
class Migration(migrations.Migration):
dependencies = [
("api", "0029_findings_check_index_parent"),
("api", "0030_samlconfigurations"),
]
operations = [
@@ -54,7 +54,6 @@ class Migration(migrations.Migration):
("gpt-4o-mini-2024-07-18", "GPT-4o Mini v2024-07-18"),
("gpt-4o-mini", "GPT-4o Mini Default"),
],
default="gpt-4o-2024-08-06",
help_text="Must be one of the supported model names",
max_length=50,
),
@@ -1,24 +0,0 @@
# Generated by Django 5.1.10 on 2025-06-23 10:04
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0030_lighthouseconfiguration"),
("django_celery_beat", "0019_alter_periodictasks_options"),
]
operations = [
migrations.AlterField(
model_name="scan",
name="scheduler_task",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="django_celery_beat.periodictask",
),
),
]
@@ -1,34 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-03 15:46
from functools import partial
from django.db import migrations
from api.db_utils import PostgresEnumMigration, ProcessorTypeEnum, register_enum
from api.models import Processor
ProcessorTypeEnumMigration = PostgresEnumMigration(
enum_name="processor_type",
enum_values=tuple(
processor_type[0] for processor_type in Processor.ProcessorChoices.choices
),
)
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0032_saml"),
]
operations = [
migrations.RunPython(
ProcessorTypeEnumMigration.create_enum_type,
reverse_code=ProcessorTypeEnumMigration.drop_enum_type,
),
migrations.RunPython(
partial(register_enum, enum_class=ProcessorTypeEnum),
reverse_code=migrations.RunPython.noop,
),
]
@@ -1,88 +0,0 @@
# Generated by Django 5.1.5 on 2025-03-26 13:04
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
from api.rls import RowLevelSecurityConstraint
class Migration(migrations.Migration):
dependencies = [
("api", "0033_processors_enum"),
]
operations = [
migrations.CreateModel(
name="Processor",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
("inserted_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
(
"processor_type",
api.db_utils.ProcessorTypeEnumField(
choices=[("mutelist", "Mutelist")]
),
),
("configuration", models.JSONField(default=dict)),
(
"tenant",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
),
),
],
options={
"db_table": "processors",
"abstract": False,
"indexes": [
models.Index(
fields=["tenant_id", "id"], name="processor_tenant_id_idx"
),
models.Index(
fields=["tenant_id", "processor_type"],
name="processor_tenant_type_idx",
),
],
},
),
migrations.AddConstraint(
model_name="processor",
constraint=models.UniqueConstraint(
fields=("tenant_id", "processor_type"),
name="unique_processor_types_tenant",
),
),
migrations.AddConstraint(
model_name="processor",
constraint=RowLevelSecurityConstraint(
"tenant_id",
name="rls_on_processor",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
),
migrations.AddField(
model_name="scan",
name="processor",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="scans",
related_query_name="scan",
to="api.processor",
),
),
]
@@ -1,22 +0,0 @@
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0034_processors"),
]
operations = [
migrations.AddField(
model_name="finding",
name="muted_reason",
field=models.TextField(
blank=True,
max_length=500,
null=True,
validators=[django.core.validators.MinLengthValidator(3)],
),
),
]
@@ -1,30 +0,0 @@
from functools import partial
from django.db import migrations
from api.db_utils import create_index_on_partitions, drop_index_on_partitions
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0035_finding_muted_reason"),
]
operations = [
migrations.RunPython(
partial(
create_index_on_partitions,
parent_table="resource_finding_mappings",
index_name="rfm_tenant_finding_idx",
columns="tenant_id, finding_id",
method="BTREE",
),
reverse_code=partial(
drop_index_on_partitions,
parent_table="resource_finding_mappings",
index_name="rfm_tenant_finding_idx",
),
),
]
@@ -1,17 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0036_rfm_tenant_finding_index_partitions"),
]
operations = [
migrations.AddIndex(
model_name="resourcefindingmapping",
index=models.Index(
fields=["tenant_id", "finding_id"],
name="rfm_tenant_finding_idx",
),
),
]
@@ -1,15 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("api", "0037_rfm_tenant_finding_index_parent"),
]
operations = [
migrations.AddField(
model_name="resource",
name="failed_findings_count",
field=models.IntegerField(default=0),
)
]
@@ -1,20 +0,0 @@
from django.contrib.postgres.operations import AddIndexConcurrently
from django.db import migrations, models
class Migration(migrations.Migration):
atomic = False
dependencies = [
("api", "0038_resource_failed_findings_count"),
]
operations = [
AddIndexConcurrently(
model_name="resource",
index=models.Index(
fields=["tenant_id", "-failed_findings_count", "id"],
name="resources_failed_findings_idx",
),
),
]
+25 -144
View File
@@ -2,7 +2,6 @@ import json
import logging
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone
from uuid import UUID, uuid4
from allauth.socialaccount.models import SocialApp
@@ -34,7 +33,6 @@ from api.db_utils import (
IntegrationTypeEnumField,
InvitationStateEnumField,
MemberRoleEnumField,
ProcessorTypeEnumField,
ProviderEnumField,
ProviderSecretTypeEnumField,
ScanTriggerEnumField,
@@ -410,6 +408,20 @@ class Scan(RowLevelSecurityProtectedModel):
name = models.CharField(
blank=True, null=True, max_length=100, validators=[MinLengthValidator(3)]
)
provider = models.ForeignKey(
Provider,
on_delete=models.CASCADE,
related_name="scans",
related_query_name="scan",
)
task = models.ForeignKey(
Task,
on_delete=models.CASCADE,
related_name="scans",
related_query_name="scan",
null=True,
blank=True,
)
trigger = ScanTriggerEnumField(
choices=TriggerChoices.choices,
)
@@ -425,31 +437,11 @@ class Scan(RowLevelSecurityProtectedModel):
completed_at = models.DateTimeField(null=True, blank=True)
next_scan_at = models.DateTimeField(null=True, blank=True)
scheduler_task = models.ForeignKey(
PeriodicTask, on_delete=models.SET_NULL, null=True, blank=True
PeriodicTask, on_delete=models.CASCADE, null=True, blank=True
)
output_location = models.CharField(blank=True, null=True, max_length=200)
provider = models.ForeignKey(
Provider,
on_delete=models.CASCADE,
related_name="scans",
related_query_name="scan",
)
task = models.ForeignKey(
Task,
on_delete=models.CASCADE,
related_name="scans",
related_query_name="scan",
null=True,
blank=True,
)
processor = models.ForeignKey(
"Processor",
on_delete=models.SET_NULL,
related_name="scans",
related_query_name="scan",
null=True,
blank=True,
)
# TODO: mutelist foreign key
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "scans"
@@ -561,8 +553,6 @@ class Resource(RowLevelSecurityProtectedModel):
details = models.TextField(blank=True, null=True)
partition = models.TextField(blank=True, null=True)
failed_findings_count = models.IntegerField(default=0)
# Relationships
tags = models.ManyToManyField(
ResourceTag,
@@ -609,10 +599,6 @@ class Resource(RowLevelSecurityProtectedModel):
fields=["tenant_id", "provider_id"],
name="resources_tenant_provider_idx",
),
models.Index(
fields=["tenant_id", "-failed_findings_count", "id"],
name="resources_failed_findings_idx",
),
]
constraints = [
@@ -711,9 +697,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
check_id = models.CharField(max_length=100, blank=False, null=False)
check_metadata = models.JSONField(default=dict, null=False)
muted = models.BooleanField(default=False, null=False)
muted_reason = models.TextField(
blank=True, null=True, validators=[MinLengthValidator(3)], max_length=500
)
compliance = models.JSONField(default=dict, null=True, blank=True)
# Denormalize resource data for performance
@@ -855,12 +838,6 @@ class ResourceFindingMapping(PostgresPartitionedModel, RowLevelSecurityProtected
# - tenant_id
# - id
indexes = [
models.Index(
fields=["tenant_id", "finding_id"],
name="rfm_tenant_finding_idx",
),
]
constraints = [
models.UniqueConstraint(
fields=("tenant_id", "resource_id", "finding_id"),
@@ -965,11 +942,6 @@ class Invitation(RowLevelSecurityProtectedModel):
null=True,
)
def save(self, *args, **kwargs):
if self.email:
self.email = self.email.strip().lower()
super().save(*args, **kwargs)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "invitations"
@@ -1398,26 +1370,6 @@ class IntegrationProviderRelationship(RowLevelSecurityProtectedModel):
]
class SAMLToken(models.Model):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
expires_at = models.DateTimeField(editable=False)
token = models.JSONField(unique=True)
user = models.ForeignKey(User, on_delete=models.CASCADE)
class Meta:
db_table = "saml_tokens"
def save(self, *args, **kwargs):
if not self.expires_at:
self.expires_at = datetime.now(timezone.utc) + timedelta(seconds=15)
super().save(*args, **kwargs)
def is_expired(self) -> bool:
return datetime.now(timezone.utc) >= self.expires_at
class SAMLDomainIndex(models.Model):
"""
Public index of SAML domains. No RLS. Used for fast lookup in SAML login flow.
@@ -1495,7 +1447,7 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
),
]
def clean(self, old_email_domain=None, is_create=False):
def clean(self, old_email_domain=None):
# Domain must not contain @
if "@" in self.email_domain:
raise ValidationError({"email_domain": "Domain must not contain @"})
@@ -1519,25 +1471,6 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
{"tenant": "There is a problem with your email domain."}
)
# The entityID must be unique in the system
idp_settings = self._parsed_metadata
entity_id = idp_settings.get("entity_id")
if entity_id:
# Find any SocialApp with this entityID
q = SocialApp.objects.filter(provider="saml", provider_id=entity_id)
# If updating, exclude our own SocialApp from the check
if not is_create:
q = q.exclude(client_id=old_email_domain)
else:
q = q.exclude(client_id=self.email_domain)
if q.exists():
raise ValidationError(
{"metadata_xml": "There is a problem with your metadata."}
)
def save(self, *args, **kwargs):
self.email_domain = self.email_domain.strip().lower()
is_create = not SAMLConfiguration.objects.filter(pk=self.pk).exists()
@@ -1550,8 +1483,7 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
old_email_domain = None
old_metadata_xml = None
self._parsed_metadata = self._parse_metadata()
self.clean(old_email_domain, is_create)
self.clean(old_email_domain)
super().save(*args, **kwargs)
if is_create or (
@@ -1569,12 +1501,6 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
email_domain=self.email_domain, defaults={"tenant": self.tenant}
)
def delete(self, *args, **kwargs):
super().delete(*args, **kwargs)
SocialApp.objects.filter(provider="saml", client_id=self.email_domain).delete()
SAMLDomainIndex.objects.filter(email_domain=self.email_domain).delete()
def _parse_metadata(self):
"""
Parse the raw IdP metadata XML and extract:
@@ -1594,8 +1520,6 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
# Entity ID
entity_id = root.attrib.get("entityID")
if not entity_id:
raise ValidationError({"metadata_xml": "Missing entityID in metadata."})
# SSO endpoint (must exist)
sso = root.find(".//md:IDPSSODescriptor/md:SingleSignOnService", ns)
@@ -1634,8 +1558,9 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
Create or update the corresponding SocialApp based on email_domain.
If the domain changed, update the matching SocialApp.
"""
idp_settings = self._parse_metadata()
settings_dict = SOCIALACCOUNT_PROVIDERS["saml"].copy()
settings_dict["idp"] = self._parsed_metadata
settings_dict["idp"] = idp_settings
current_site = Site.objects.get(id=settings.SITE_ID)
@@ -1643,24 +1568,19 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
provider="saml", client_id=previous_email_domain or self.email_domain
)
client_id = self.email_domain[:191]
name = f"SAML-{self.email_domain}"[:40]
if social_app_qs.exists():
social_app = social_app_qs.first()
social_app.client_id = client_id
social_app.name = name
social_app.client_id = self.email_domain
social_app.name = f"{self.tenant.name} SAML ({self.email_domain})"
social_app.settings = settings_dict
social_app.provider_id = self._parsed_metadata["entity_id"]
social_app.save()
social_app.sites.set([current_site])
else:
social_app = SocialApp.objects.create(
provider="saml",
client_id=client_id,
name=name,
client_id=self.email_domain,
name=f"{self.tenant.name} SAML ({self.email_domain})",
settings=settings_dict,
provider_id=self._parsed_metadata["entity_id"],
)
social_app.sites.set([current_site])
@@ -1839,42 +1759,3 @@ class LighthouseConfiguration(RowLevelSecurityProtectedModel):
class JSONAPIMeta:
resource_name = "lighthouse-configurations"
class Processor(RowLevelSecurityProtectedModel):
class ProcessorChoices(models.TextChoices):
MUTELIST = "mutelist", _("Mutelist")
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
updated_at = models.DateTimeField(auto_now=True, editable=False)
processor_type = ProcessorTypeEnumField(choices=ProcessorChoices.choices)
configuration = models.JSONField(default=dict)
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "processors"
constraints = [
models.UniqueConstraint(
fields=("tenant_id", "processor_type"),
name="unique_processor_types_tenant",
),
RowLevelSecurityConstraint(
field="tenant_id",
name="rls_on_%(class)s",
statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
),
]
indexes = [
models.Index(
fields=["tenant_id", "id"],
name="processor_tenant_id_idx",
),
models.Index(
fields=["tenant_id", "processor_type"],
name="processor_tenant_type_idx",
),
]
class JSONAPIMeta:
resource_name = "processors"
File diff suppressed because it is too large Load Diff
@@ -11,7 +11,7 @@ def test_basic_authentication():
client = APIClient()
test_user = "test_email@prowler.com"
test_password = "Test_password@1"
test_password = "test_password"
# Check that a 401 is returned when no basic authentication is provided
no_auth_response = client.get(reverse("provider-list"))
@@ -108,7 +108,7 @@ def test_user_me_when_inviting_users(create_test_user, tenants_fixture, roles_fi
user1_email = "user1@testing.com"
user2_email = "user2@testing.com"
password = "Thisisapassword123@"
password = "thisisapassword123"
user1_response = client.post(
reverse("user-list"),
@@ -187,7 +187,7 @@ class TestTokenSwitchTenant:
client = APIClient()
test_user = "test_email@prowler.com"
test_password = "Test_password1@"
test_password = "test_password"
# Check that we can create a new user without any kind of authentication
user_creation_response = client.post(
@@ -17,7 +17,7 @@ def test_delete_provider_without_executing_task(
client = APIClient()
test_user = "test_email@prowler.com"
test_password = "Test_password1@"
test_password = "test_password"
prowler_task = tasks_fixture[0]
task_mock = Mock()
+26 -21
View File
@@ -1,10 +1,12 @@
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock
import pytest
from allauth.socialaccount.models import SocialLogin
from django.contrib.auth import get_user_model
from api.adapters import ProwlerSocialAccountAdapter
from api.db_router import MainRouter
from api.models import Membership, SAMLConfiguration, Tenant
User = get_user_model()
@@ -25,8 +27,7 @@ class TestProwlerSocialAccountAdapter:
sociallogin = MagicMock(spec=SocialLogin)
sociallogin.account = MagicMock()
sociallogin.provider = MagicMock()
sociallogin.provider.id = "saml"
sociallogin.account.provider = "saml"
sociallogin.account.extra_data = {}
sociallogin.user = create_test_user
sociallogin.connect = MagicMock()
@@ -45,9 +46,7 @@ class TestProwlerSocialAccountAdapter:
sociallogin = MagicMock(spec=SocialLogin)
sociallogin.account = MagicMock()
sociallogin.provider = MagicMock()
sociallogin.user = MagicMock()
sociallogin.provider.id = "saml"
sociallogin.account.provider = "github"
sociallogin.account.extra_data = {}
sociallogin.connect = MagicMock()
@@ -55,23 +54,29 @@ class TestProwlerSocialAccountAdapter:
sociallogin.connect.assert_not_called()
def test_save_user_saml_sets_session_flag(self, rf):
def test_save_user_saml_flow(
self,
rf,
saml_setup,
saml_sociallogin,
):
adapter = ProwlerSocialAccountAdapter()
request = rf.get("/")
request.session = {}
saml_sociallogin.user.email = saml_setup["email"]
sociallogin = MagicMock(spec=SocialLogin)
sociallogin.provider = MagicMock()
sociallogin.provider.id = "saml"
sociallogin.account = MagicMock()
sociallogin.account.extra_data = {}
tenant = Tenant.objects.using(MainRouter.admin_db).get(
id=saml_setup["tenant_id"]
)
saml_config = SAMLConfiguration.objects.using(MainRouter.admin_db).get(
tenant=tenant
)
assert saml_config.email_domain == saml_setup["domain"]
mock_user = MagicMock()
mock_user.id = 123
user = adapter.save_user(request, saml_sociallogin)
with patch("api.adapters.super") as mock_super:
with patch("api.adapters.transaction"):
with patch("api.adapters.MainRouter"):
mock_super.return_value.save_user.return_value = mock_user
adapter.save_user(request, sociallogin)
assert request.session["saml_user_created"] == "123"
assert user.email == saml_setup["email"]
assert (
Membership.objects.using(MainRouter.admin_db)
.filter(user=user, tenant=tenant)
.exists()
)
@@ -13,7 +13,6 @@ from api.db_utils import (
enum_to_choices,
generate_random_token,
one_week_from_now,
update_objects_in_batches,
)
from api.models import Provider
@@ -228,88 +227,3 @@ class TestCreateObjectsInBatches:
qs = Provider.objects.filter(tenant=tenant)
assert qs.count() == total
@pytest.mark.django_db
class TestUpdateObjectsInBatches:
@pytest.fixture
def tenant(self, tenants_fixture):
return tenants_fixture[0]
def make_provider_instances(self, tenant, count):
"""
Return a list of `count` unsaved Provider instances for the given tenant.
"""
base_uid = 2000
return [
Provider(
tenant=tenant,
uid=str(base_uid + i),
provider=Provider.ProviderChoices.AWS,
)
for i in range(count)
]
def test_exact_multiple_of_batch(self, tenant):
total = 6
batch_size = 3
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs, batch_size=batch_size)
# Fetch them back, mutate the `uid` field, then update in batches
providers = list(Provider.objects.filter(tenant=tenant))
for p in providers:
p.uid = f"{p.uid}_upd"
update_objects_in_batches(
tenant_id=str(tenant.id),
model=Provider,
objects=providers,
fields=["uid"],
batch_size=batch_size,
)
qs = Provider.objects.filter(tenant=tenant, uid__endswith="_upd")
assert qs.count() == total
def test_non_multiple_of_batch(self, tenant):
total = 7
batch_size = 3
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs, batch_size=batch_size)
providers = list(Provider.objects.filter(tenant=tenant))
for p in providers:
p.uid = f"{p.uid}_upd"
update_objects_in_batches(
tenant_id=str(tenant.id),
model=Provider,
objects=providers,
fields=["uid"],
batch_size=batch_size,
)
qs = Provider.objects.filter(tenant=tenant, uid__endswith="_upd")
assert qs.count() == total
def test_batch_size_default(self, tenant):
default_size = settings.DJANGO_DELETION_BATCH_SIZE
total = default_size + 2
objs = self.make_provider_instances(tenant, total)
create_objects_in_batches(str(tenant.id), Provider, objs)
providers = list(Provider.objects.filter(tenant=tenant))
for p in providers:
p.uid = f"{p.uid}_upd"
# Update without specifying batch_size (uses default)
update_objects_in_batches(
tenant_id=str(tenant.id),
model=Provider,
objects=providers,
fields=["uid"],
)
qs = Provider.objects.filter(tenant=tenant, uid__endswith="_upd")
assert qs.count() == total
+16 -71
View File
@@ -3,7 +3,7 @@ from allauth.socialaccount.models import SocialApp
from django.core.exceptions import ValidationError
from api.db_router import MainRouter
from api.models import Resource, ResourceTag, SAMLConfiguration, SAMLDomainIndex
from api.models import Resource, ResourceTag, SAMLConfiguration, Tenant
@pytest.mark.django_db
@@ -142,8 +142,8 @@ class TestSAMLConfigurationModel:
</md:EntityDescriptor>
"""
def test_creates_valid_configuration(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_creates_valid_configuration(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant A")
config = SAMLConfiguration.objects.using(MainRouter.admin_db).create(
email_domain="ssoexample.com",
metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
@@ -153,8 +153,8 @@ class TestSAMLConfigurationModel:
assert config.email_domain == "ssoexample.com"
assert SocialApp.objects.filter(client_id="ssoexample.com").exists()
def test_email_domain_with_at_symbol_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_email_domain_with_at_symbol_fails(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant B")
config = SAMLConfiguration(
email_domain="invalid@domain.com",
metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
@@ -168,8 +168,9 @@ class TestSAMLConfigurationModel:
assert "email_domain" in errors
assert "Domain must not contain @" in errors["email_domain"][0]
def test_duplicate_email_domain_fails(self, tenants_fixture):
tenant1, tenant2, *_ = tenants_fixture
def test_duplicate_email_domain_fails(self):
tenant1 = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant C1")
tenant2 = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant C2")
SAMLConfiguration.objects.using(MainRouter.admin_db).create(
email_domain="duplicate.com",
@@ -190,8 +191,8 @@ class TestSAMLConfigurationModel:
assert "tenant" in errors
assert "There is a problem with your email domain." in errors["tenant"][0]
def test_duplicate_tenant_config_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_duplicate_tenant_config_fails(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant D")
SAMLConfiguration.objects.using(MainRouter.admin_db).create(
email_domain="unique1.com",
@@ -215,8 +216,8 @@ class TestSAMLConfigurationModel:
in errors["tenant"][0]
)
def test_invalid_metadata_xml_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_invalid_metadata_xml_fails(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant E")
config = SAMLConfiguration(
email_domain="brokenxml.com",
metadata_xml="<bad<xml>",
@@ -231,8 +232,8 @@ class TestSAMLConfigurationModel:
assert "Invalid XML" in errors["metadata_xml"][0]
assert "not well-formed" in errors["metadata_xml"][0]
def test_metadata_missing_sso_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_metadata_missing_sso_fails(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant F")
xml = """<md:EntityDescriptor entityID="x" xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata">
<md:IDPSSODescriptor></md:IDPSSODescriptor>
</md:EntityDescriptor>"""
@@ -249,8 +250,8 @@ class TestSAMLConfigurationModel:
assert "metadata_xml" in errors
assert "Missing SingleSignOnService" in errors["metadata_xml"][0]
def test_metadata_missing_certificate_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
def test_metadata_missing_certificate_fails(self):
tenant = Tenant.objects.using(MainRouter.admin_db).create(name="Tenant G")
xml = """<md:EntityDescriptor entityID="x" xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata">
<md:IDPSSODescriptor>
<md:SingleSignOnService Binding="urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect" Location="https://example.com/sso"/>
@@ -268,59 +269,3 @@ class TestSAMLConfigurationModel:
errors = exc_info.value.message_dict
assert "metadata_xml" in errors
assert "X509Certificate" in errors["metadata_xml"][0]
def test_deletes_saml_configuration_and_related_objects(self, tenants_fixture):
tenant = tenants_fixture[0]
email_domain = "deleteme.com"
# Create the configuration
config = SAMLConfiguration.objects.using(MainRouter.admin_db).create(
email_domain=email_domain,
metadata_xml=TestSAMLConfigurationModel.VALID_METADATA,
tenant=tenant,
)
# Verify that the SocialApp and SAMLDomainIndex exist
assert SocialApp.objects.filter(client_id=email_domain).exists()
assert (
SAMLDomainIndex.objects.using(MainRouter.admin_db)
.filter(email_domain=email_domain)
.exists()
)
# Delete the configuration
config.delete()
# Verify that the configuration and its related objects are deleted
assert (
not SAMLConfiguration.objects.using(MainRouter.admin_db)
.filter(pk=config.pk)
.exists()
)
assert not SocialApp.objects.filter(client_id=email_domain).exists()
assert (
not SAMLDomainIndex.objects.using(MainRouter.admin_db)
.filter(email_domain=email_domain)
.exists()
)
def test_duplicate_entity_id_fails_on_creation(self, tenants_fixture):
tenant1, tenant2, *_ = tenants_fixture
SAMLConfiguration.objects.using(MainRouter.admin_db).create(
email_domain="first.com",
metadata_xml=self.VALID_METADATA,
tenant=tenant1,
)
config = SAMLConfiguration(
email_domain="second.com",
metadata_xml=self.VALID_METADATA,
tenant=tenant2,
)
with pytest.raises(ValidationError) as exc_info:
config.save()
errors = exc_info.value.message_dict
assert "metadata_xml" in errors
assert "There is a problem with your metadata." in errors["metadata_xml"][0]
+3 -88
View File
@@ -1,7 +1,6 @@
from unittest.mock import ANY, Mock, patch
import pytest
from conftest import TODAY
from django.urls import reverse
from rest_framework import status
@@ -61,7 +60,7 @@ class TestUserViewSet:
def test_create_user_with_all_permissions(self, authenticated_client_rbac):
valid_user_payload = {
"name": "test",
"password": "Newpassword123@",
"password": "newpassword123",
"email": "new_user@test.com",
}
response = authenticated_client_rbac.post(
@@ -75,7 +74,7 @@ class TestUserViewSet:
):
valid_user_payload = {
"name": "test",
"password": "Newpassword123@",
"password": "newpassword123",
"email": "new_user@test.com",
}
response = authenticated_client_no_permissions_rbac.post(
@@ -322,7 +321,7 @@ class TestProviderViewSet:
@pytest.mark.django_db
class TestLimitedVisibility:
TEST_EMAIL = "rbac@rbac.com"
TEST_PASSWORD = "Thisisapassword123@"
TEST_PASSWORD = "thisisapassword123"
@pytest.fixture
def limited_admin_user(
@@ -410,87 +409,3 @@ class TestLimitedVisibility:
assert (
response.json()["data"]["relationships"]["providers"]["meta"]["count"] == 1
)
def test_overviews_providers(
self,
authenticated_client_rbac_limited,
scan_summaries_fixture,
providers_fixture,
):
# By default, the associated provider is the one which has the overview data
response = authenticated_client_rbac_limited.get(reverse("overview-providers"))
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) > 0
# Changing the provider visibility, no data should be returned
# Only the associated provider to that group is changed
new_provider = providers_fixture[1]
ProviderGroupMembership.objects.all().update(provider=new_provider)
response = authenticated_client_rbac_limited.get(reverse("overview-providers"))
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 0
@pytest.mark.parametrize(
"endpoint_name",
[
"findings",
"findings_severity",
],
)
def test_overviews_findings(
self,
endpoint_name,
authenticated_client_rbac_limited,
scan_summaries_fixture,
providers_fixture,
):
# By default, the associated provider is the one which has the overview data
response = authenticated_client_rbac_limited.get(
reverse(f"overview-{endpoint_name}")
)
assert response.status_code == status.HTTP_200_OK
values = response.json()["data"]["attributes"].values()
assert any(value > 0 for value in values)
# Changing the provider visibility, no data should be returned
# Only the associated provider to that group is changed
new_provider = providers_fixture[1]
ProviderGroupMembership.objects.all().update(provider=new_provider)
response = authenticated_client_rbac_limited.get(
reverse(f"overview-{endpoint_name}")
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]["attributes"].values()
assert all(value == 0 for value in data)
def test_overviews_services(
self,
authenticated_client_rbac_limited,
scan_summaries_fixture,
providers_fixture,
):
# By default, the associated provider is the one which has the overview data
response = authenticated_client_rbac_limited.get(
reverse("overview-services"), {"filter[inserted_at]": TODAY}
)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) > 0
# Changing the provider visibility, no data should be returned
# Only the associated provider to that group is changed
new_provider = providers_fixture[1]
ProviderGroupMembership.objects.all().update(provider=new_provider)
response = authenticated_client_rbac_limited.get(
reverse("overview-services"), {"filter[inserted_at]": TODAY}
)
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 0
-80
View File
@@ -1,80 +0,0 @@
import logging
from unittest.mock import MagicMock
from config.settings.sentry import before_send
def test_before_send_ignores_log_with_ignored_exception():
"""Test that before_send ignores logs containing ignored exceptions."""
log_record = MagicMock()
log_record.msg = "Provider kubernetes is not connected"
log_record.levelno = logging.ERROR # 40
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
def test_before_send_ignores_exception_with_ignored_exception():
"""Test that before_send ignores exceptions containing ignored exceptions."""
exc_info = (Exception, Exception("Provider kubernetes is not connected"), None)
hint = {"exc_info": exc_info}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
def test_before_send_passes_through_non_ignored_log():
"""Test that before_send passes through logs that don't contain ignored exceptions."""
log_record = MagicMock()
log_record.msg = "Some other error message"
log_record.levelno = logging.ERROR # 40
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was passed through
assert result == event
def test_before_send_passes_through_non_ignored_exception():
"""Test that before_send passes through exceptions that don't contain ignored exceptions."""
exc_info = (Exception, Exception("Some other error message"), None)
hint = {"exc_info": exc_info}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was passed through
assert result == event
def test_before_send_handles_warning_level():
"""Test that before_send handles warning level logs."""
log_record = MagicMock()
log_record.msg = "Provider kubernetes is not connected"
log_record.levelno = logging.WARNING # 30
hint = {"log_record": log_record}
event = MagicMock()
result = before_send(event, hint)
# Assert that the event was dropped (None returned)
assert result is None
+4 -60
View File
@@ -131,21 +131,6 @@ class TestInitializeProwlerProvider:
initialize_prowler_provider(provider)
mock_return_prowler_provider.return_value.assert_called_once_with(key="value")
@patch("api.utils.return_prowler_provider")
def test_initialize_prowler_provider_with_mutelist(
self, mock_return_prowler_provider
):
provider = MagicMock()
provider.secret.secret = {"key": "value"}
mutelist_processor = MagicMock()
mutelist_processor.configuration = {"Mutelist": {"key": "value"}}
mock_return_prowler_provider.return_value = MagicMock()
initialize_prowler_provider(provider, mutelist_processor)
mock_return_prowler_provider.return_value.assert_called_once_with(
key="value", mutelist_content={"key": "value"}
)
class TestProwlerProviderConnectionTest:
@patch("api.utils.return_prowler_provider")
@@ -215,25 +200,6 @@ class TestGetProwlerProviderKwargs:
expected_result = {**secret_dict, **expected_extra_kwargs}
assert result == expected_result
def test_get_prowler_provider_kwargs_with_mutelist(self):
provider_uid = "provider_uid"
secret_dict = {"key": "value"}
secret_mock = MagicMock()
secret_mock.secret = secret_dict
mutelist_processor = MagicMock()
mutelist_processor.configuration = {"Mutelist": {"key": "value"}}
provider = MagicMock()
provider.provider = Provider.ProviderChoices.AWS.value
provider.secret = secret_mock
provider.uid = provider_uid
result = get_prowler_provider_kwargs(provider, mutelist_processor)
expected_result = {**secret_dict, "mutelist_content": {"key": "value"}}
assert result == expected_result
def test_get_prowler_provider_kwargs_unsupported_provider(self):
# Setup
provider_uid = "provider_uid"
@@ -288,7 +254,7 @@ class TestValidateInvitation:
assert result == invitation
mock_db.get.assert_called_once_with(
token="VALID_TOKEN", email__iexact="user@example.com"
token="VALID_TOKEN", email="user@example.com"
)
def test_invitation_not_found_raises_validation_error(self):
@@ -303,7 +269,7 @@ class TestValidateInvitation:
"invitation_token": "Invalid invitation code."
}
mock_db.get.assert_called_once_with(
token="INVALID_TOKEN", email__iexact="user@example.com"
token="INVALID_TOKEN", email="user@example.com"
)
def test_invitation_not_found_raises_not_found(self):
@@ -318,7 +284,7 @@ class TestValidateInvitation:
assert exc_info.value.detail == "Invitation is not valid."
mock_db.get.assert_called_once_with(
token="INVALID_TOKEN", email__iexact="user@example.com"
token="INVALID_TOKEN", email="user@example.com"
)
def test_invitation_expired(self, invitation):
@@ -366,27 +332,5 @@ class TestValidateInvitation:
"invitation_token": "Invalid invitation code."
}
mock_db.get.assert_called_once_with(
token="VALID_TOKEN", email__iexact="different@example.com"
)
def test_valid_invitation_uppercase_email(self):
"""Test that validate_invitation works with case-insensitive email lookup."""
uppercase_email = "USER@example.com"
invitation = MagicMock(spec=Invitation)
invitation.token = "VALID_TOKEN"
invitation.email = uppercase_email
invitation.expires_at = datetime.now(timezone.utc) + timedelta(days=1)
invitation.state = Invitation.State.PENDING
invitation.tenant = MagicMock()
with patch("api.utils.Invitation.objects.using") as mock_using:
mock_db = mock_using.return_value
mock_db.get.return_value = invitation
result = validate_invitation("VALID_TOKEN", "user@example.com")
assert result == invitation
mock_db.get.assert_called_once_with(
token="VALID_TOKEN", email__iexact="user@example.com"
token="VALID_TOKEN", email="different@example.com"
)
File diff suppressed because it is too large Load Diff
+4 -15
View File
@@ -7,7 +7,7 @@ from rest_framework.exceptions import NotFound, ValidationError
from api.db_router import MainRouter
from api.exceptions import InvitationTokenExpiredException
from api.models import Invitation, Processor, Provider, Resource
from api.models import Invitation, Provider, Resource
from api.v1.serializers import FindingMetadataSerializer
from prowler.providers.aws.aws_provider import AwsProvider
from prowler.providers.azure.azure_provider import AzureProvider
@@ -83,14 +83,11 @@ def return_prowler_provider(
return prowler_provider
def get_prowler_provider_kwargs(
provider: Provider, mutelist_processor: Processor | None = None
) -> dict:
def get_prowler_provider_kwargs(provider: Provider) -> dict:
"""Get the Prowler provider kwargs based on the given provider type.
Args:
provider (Provider): The provider object containing the provider type and associated secret.
mutelist_processor (Processor): The mutelist processor object containing the mutelist configuration.
Returns:
dict: The provider kwargs for the corresponding provider class.
@@ -108,24 +105,16 @@ def get_prowler_provider_kwargs(
}
elif provider.provider == Provider.ProviderChoices.KUBERNETES.value:
prowler_provider_kwargs = {**prowler_provider_kwargs, "context": provider.uid}
if mutelist_processor:
mutelist_content = mutelist_processor.configuration.get("Mutelist", {})
if mutelist_content:
prowler_provider_kwargs["mutelist_content"] = mutelist_content
return prowler_provider_kwargs
def initialize_prowler_provider(
provider: Provider,
mutelist_processor: Processor | None = None,
) -> AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider:
"""Initialize a Prowler provider instance based on the given provider type.
Args:
provider (Provider): The provider object containing the provider type and associated secrets.
mutelist_processor (Processor): The mutelist processor object containing the mutelist configuration.
Returns:
AwsProvider | AzureProvider | GcpProvider | KubernetesProvider | M365Provider: An instance of the corresponding provider class
@@ -133,7 +122,7 @@ def initialize_prowler_provider(
provider's secrets.
"""
prowler_provider = return_prowler_provider(provider)
prowler_provider_kwargs = get_prowler_provider_kwargs(provider, mutelist_processor)
prowler_provider_kwargs = get_prowler_provider_kwargs(provider)
return prowler_provider(**prowler_provider_kwargs)
@@ -198,7 +187,7 @@ def validate_invitation(
# Admin DB connector is used to bypass RLS protection since the invitation belongs to a tenant the user
# is not a member of yet
invitation = Invitation.objects.using(MainRouter.admin_db).get(
token=invitation_token, email__iexact=email
token=invitation_token, email=email
)
except Invitation.DoesNotExist:
if raise_not_found:
+2 -14
View File
@@ -24,32 +24,20 @@ class PaginateByPkMixin:
request, # noqa: F841
base_queryset,
manager,
select_related: list | None = None,
prefetch_related: list | None = None,
select_related: list[str] | None = None,
prefetch_related: list[str] | None = None,
) -> Response:
"""
Paginate a queryset by primary key.
This method is useful when you want to paginate a queryset that has been
filtered or annotated in a way that would be lost if you used the default
pagination method.
"""
pk_list = base_queryset.values_list("id", flat=True)
page = self.paginate_queryset(pk_list)
if page is None:
return Response(self.get_serializer(base_queryset, many=True).data)
queryset = manager.filter(id__in=page)
if select_related:
queryset = queryset.select_related(*select_related)
if prefetch_related:
queryset = queryset.prefetch_related(*prefetch_related)
# Optimize tags loading, if applicable
if hasattr(self, "_optimize_tags_loading"):
queryset = self._optimize_tags_loading(queryset)
queryset = sorted(queryset, key=lambda obj: page.index(obj.id))
serialized = self.get_serializer(queryset, many=True).data
@@ -1,23 +0,0 @@
import yaml
from rest_framework_json_api import serializers
from rest_framework_json_api.serializers import ValidationError
class BaseValidateSerializer(serializers.Serializer):
def validate(self, data):
if hasattr(self, "initial_data"):
initial_data = set(self.initial_data.keys()) - {"id", "type"}
unknown_keys = initial_data - set(self.fields.keys())
if unknown_keys:
raise ValidationError(f"Invalid fields: {unknown_keys}")
return data
class YamlOrJsonField(serializers.JSONField):
def to_internal_value(self, data):
if isinstance(data, str):
try:
data = yaml.safe_load(data)
except yaml.YAMLError as exc:
raise serializers.ValidationError("Invalid YAML format") from exc
return super().to_internal_value(data)
@@ -1,7 +1,19 @@
from drf_spectacular.utils import extend_schema_field
from rest_framework_json_api import serializers
from rest_framework_json_api.serializers import ValidationError
from api.v1.serializer_utils.base import BaseValidateSerializer
class BaseValidateSerializer(serializers.Serializer):
def validate(self, data):
if hasattr(self, "initial_data"):
initial_data = set(self.initial_data.keys()) - {"id", "type"}
unknown_keys = initial_data - set(self.fields.keys())
if unknown_keys:
raise ValidationError(f"Invalid fields: {unknown_keys}")
return data
# Integrations
class S3ConfigSerializer(BaseValidateSerializer):
@@ -1,21 +0,0 @@
from drf_spectacular.utils import extend_schema_field
from api.v1.serializer_utils.base import YamlOrJsonField
from prowler.lib.mutelist.mutelist import mutelist_schema
@extend_schema_field(
{
"oneOf": [
{
"type": "object",
"title": "Mutelist",
"properties": {"Mutelist": mutelist_schema},
"additionalProperties": False,
},
]
}
)
class ProcessorConfigField(YamlOrJsonField):
pass
+8 -197
View File
@@ -7,9 +7,7 @@ from django.contrib.auth.models import update_last_login
from django.contrib.auth.password_validation import validate_password
from drf_spectacular.utils import extend_schema_field
from jwt.exceptions import InvalidKeyError
from rest_framework.validators import UniqueTogetherValidator
from rest_framework_json_api import serializers
from rest_framework_json_api.relations import SerializerMethodResourceRelatedField
from rest_framework_json_api.serializers import ValidationError
from rest_framework_simplejwt.exceptions import TokenError
from rest_framework_simplejwt.serializers import TokenObtainPairSerializer
@@ -23,7 +21,6 @@ from api.models import (
InvitationRoleRelationship,
LighthouseConfiguration,
Membership,
Processor,
Provider,
ProviderGroup,
ProviderGroupMembership,
@@ -47,9 +44,7 @@ from api.v1.serializer_utils.integrations import (
IntegrationCredentialField,
S3ConfigSerializer,
)
from api.v1.serializer_utils.processors import ProcessorConfigField
from api.v1.serializer_utils.providers import ProviderSecretField
from prowler.lib.mutelist.mutelist import Mutelist
# Tokens
@@ -135,12 +130,6 @@ class TokenSerializer(BaseTokenSerializer):
class TokenSocialLoginSerializer(BaseTokenSerializer):
email = serializers.EmailField(write_only=True)
tenant_id = serializers.UUIDField(
write_only=True,
required=False,
help_text="If not provided, the tenant ID of the first membership that was added"
" to the user will be used.",
)
# Output tokens
refresh = serializers.CharField(read_only=True)
@@ -862,7 +851,6 @@ class ScanSerializer(RLSSerializer):
"completed_at",
"scheduled_at",
"next_scan_at",
"processor",
"url",
]
@@ -1000,12 +988,8 @@ class ResourceSerializer(RLSSerializer):
tags = serializers.SerializerMethodField()
type_ = serializers.CharField(read_only=True)
failed_findings_count = serializers.IntegerField(read_only=True)
findings = SerializerMethodResourceRelatedField(
many=True,
read_only=True,
)
findings = serializers.ResourceRelatedField(many=True, read_only=True)
class Meta:
model = Resource
@@ -1021,7 +1005,6 @@ class ResourceSerializer(RLSSerializer):
"tags",
"provider",
"findings",
"failed_findings_count",
"url",
]
extra_kwargs = {
@@ -1031,8 +1014,8 @@ class ResourceSerializer(RLSSerializer):
}
included_serializers = {
"findings": "api.v1.serializers.FindingIncludeSerializer",
"provider": "api.v1.serializers.ProviderIncludeSerializer",
"findings": "api.v1.serializers.FindingSerializer",
"provider": "api.v1.serializers.ProviderSerializer",
}
@extend_schema_field(
@@ -1043,10 +1026,6 @@ class ResourceSerializer(RLSSerializer):
}
)
def get_tags(self, obj):
# Use prefetched tags if available to avoid N+1 queries
if hasattr(obj, "prefetched_tags"):
return {tag.key: tag.value for tag in obj.prefetched_tags}
# Fallback to the original method if prefetch is not available
return obj.get_tags(self.context.get("tenant_id"))
def get_fields(self):
@@ -1056,17 +1035,10 @@ class ResourceSerializer(RLSSerializer):
fields["type"] = type_
return fields
def get_findings(self, obj):
return (
obj.latest_findings
if hasattr(obj, "latest_findings")
else obj.findings.all()
)
class ResourceIncludeSerializer(RLSSerializer):
"""
Serializer for the included Resource model.
Serializer for the Resource model.
"""
tags = serializers.SerializerMethodField()
@@ -1099,10 +1071,6 @@ class ResourceIncludeSerializer(RLSSerializer):
}
)
def get_tags(self, obj):
# Use prefetched tags if available to avoid N+1 queries
if hasattr(obj, "prefetched_tags"):
return {tag.key: tag.value for tag in obj.prefetched_tags}
# Fallback to the original method if prefetch is not available
return obj.get_tags(self.context.get("tenant_id"))
def get_fields(self):
@@ -1113,17 +1081,6 @@ class ResourceIncludeSerializer(RLSSerializer):
return fields
class ResourceMetadataSerializer(serializers.Serializer):
services = serializers.ListField(child=serializers.CharField(), allow_empty=True)
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
types = serializers.ListField(child=serializers.CharField(), allow_empty=True)
# Temporarily disabled until we implement tag filtering in the UI
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
class Meta:
resource_name = "resources-metadata"
class FindingSerializer(RLSSerializer):
"""
Serializer for the Finding model.
@@ -1147,7 +1104,6 @@ class FindingSerializer(RLSSerializer):
"updated_at",
"first_seen_at",
"muted",
"muted_reason",
"url",
# Relationships
"scan",
@@ -1160,28 +1116,6 @@ class FindingSerializer(RLSSerializer):
}
class FindingIncludeSerializer(RLSSerializer):
"""
Serializer for the include Finding model.
"""
class Meta:
model = Finding
fields = [
"id",
"uid",
"status",
"severity",
"check_id",
"check_metadata",
"inserted_at",
"updated_at",
"first_seen_at",
"muted",
"muted_reason",
]
# To be removed when the related endpoint is removed as well
class FindingDynamicFilterSerializer(serializers.Serializer):
services = serializers.ListField(child=serializers.CharField(), allow_empty=True)
@@ -1266,8 +1200,8 @@ class M365ProviderSecret(serializers.Serializer):
client_id = serializers.CharField()
client_secret = serializers.CharField()
tenant_id = serializers.CharField()
user = serializers.EmailField(required=False)
password = serializers.CharField(required=False)
user = serializers.EmailField()
password = serializers.CharField()
class Meta:
resource_name = "provider-secrets"
@@ -1375,13 +1309,12 @@ class ProviderSecretUpdateSerializer(BaseWriteProviderSecretSerializer):
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
"provider": {"read_only": True},
"secret_type": {"required": False},
"secret_type": {"read_only": True},
}
def validate(self, attrs):
provider = self.instance.provider
# To allow updating a secret with the same type without making the `secret_type` mandatory
secret_type = attrs.get("secret_type") or self.instance.secret_type
secret_type = self.instance.secret_type
secret = attrs.get("secret")
validated_attrs = super().validate(attrs)
@@ -2132,128 +2065,6 @@ class IntegrationUpdateSerializer(BaseWriteIntegrationSerializer):
return super().update(instance, validated_data)
# Processors
class ProcessorSerializer(RLSSerializer):
"""
Serializer for the Processor model.
"""
configuration = ProcessorConfigField()
class Meta:
model = Processor
fields = [
"id",
"inserted_at",
"updated_at",
"processor_type",
"configuration",
"url",
]
class ProcessorCreateSerializer(RLSSerializer, BaseWriteSerializer):
configuration = ProcessorConfigField(required=True)
class Meta:
model = Processor
fields = [
"inserted_at",
"updated_at",
"processor_type",
"configuration",
]
extra_kwargs = {
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
}
validators = [
UniqueTogetherValidator(
queryset=Processor.objects.all(),
fields=["processor_type"],
message="A processor with the same type already exists.",
)
]
def validate(self, attrs):
validated_attrs = super().validate(attrs)
self.validate_processor_data(attrs)
return validated_attrs
def validate_processor_data(self, attrs):
processor_type = attrs.get("processor_type")
configuration = attrs.get("configuration")
if processor_type == "mutelist":
self.validate_mutelist_configuration(configuration)
def validate_mutelist_configuration(self, configuration):
if not isinstance(configuration, dict):
raise serializers.ValidationError("Invalid Mutelist configuration.")
mutelist_configuration = configuration.get("Mutelist", {})
if not mutelist_configuration:
raise serializers.ValidationError(
"Invalid Mutelist configuration: 'Mutelist' is a required property."
)
try:
Mutelist.validate_mutelist(mutelist_configuration, raise_on_exception=True)
return
except Exception as error:
raise serializers.ValidationError(
f"Invalid Mutelist configuration: {error}"
)
class ProcessorUpdateSerializer(BaseWriteSerializer):
configuration = ProcessorConfigField(required=True)
class Meta:
model = Processor
fields = [
"inserted_at",
"updated_at",
"configuration",
]
extra_kwargs = {
"inserted_at": {"read_only": True},
"updated_at": {"read_only": True},
}
def validate(self, attrs):
validated_attrs = super().validate(attrs)
self.validate_processor_data(attrs)
return validated_attrs
def validate_processor_data(self, attrs):
processor_type = self.instance.processor_type
configuration = attrs.get("configuration")
if processor_type == "mutelist":
self.validate_mutelist_configuration(configuration)
def validate_mutelist_configuration(self, configuration):
if not isinstance(configuration, dict):
raise serializers.ValidationError("Invalid Mutelist configuration.")
mutelist_configuration = configuration.get("Mutelist", {})
if not mutelist_configuration:
raise serializers.ValidationError(
"Invalid Mutelist configuration: 'Mutelist' is a required property."
)
try:
Mutelist.validate_mutelist(mutelist_configuration, raise_on_exception=True)
return
except Exception as error:
raise serializers.ValidationError(
f"Invalid Mutelist configuration: {error}"
)
# SSO
+3 -27
View File
@@ -1,11 +1,9 @@
from allauth.socialaccount.providers.saml.views import ACSView, MetadataView, SLSView
from django.urls import include, path
from drf_spectacular.views import SpectacularRedocView
from rest_framework_nested import routers
from api.v1.views import (
ComplianceOverviewViewSet,
CustomSAMLLoginView,
CustomTokenObtainView,
CustomTokenRefreshView,
CustomTokenSwitchTenantView,
@@ -18,7 +16,6 @@ from api.v1.views import (
LighthouseConfigViewSet,
MembershipViewSet,
OverviewViewSet,
ProcessorViewSet,
ProviderGroupProvidersRelationshipView,
ProviderGroupViewSet,
ProviderSecretViewSet,
@@ -28,7 +25,6 @@ from api.v1.views import (
RoleViewSet,
SAMLConfigurationViewSet,
SAMLInitiateAPIView,
SAMLTokenValidateView,
ScanViewSet,
ScheduleViewSet,
SchemaView,
@@ -57,7 +53,6 @@ router.register(
router.register(r"overviews", OverviewViewSet, basename="overview")
router.register(r"schedules", ScheduleViewSet, basename="schedule")
router.register(r"integrations", IntegrationViewSet, basename="integration")
router.register(r"processors", ProcessorViewSet, basename="processor")
router.register(r"saml-config", SAMLConfigurationViewSet, basename="saml-config")
router.register(
r"lighthouse-configurations",
@@ -131,32 +126,13 @@ urlpatterns = [
path(
"auth/saml/initiate/", SAMLInitiateAPIView.as_view(), name="api_saml_initiate"
),
# Allauth SAML endpoints for tenants
path("accounts/", include("allauth.urls")),
path(
"accounts/saml/<organization_slug>/login/",
CustomSAMLLoginView.as_view(),
name="saml_login",
),
path(
"accounts/saml/<organization_slug>/acs/",
ACSView.as_view(),
name="saml_acs",
),
path(
"accounts/saml/<organization_slug>/acs/finish/",
"api/v1/accounts/saml/<organization_slug>/acs/finish/",
TenantFinishACSView.as_view(),
name="saml_finish_acs",
),
path(
"accounts/saml/<organization_slug>/sls/",
SLSView.as_view(),
name="saml_sls",
),
path(
"accounts/saml/<organization_slug>/metadata/",
MetadataView.as_view(),
name="saml_metadata",
),
path("tokens/saml", SAMLTokenValidateView.as_view(), name="token-saml"),
path("tokens/google", GoogleSocialLoginView.as_view(), name="token-google"),
path("tokens/github", GithubSocialLoginView.as_view(), name="token-github"),
path("", include(router.urls)),
+92 -549
View File
@@ -1,17 +1,14 @@
import glob
import logging
import os
from datetime import datetime, timedelta, timezone
from urllib.parse import urljoin
import sentry_sdk
from allauth.socialaccount.models import SocialAccount, SocialApp
from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter
from allauth.socialaccount.providers.google.views import GoogleOAuth2Adapter
from allauth.socialaccount.providers.saml.views import FinishACSView, LoginView
from allauth.socialaccount.providers.saml.views import FinishACSView
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.result import AsyncResult
from config.custom_logging import BackendLogger
from config.env import env
from config.settings.social_login import (
GITHUB_OAUTH_CALLBACK_URL,
@@ -22,9 +19,9 @@ from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, F, Prefetch, Q, Sum
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, Sum
from django.db.models.functions import Coalesce
from django.http import HttpResponse
from django.http import HttpResponse, JsonResponse
from django.shortcuts import redirect
from django.urls import reverse
from django.utils.dateparse import parse_date
@@ -78,9 +75,7 @@ from api.filters import (
IntegrationFilter,
InvitationFilter,
LatestFindingFilter,
LatestResourceFilter,
MembershipFilter,
ProcessorFilter,
ProviderFilter,
ProviderGroupFilter,
ProviderSecretFilter,
@@ -94,13 +89,13 @@ from api.filters import (
UserFilter,
)
from api.models import (
ComplianceOverview,
ComplianceRequirementOverview,
Finding,
Integration,
Invitation,
LighthouseConfiguration,
Membership,
Processor,
Provider,
ProviderGroup,
ProviderGroupMembership,
@@ -108,12 +103,10 @@ from api.models import (
Resource,
ResourceFindingMapping,
ResourceScanSummary,
ResourceTag,
Role,
RoleProviderGroupRelationship,
SAMLConfiguration,
SAMLDomainIndex,
SAMLToken,
Scan,
ScanSummary,
SeverityChoices,
@@ -155,9 +148,6 @@ from api.v1.serializers import (
OverviewProviderSerializer,
OverviewServiceSerializer,
OverviewSeveritySerializer,
ProcessorCreateSerializer,
ProcessorSerializer,
ProcessorUpdateSerializer,
ProviderCreateSerializer,
ProviderGroupCreateSerializer,
ProviderGroupMembershipSerializer,
@@ -168,7 +158,6 @@ from api.v1.serializers import (
ProviderSecretUpdateSerializer,
ProviderSerializer,
ProviderUpdateSerializer,
ResourceMetadataSerializer,
ResourceSerializer,
RoleCreateSerializer,
RoleProviderGroupRelationshipSerializer,
@@ -194,8 +183,6 @@ from api.v1.serializers import (
UserUpdateSerializer,
)
logger = logging.getLogger(BackendLogger.API)
CACHE_DECORATOR = cache_control(
max_age=django_settings.CACHE_MAX_AGE,
stale_while_revalidate=django_settings.CACHE_STALE_WHILE_REVALIDATE,
@@ -292,7 +279,7 @@ class SchemaView(SpectacularAPIView):
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.10.0"
spectacular_settings.VERSION = "1.9.0"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -358,11 +345,6 @@ class SchemaView(SpectacularAPIView):
"description": "Endpoints for managing Lighthouse configurations, including creation, retrieval, "
"updating, and deletion of configurations such as OpenAI keys, models, and business context.",
},
{
"name": "Processor",
"description": "Endpoints for managing post-processors used to process Prowler findings, including "
"registration, configuration, and deletion of post-processing actions.",
},
]
return super().get(request, *args, **kwargs)
@@ -419,68 +401,17 @@ class GithubSocialLoginView(SocialLoginView):
return original_response
@extend_schema(exclude=True)
class SAMLTokenValidateView(GenericAPIView):
resource_name = "tokens"
http_method_names = ["post"]
def post(self, request):
token_id = request.query_params.get("id", "invalid")
try:
saml_token = SAMLToken.objects.using(MainRouter.admin_db).get(id=token_id)
except SAMLToken.DoesNotExist:
return Response({"detail": "Invalid token ID."}, status=404)
if saml_token.is_expired():
return Response({"detail": "Token expired."}, status=400)
token_data = saml_token.token
# Currently we don't store the tokens in the database, so we delete the token after use
saml_token.delete()
return Response(token_data, status=200)
@extend_schema(exclude=True)
class CustomSAMLLoginView(LoginView):
def dispatch(self, request, *args, **kwargs):
"""
Convert GET requests to POST to bypass allauth's confirmation screen.
Why this is necessary:
- django-allauth requires POST for social logins to prevent open redirect attacks
- SAML login links typically use GET requests (e.g., <a href="...">)
- This conversion allows seamless login without user-facing confirmation
Security considerations:
1. Preserves CSRF protection: Original POST handling remains intact
2. Avoids global SOCIALACCOUNT_LOGIN_ON_GET=True which would:
- Enable GET logins for ALL providers (security risk)
- Potentially expose open redirect vulnerabilities
3. SAML payloads remain signed/encrypted regardless of HTTP method
4. No sensitive parameters are exposed in URLs (copied to POST body)
This approach maintains security while providing better UX.
"""
if request.method == "GET":
# Convert GET to POST while preserving parameters
request.method = "POST"
return super().dispatch(request, *args, **kwargs)
@extend_schema(exclude=True)
class SAMLInitiateAPIView(GenericAPIView):
serializer_class = SamlInitiateSerializer
permission_classes = []
def post(self, request, *args, **kwargs):
# Validate the input payload and extract the domain
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
email = serializer.validated_data["email_domain"]
domain = email.split("@", 1)[-1].lower()
# Retrieve the SAML configuration for the given email domain
try:
check = SAMLDomainIndex.objects.get(email_domain=domain)
with rls_transaction(str(check.tenant_id)):
@@ -490,24 +421,20 @@ class SAMLInitiateAPIView(GenericAPIView):
{"detail": "Unauthorized domain."}, status=status.HTTP_403_FORBIDDEN
)
# Check certificates are not empty (TODO: Validate certificates)
# saml_public_cert = os.getenv("SAML_PUBLIC_CERT", "").strip()
# saml_private_key = os.getenv("SAML_PRIVATE_KEY", "").strip()
# Check certificates are not empty
saml_public_cert = os.getenv("SAML_PUBLIC_CERT", "").strip()
saml_private_key = os.getenv("SAML_PRIVATE_KEY", "").strip()
# if not saml_public_cert or not saml_private_key:
# return Response(
# {"detail": "SAML configuration is invalid: missing certificates."},
# status=status.HTTP_403_FORBIDDEN,
# )
if not saml_public_cert or not saml_private_key:
return Response(
{"detail": "SAML configuration is invalid: missing certificates."},
status=status.HTTP_403_FORBIDDEN,
)
# Build the SAML login URL using the configured API host
api_host = os.getenv("API_BASE_URL")
login_path = reverse(
saml_login_url = reverse(
"saml_login", kwargs={"organization_slug": config.email_domain}
)
login_url = urljoin(api_host, login_path)
return redirect(login_url)
return redirect(f"{saml_login_url}?email={email}")
@extend_schema_view(
@@ -565,64 +492,27 @@ class SAMLConfigurationViewSet(BaseRLSViewSet):
class TenantFinishACSView(FinishACSView):
def _rollback_saml_user(self, request):
"""Helper function to rollback SAML user if it was just created and validation fails"""
saml_user_id = request.session.get("saml_user_created")
if saml_user_id:
User.objects.using(MainRouter.admin_db).filter(id=saml_user_id).delete()
request.session.pop("saml_user_created", None)
def dispatch(self, request, organization_slug):
try:
super().dispatch(request, organization_slug)
except Exception as e:
logger.error(f"SAML dispatch failed: {e}")
self._rollback_saml_user(request)
callback_url = env.str("AUTH_URL")
return redirect(f"{callback_url}?sso_saml_failed=true")
response = super().dispatch(request, organization_slug)
user = getattr(request, "user", None)
if not user or not user.is_authenticated:
self._rollback_saml_user(request)
callback_url = env.str("AUTH_URL")
return redirect(f"{callback_url}?sso_saml_failed=true")
return response
# Defensive check to avoid edge case failures due to inconsistent or incomplete data in the database
# This handles scenarios like partially deleted or missing related objects
try:
check = SAMLDomainIndex.objects.get(email_domain=organization_slug)
with rls_transaction(str(check.tenant_id)):
SAMLConfiguration.objects.get(tenant_id=str(check.tenant_id))
social_app = SocialApp.objects.get(
provider="saml", client_id=organization_slug
)
user_id = User.objects.get(email=str(user)).id
social_account = SocialAccount.objects.get(
user=str(user_id), provider=social_app.provider_id
user=user, provider=social_app.provider
)
except (
SAMLDomainIndex.DoesNotExist,
SAMLConfiguration.DoesNotExist,
SocialApp.DoesNotExist,
SocialAccount.DoesNotExist,
User.DoesNotExist,
) as e:
logger.error(f"SAML user is not authenticated: {e}")
self._rollback_saml_user(request)
callback_url = env.str("AUTH_URL")
return redirect(f"{callback_url}?sso_saml_failed=true")
except (SocialApp.DoesNotExist, SocialAccount.DoesNotExist):
return response
extra = social_account.extra_data
user.first_name = (
extra.get("firstName", [""])[0] if extra.get("firstName") else ""
)
user.last_name = extra.get("lastName", [""])[0] if extra.get("lastName") else ""
user.company_name = (
extra.get("organization", [""])[0] if extra.get("organization") else ""
)
user.first_name = extra.get("firstName", [""])[0]
user.last_name = extra.get("lastName", [""])[0]
user.company_name = extra.get("organization", [""])[0]
user.name = f"{user.first_name} {user.last_name}".strip()
if user.name == "":
user.name = "N/A"
user.save()
email_domain = user.email.split("@")[-1]
@@ -631,11 +521,7 @@ class TenantFinishACSView(FinishACSView):
.get(email_domain=email_domain)
.tenant
)
role_name = (
extra.get("userType", ["no_permissions"])[0].strip()
if extra.get("userType")
else "no_permissions"
)
role_name = extra.get("userType", ["saml_default_role"])[0].strip()
try:
role = Role.objects.using(MainRouter.admin_db).get(
name=role_name, tenant=tenant
@@ -661,30 +547,15 @@ class TenantFinishACSView(FinishACSView):
role=role,
tenant_id=tenant.id,
)
membership, _ = Membership.objects.using(MainRouter.admin_db).get_or_create(
user=user,
tenant=tenant,
defaults={
"user": user,
"tenant": tenant,
"role": Membership.RoleChoices.MEMBER,
},
)
serializer = TokenSocialLoginSerializer(
data={"email": user.email, "tenant_id": str(tenant.id)}
)
serializer = TokenSocialLoginSerializer(data={"email": user.email})
serializer.is_valid(raise_exception=True)
token_data = serializer.validated_data
saml_token = SAMLToken.objects.using(MainRouter.admin_db).create(
token=token_data, user=user
return JsonResponse(
{
"type": "saml-social-tokens",
"attributes": serializer.validated_data,
}
)
callback_url = env.str("SAML_SSO_CALLBACK_URL")
redirect_url = f"{callback_url}?id={saml_token.id}"
request.session.pop("saml_user_created", None)
return redirect(redirect_url)
@extend_schema_view(
@@ -1885,14 +1756,6 @@ class TaskViewSet(BaseRLSViewSet):
summary="List all resources",
description="Retrieve a list of all resources with options for filtering by various criteria. Resources are "
"objects that are discovered by Prowler. They can be anything from a single host to a whole VPC.",
parameters=[
OpenApiParameter(
name="filter[updated_at]",
description="At least one of the variations of the `filter[updated_at]` filter must be provided.",
required=True,
type=OpenApiTypes.DATE,
)
],
),
retrieve=extend_schema(
tags=["Resource"],
@@ -1900,43 +1763,15 @@ class TaskViewSet(BaseRLSViewSet):
description="Fetch detailed information about a specific resource by their ID. A Resource is an object that "
"is discovered by Prowler. It can be anything from a single host to a whole VPC.",
),
metadata=extend_schema(
tags=["Resource"],
summary="Retrieve metadata values from resources",
description="Fetch unique metadata values from a set of resources. This is useful for dynamic filtering.",
parameters=[
OpenApiParameter(
name="filter[updated_at]",
description="At least one of the variations of the `filter[updated_at]` filter must be provided.",
required=True,
type=OpenApiTypes.DATE,
)
],
filters=True,
),
latest=extend_schema(
tags=["Resource"],
summary="List the latest resources",
description="Retrieve a list of the latest resources from the latest scans for each provider with options for "
"filtering by various criteria.",
filters=True,
),
metadata_latest=extend_schema(
tags=["Resource"],
summary="Retrieve metadata values from the latest resources",
description="Fetch unique metadata values from a set of resources from the latest scans for each provider. "
"This is useful for dynamic filtering.",
filters=True,
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class ResourceViewSet(PaginateByPkMixin, BaseRLSViewSet):
queryset = Resource.all_objects.all()
class ResourceViewSet(BaseRLSViewSet):
queryset = Resource.objects.all()
serializer_class = ResourceSerializer
http_method_names = ["get"]
filterset_class = ResourceFilter
ordering = ["-failed_findings_count", "-updated_at"]
ordering = ["-inserted_at"]
ordering_fields = [
"provider_uid",
"uid",
@@ -1947,14 +1782,6 @@ class ResourceViewSet(PaginateByPkMixin, BaseRLSViewSet):
"inserted_at",
"updated_at",
]
prefetch_for_includes = {
"__all__": [],
"provider": [
Prefetch(
"provider", queryset=Provider.all_objects.select_related("resources")
)
],
}
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
# the provider through the provider group)
required_permissions = []
@@ -1963,257 +1790,41 @@ class ResourceViewSet(PaginateByPkMixin, BaseRLSViewSet):
user_roles = get_role(self.request.user)
if user_roles.unlimited_visibility:
# User has unlimited visibility, return all scans
queryset = Resource.all_objects.filter(tenant_id=self.request.tenant_id)
queryset = Resource.objects.filter(tenant_id=self.request.tenant_id)
else:
# User lacks permission, filter providers based on provider groups associated with the role
queryset = Resource.all_objects.filter(
queryset = Resource.objects.filter(
tenant_id=self.request.tenant_id, provider__in=get_providers(user_roles)
)
search_value = self.request.query_params.get("filter[search]", None)
if search_value:
# Django's ORM will build a LEFT JOIN and OUTER JOIN on the "through" table, resulting in duplicates
# The duplicates then require a `distinct` query
search_query = SearchQuery(
search_value, config="simple", search_type="plain"
)
queryset = queryset.filter(
Q(text_search=search_query) | Q(tags__text_search=search_query)
Q(tags__key=search_value)
| Q(tags__value=search_value)
| Q(tags__text_search=search_query)
| Q(tags__key__contains=search_value)
| Q(tags__value__contains=search_value)
| Q(uid=search_value)
| Q(name=search_value)
| Q(region=search_value)
| Q(service=search_value)
| Q(type=search_value)
| Q(text_search=search_query)
| Q(uid__contains=search_value)
| Q(name__contains=search_value)
| Q(region__contains=search_value)
| Q(service__contains=search_value)
| Q(type__contains=search_value)
).distinct()
return queryset
def _optimize_tags_loading(self, queryset):
"""Optimize tags loading with prefetch_related to avoid N+1 queries"""
# Use prefetch_related to load all tags in a single query
return queryset.prefetch_related(
Prefetch(
"tags",
queryset=ResourceTag.objects.filter(
tenant_id=self.request.tenant_id
).select_related(),
to_attr="prefetched_tags",
)
)
def get_serializer_class(self):
if self.action in ["metadata", "metadata_latest"]:
return ResourceMetadataSerializer
return super().get_serializer_class()
def get_filterset_class(self):
if self.action in ["latest", "metadata_latest"]:
return LatestResourceFilter
return ResourceFilter
def filter_queryset(self, queryset):
# Do not apply filters when retrieving specific resource
if self.action == "retrieve":
return queryset
return super().filter_queryset(queryset)
def list(self, request, *args, **kwargs):
filtered_queryset = self.filter_queryset(self.get_queryset())
return self.paginate_by_pk(
request,
filtered_queryset,
manager=Resource.all_objects,
select_related=["provider"],
prefetch_related=["findings"],
)
def retrieve(self, request, *args, **kwargs):
queryset = self._optimize_tags_loading(self.get_queryset())
instance = get_object_or_404(queryset, pk=kwargs.get("pk"))
mapping_ids = list(
ResourceFindingMapping.objects.filter(
resource=instance, tenant_id=request.tenant_id
).values_list("finding_id", flat=True)
)
latest_findings = (
Finding.all_objects.filter(id__in=mapping_ids, tenant_id=request.tenant_id)
.order_by("uid", "-inserted_at")
.distinct("uid")
)
setattr(instance, "latest_findings", latest_findings)
serializer = self.get_serializer(instance)
return Response(serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="latest")
def latest(self, request):
tenant_id = request.tenant_id
filtered_queryset = self.filter_queryset(self.get_queryset())
latest_scan_ids = (
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
)
filtered_queryset = filtered_queryset.filter(
tenant_id=tenant_id, provider__scan__in=latest_scan_ids
)
return self.paginate_by_pk(
request,
filtered_queryset,
manager=Resource.all_objects,
select_related=["provider"],
prefetch_related=["findings"],
)
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
# Force filter validation
self.filter_queryset(self.get_queryset())
tenant_id = request.tenant_id
query_params = request.query_params
queryset = ResourceScanSummary.objects.filter(tenant_id=tenant_id)
if scans := query_params.get("filter[scan__in]") or query_params.get(
"filter[scan]"
):
queryset = queryset.filter(scan_id__in=scans.split(","))
else:
exact = query_params.get("filter[inserted_at]")
gte = query_params.get("filter[inserted_at__gte]")
lte = query_params.get("filter[inserted_at__lte]")
date_filters = {}
if exact:
date = parse_date(exact)
datetime_start = datetime.combine(
date, datetime.min.time(), tzinfo=timezone.utc
)
datetime_end = datetime_start + timedelta(days=1)
date_filters["scan_id__gte"] = uuid7_start(
datetime_to_uuid7(datetime_start)
)
date_filters["scan_id__lt"] = uuid7_start(
datetime_to_uuid7(datetime_end)
)
else:
if gte:
date_start = parse_date(gte)
datetime_start = datetime.combine(
date_start, datetime.min.time(), tzinfo=timezone.utc
)
date_filters["scan_id__gte"] = uuid7_start(
datetime_to_uuid7(datetime_start)
)
if lte:
date_end = parse_date(lte)
datetime_end = datetime.combine(
date_end + timedelta(days=1),
datetime.min.time(),
tzinfo=timezone.utc,
)
date_filters["scan_id__lt"] = uuid7_start(
datetime_to_uuid7(datetime_end)
)
if date_filters:
queryset = queryset.filter(**date_filters)
if service_filter := query_params.get("filter[service]") or query_params.get(
"filter[service__in]"
):
queryset = queryset.filter(service__in=service_filter.split(","))
if region_filter := query_params.get("filter[region]") or query_params.get(
"filter[region__in]"
):
queryset = queryset.filter(region__in=region_filter.split(","))
if resource_type_filter := query_params.get("filter[type]") or query_params.get(
"filter[type__in]"
):
queryset = queryset.filter(
resource_type__in=resource_type_filter.split(",")
)
services = list(
queryset.values_list("service", flat=True).distinct().order_by("service")
)
regions = list(
queryset.values_list("region", flat=True).distinct().order_by("region")
)
resource_types = list(
queryset.values_list("resource_type", flat=True)
.exclude(resource_type__isnull=True)
.exclude(resource_type__exact="")
.distinct()
.order_by("resource_type")
)
result = {
"services": services,
"regions": regions,
"types": resource_types,
}
serializer = self.get_serializer(data=result)
serializer.is_valid(raise_exception=True)
return Response(serializer.data)
@action(
detail=False,
methods=["get"],
url_name="metadata_latest",
url_path="metadata/latest",
)
def metadata_latest(self, request):
tenant_id = request.tenant_id
query_params = request.query_params
latest_scans_queryset = (
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
)
queryset = ResourceScanSummary.objects.filter(
tenant_id=tenant_id,
scan_id__in=latest_scans_queryset.values_list("id", flat=True),
)
if service_filter := query_params.get("filter[service]") or query_params.get(
"filter[service__in]"
):
queryset = queryset.filter(service__in=service_filter.split(","))
if region_filter := query_params.get("filter[region]") or query_params.get(
"filter[region__in]"
):
queryset = queryset.filter(region__in=region_filter.split(","))
if resource_type_filter := query_params.get("filter[type]") or query_params.get(
"filter[type__in]"
):
queryset = queryset.filter(
resource_type__in=resource_type_filter.split(",")
)
services = list(
queryset.values_list("service", flat=True).distinct().order_by("service")
)
regions = list(
queryset.values_list("region", flat=True).distinct().order_by("region")
)
resource_types = list(
queryset.values_list("resource_type", flat=True)
.exclude(resource_type__isnull=True)
.exclude(resource_type__exact="")
.distinct()
.order_by("resource_type")
)
result = {
"services": services,
"regions": regions,
"types": resource_types,
}
serializer = self.get_serializer(data=result)
serializer.is_valid(raise_exception=True)
return Response(serializer.data)
@extend_schema_view(
list=extend_schema(
@@ -2332,7 +1943,17 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
search_value, config="simple", search_type="plain"
)
queryset = queryset.filter(text_search=search_query)
resource_match = Resource.all_objects.filter(
text_search=search_query,
id__in=ResourceFindingMapping.objects.filter(
resource_id=OuterRef("pk"),
tenant_id=tenant_id,
).values("resource_id"),
)
queryset = queryset.filter(
Q(text_search=search_query) | Q(Exists(resource_match))
)
return queryset
@@ -2435,12 +2056,9 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
# ToRemove: Temporary fallback mechanism
if not queryset.exists():
raw_scans_ids = Scan.objects.filter(
scan_ids = Scan.objects.filter(
tenant_id=tenant_id, **scan_based_filters
).values_list("id", "unique_resource_count")
scan_ids = [
scan_id for scan_id, count in raw_scans_ids if count and count > 0
]
).values_list("id", flat=True)
for scan_id in scan_ids:
backfill_scan_resource_summaries_task.apply_async(
kwargs={"tenant_id": tenant_id, "scan_id": scan_id}
@@ -2526,12 +2144,7 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
)
raw_latest_scans_ids = list(
latest_scans_queryset.values_list("id", "unique_resource_count")
)
latest_scans_ids = [
scan_id for scan_id, count in raw_latest_scans_ids if count and count > 0
]
latest_scans_ids = list(latest_scans_queryset.values_list("id", flat=True))
queryset = ResourceScanSummary.objects.filter(
tenant_id=tenant_id,
@@ -3425,9 +3038,9 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema(tags=["Overview"])
@extend_schema_view(
providers=extend_schema(
list=extend_schema(
tags=["Overview"],
summary="Get aggregated provider data",
description=(
"Retrieve an aggregated overview of findings and resources grouped by providers. "
@@ -3468,7 +3081,7 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
)
@method_decorator(CACHE_DECORATOR, name="list")
class OverviewViewSet(BaseRLSViewSet):
queryset = ScanSummary.objects.all()
queryset = ComplianceOverview.objects.all()
http_method_names = ["get"]
ordering = ["-inserted_at"]
# RBAC required permissions (implicit -> MANAGE_PROVIDERS enable unlimited visibility or check the visibility of
@@ -3479,10 +3092,19 @@ class OverviewViewSet(BaseRLSViewSet):
role = get_role(self.request.user)
providers = get_providers(role)
if not role.unlimited_visibility:
self.allowed_providers = providers
def _get_filtered_queryset(model):
if role.unlimited_visibility:
return model.all_objects.filter(tenant_id=self.request.tenant_id)
return model.all_objects.filter(
tenant_id=self.request.tenant_id, scan__provider__in=providers
)
return ScanSummary.all_objects.filter(tenant_id=self.request.tenant_id)
if self.action == "providers":
return _get_filtered_queryset(Finding)
elif self.action in ("findings", "findings_severity", "services"):
return _get_filtered_queryset(ScanSummary)
else:
return super().get_queryset()
def get_serializer_class(self):
if self.action == "providers":
@@ -3515,24 +3137,18 @@ class OverviewViewSet(BaseRLSViewSet):
@action(detail=False, methods=["get"], url_name="providers")
def providers(self, request):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
provider_filter = (
{"provider__in": self.allowed_providers}
if hasattr(self, "allowed_providers")
else {}
)
latest_scan_ids = (
Scan.all_objects.filter(
tenant_id=tenant_id, state=StateChoices.COMPLETED, **provider_filter
)
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
)
findings_aggregated = (
queryset.filter(scan_id__in=latest_scan_ids)
ScanSummary.all_objects.filter(
tenant_id=tenant_id, scan_id__in=latest_scan_ids
)
.values(
"scan__provider_id",
provider=F("scan__provider__provider"),
@@ -3568,7 +3184,7 @@ class OverviewViewSet(BaseRLSViewSet):
)
return Response(
self.get_serializer(overview, many=True).data,
OverviewProviderSerializer(overview, many=True).data,
status=status.HTTP_200_OK,
)
@@ -3577,16 +3193,9 @@ class OverviewViewSet(BaseRLSViewSet):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
filtered_queryset = self.filter_queryset(queryset)
provider_filter = (
{"provider__in": self.allowed_providers}
if hasattr(self, "allowed_providers")
else {}
)
latest_scan_ids = (
Scan.all_objects.filter(
tenant_id=tenant_id, state=StateChoices.COMPLETED, **provider_filter
)
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
@@ -3623,16 +3232,9 @@ class OverviewViewSet(BaseRLSViewSet):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
filtered_queryset = self.filter_queryset(queryset)
provider_filter = (
{"provider__in": self.allowed_providers}
if hasattr(self, "allowed_providers")
else {}
)
latest_scan_ids = (
Scan.all_objects.filter(
tenant_id=tenant_id, state=StateChoices.COMPLETED, **provider_filter
)
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
@@ -3652,7 +3254,7 @@ class OverviewViewSet(BaseRLSViewSet):
for item in severity_counts:
severity_data[item["severity"]] = item["count"]
serializer = self.get_serializer(severity_data)
serializer = OverviewSeveritySerializer(severity_data)
return Response(serializer.data, status=status.HTTP_200_OK)
@action(detail=False, methods=["get"], url_name="services")
@@ -3660,16 +3262,9 @@ class OverviewViewSet(BaseRLSViewSet):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
filtered_queryset = self.filter_queryset(queryset)
provider_filter = (
{"provider__in": self.allowed_providers}
if hasattr(self, "allowed_providers")
else {}
)
latest_scan_ids = (
Scan.all_objects.filter(
tenant_id=tenant_id, state=StateChoices.COMPLETED, **provider_filter
)
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
@@ -3687,12 +3282,11 @@ class OverviewViewSet(BaseRLSViewSet):
.order_by("service")
)
serializer = self.get_serializer(services_data, many=True)
serializer = OverviewServiceSerializer(services_data, many=True)
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema(tags=["Schedule"])
@extend_schema_view(
daily=extend_schema(
summary="Create a daily schedule scan for a given provider",
@@ -3887,54 +3481,3 @@ class LighthouseConfigViewSet(BaseRLSViewSet):
)
},
)
@extend_schema_view(
list=extend_schema(
tags=["Processor"],
summary="List all processors",
description="Retrieve a list of all configured processors with options for filtering by various criteria.",
),
retrieve=extend_schema(
tags=["Processor"],
summary="Retrieve processor details",
description="Fetch detailed information about a specific processor by its ID.",
),
create=extend_schema(
tags=["Processor"],
summary="Create a new processor",
description="Register a new processor with the system, providing necessary configuration details. There can "
"only be one processor of each type per tenant.",
),
partial_update=extend_schema(
tags=["Processor"],
summary="Partially update a processor",
description="Modify certain fields of an existing processor without affecting other settings.",
),
destroy=extend_schema(
tags=["Processor"],
summary="Delete a processor",
description="Remove a processor from the system by its ID.",
),
)
@method_decorator(CACHE_DECORATOR, name="list")
@method_decorator(CACHE_DECORATOR, name="retrieve")
class ProcessorViewSet(BaseRLSViewSet):
queryset = Processor.objects.all()
serializer_class = ProcessorSerializer
http_method_names = ["get", "post", "patch", "delete"]
filterset_class = ProcessorFilter
ordering = ["processor_type", "-inserted_at"]
# RBAC required permissions
required_permissions = [Permissions.MANAGE_ACCOUNT]
def get_queryset(self):
queryset = Processor.objects.filter(tenant_id=self.request.tenant_id)
return queryset
def get_serializer_class(self):
if self.action == "create":
return ProcessorCreateSerializer
elif self.action == "partial_update":
return ProcessorUpdateSerializer
return super().get_serializer_class()
-88
View File
@@ -1,5 +1,3 @@
import string
from django.core.exceptions import ValidationError
from django.utils.translation import gettext as _
@@ -22,89 +20,3 @@ class MaximumLengthValidator:
return _(
f"Your password must contain no more than {self.max_length} characters."
)
class SpecialCharactersValidator:
def __init__(self, special_characters=None, min_special_characters=1):
# Use string.punctuation if no custom characters provided
self.special_characters = special_characters or string.punctuation
self.min_special_characters = min_special_characters
def validate(self, password, user=None):
if (
sum(1 for char in password if char in self.special_characters)
< self.min_special_characters
):
raise ValidationError(
_("This password must contain at least one special character."),
code="password_no_special_characters",
params={
"special_characters": self.special_characters,
"min_special_characters": self.min_special_characters,
},
)
def get_help_text(self):
return _(
f"Your password must contain at least one special character from: {self.special_characters}"
)
class UppercaseValidator:
def __init__(self, min_uppercase=1):
self.min_uppercase = min_uppercase
def validate(self, password, user=None):
if sum(1 for char in password if char.isupper()) < self.min_uppercase:
raise ValidationError(
_(
"This password must contain at least %(min_uppercase)d uppercase letter."
),
code="password_no_uppercase_letters",
params={"min_uppercase": self.min_uppercase},
)
def get_help_text(self):
return _(
f"Your password must contain at least {self.min_uppercase} uppercase letter."
)
class LowercaseValidator:
def __init__(self, min_lowercase=1):
self.min_lowercase = min_lowercase
def validate(self, password, user=None):
if sum(1 for char in password if char.islower()) < self.min_lowercase:
raise ValidationError(
_(
"This password must contain at least %(min_lowercase)d lowercase letter."
),
code="password_no_lowercase_letters",
params={"min_lowercase": self.min_lowercase},
)
def get_help_text(self):
return _(
f"Your password must contain at least {self.min_lowercase} lowercase letter."
)
class NumericValidator:
def __init__(self, min_numeric=1):
self.min_numeric = min_numeric
def validate(self, password, user=None):
if sum(1 for char in password if char.isdigit()) < self.min_numeric:
raise ValidationError(
_(
"This password must contain at least %(min_numeric)d numeric character."
),
code="password_no_numeric_characters",
params={"min_numeric": self.min_numeric},
)
def get_help_text(self):
return _(
f"Your password must contain at least {self.min_numeric} numeric character."
)
-29
View File
@@ -11,7 +11,6 @@ SECRET_KEY = env("SECRET_KEY", default="secret")
DEBUG = env.bool("DJANGO_DEBUG", default=False)
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
USE_X_FORWARDED_HOST = True
# Application definition
@@ -159,30 +158,6 @@ AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
},
{
"NAME": "api.validators.SpecialCharactersValidator",
"OPTIONS": {
"min_special_characters": 1,
},
},
{
"NAME": "api.validators.UppercaseValidator",
"OPTIONS": {
"min_uppercase": 1,
},
},
{
"NAME": "api.validators.LowercaseValidator",
"OPTIONS": {
"min_lowercase": 1,
},
},
{
"NAME": "api.validators.NumericValidator",
"OPTIONS": {
"min_numeric": 1,
},
},
]
SIMPLE_JWT = {
@@ -273,7 +248,3 @@ X_FRAME_OPTIONS = "DENY"
SECURE_REFERRER_POLICY = "strict-origin-when-cross-origin"
DJANGO_DELETION_BATCH_SIZE = env.int("DJANGO_DELETION_BATCH_SIZE", 5000)
# SAML requirement
CSRF_COOKIE_SECURE = True
SESSION_COOKIE_SECURE = True
+6 -16
View File
@@ -4,7 +4,6 @@ from config.env import env
IGNORED_EXCEPTIONS = [
# Provider is not connected due to credentials errors
"is not connected",
"ProviderConnectionError",
# Authentication Errors from AWS
"InvalidToken",
"AccessDeniedException",
@@ -17,7 +16,7 @@ IGNORED_EXCEPTIONS = [
"InternalServerErrorException",
"AccessDenied",
"No Shodan API Key", # Shodan Check
"RequestLimitExceeded", # For now, we don't want to log the RequestLimitExceeded errors
"RequestLimitExceeded", # For now we don't want to log the RequestLimitExceeded errors
"ThrottlingException",
"Rate exceeded",
"SubscriptionRequiredException",
@@ -43,9 +42,7 @@ IGNORED_EXCEPTIONS = [
"AWSAccessKeyIDInvalidError",
"AWSSessionTokenExpiredError",
"EndpointConnectionError", # AWS Service is not available in a region
# The following comes from urllib3: eu-west-1 -- HTTPClientError[126]: An HTTP Client raised an
# unhandled exception: AWSHTTPSConnectionPool(host='hostname.s3.eu-west-1.amazonaws.com', port=443): Pool is closed.
"Pool is closed",
"Pool is closed", # The following comes from urllib3: eu-west-1 -- HTTPClientError[126]: An HTTP Client raised an unhandled exception: AWSHTTPSConnectionPool(host='hostname.s3.eu-west-1.amazonaws.com', port=443): Pool is closed.
# Authentication Errors from GCP
"ClientAuthenticationError",
"AuthorizationFailed",
@@ -74,7 +71,7 @@ IGNORED_EXCEPTIONS = [
def before_send(event, hint):
"""
before_send handles the Sentry events in order to send them or not
before_send handles the Sentry events in order to sent them or not
"""
# Ignore logs with the ignored_exceptions
# https://docs.python.org/3/library/logging.html#logrecord-objects
@@ -82,16 +79,9 @@ def before_send(event, hint):
log_msg = hint["log_record"].msg
log_lvl = hint["log_record"].levelno
# Handle Error and Critical events and discard the rest
if log_lvl <= 40 and any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
return None # Explicitly return None to drop the event
# Ignore exceptions with the ignored_exceptions
if "exc_info" in hint and hint["exc_info"]:
exc_value = str(hint["exc_info"][1])
if any(ignored in exc_value for ignored in IGNORED_EXCEPTIONS):
return None # Explicitly return None to drop the event
# Handle Error events and discard the rest
if log_lvl == 40 and any(ignored in log_msg for ignored in IGNORED_EXCEPTIONS):
return
return event
@@ -25,18 +25,9 @@ SOCIALACCOUNT_EMAIL_AUTHENTICATION = True
SOCIALACCOUNT_EMAIL_AUTHENTICATION_AUTO_CONNECT = True
SOCIALACCOUNT_ADAPTER = "api.adapters.ProwlerSocialAccountAdapter"
# def inline(pem: str) -> str:
# return "".join(
# line.strip()
# for line in pem.splitlines()
# if "CERTIFICATE" not in line and "KEY" not in line
# )
# # SAML keys (TODO: Validate certificates)
# SAML_PUBLIC_CERT = inline(env("SAML_PUBLIC_CERT", default=""))
# SAML_PRIVATE_KEY = inline(env("SAML_PRIVATE_KEY", default=""))
# SAML keys
SAML_PUBLIC_CERT = env("SAML_PUBLIC_CERT", default="")
SAML_PRIVATE_KEY = env("SAML_PRIVATE_KEY", default="")
SOCIALACCOUNT_PROVIDERS = {
"google": {
@@ -69,14 +60,12 @@ SOCIALACCOUNT_PROVIDERS = {
"entity_id": "urn:prowler.com:sp",
},
"advanced": {
# TODO: Validate certificates
# "x509cert": SAML_PUBLIC_CERT,
# "private_key": SAML_PRIVATE_KEY,
# "authn_request_signed": True,
# "want_message_signed": True,
# "want_assertion_signed": True,
"reject_idp_initiated_sso": False,
"x509cert": SAML_PUBLIC_CERT,
"private_key": SAML_PRIVATE_KEY,
"name_id_format": "urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress",
"authn_request_signed": True,
"want_assertion_signed": True,
"want_message_signed": True,
},
},
}
+2 -95
View File
@@ -23,13 +23,11 @@ from api.models import (
Invitation,
LighthouseConfiguration,
Membership,
Processor,
Provider,
ProviderGroup,
ProviderSecret,
Resource,
ResourceTag,
ResourceTagMapping,
Role,
SAMLConfiguration,
SAMLDomainIndex,
@@ -46,19 +44,12 @@ from api.v1.serializers import TokenSerializer
from prowler.lib.check.models import Severity
from prowler.lib.outputs.finding import Status
TODAY = str(datetime.today().date())
API_JSON_CONTENT_TYPE = "application/vnd.api+json"
NO_TENANT_HTTP_STATUS = status.HTTP_401_UNAUTHORIZED
TEST_USER = "dev@prowler.com"
TEST_PASSWORD = "testing_psswd"
def today_after_n_days(n_days: int) -> str:
return datetime.strftime(
datetime.today().date() + timedelta(days=n_days), "%Y-%m-%d"
)
@pytest.fixture(scope="module")
def enforce_test_user_db_connection(django_db_setup, django_db_blocker):
"""Ensure tests use the test user for database connections."""
@@ -390,27 +381,8 @@ def providers_fixture(tenants_fixture):
tenant_id=tenant.id,
scanner_args={"key1": "value1", "key2": {"key21": "value21"}},
)
provider6 = Provider.objects.create(
provider="m365",
uid="m365.test.com",
alias="m365_testing",
tenant_id=tenant.id,
)
return provider1, provider2, provider3, provider4, provider5, provider6
@pytest.fixture
def processor_fixture(tenants_fixture):
tenant, *_ = tenants_fixture
processor = Processor.objects.create(
tenant_id=tenant.id,
processor_type="mutelist",
configuration="Mutelist:\n Accounts:\n *:\n Checks:\n iam_user_hardware_mfa_enabled:\n "
" Regions:\n - *\n Resources:\n - *",
)
return processor
return provider1, provider2, provider3, provider4, provider5
@pytest.fixture
@@ -662,7 +634,6 @@ def findings_fixture(scans_fixture, resources_fixture):
check_metadata={
"CheckId": "test_check_id",
"Description": "test description apple sauce",
"servicename": "ec2",
},
first_seen_at="2024-01-02T00:00:00Z",
)
@@ -689,7 +660,6 @@ def findings_fixture(scans_fixture, resources_fixture):
check_metadata={
"CheckId": "test_check_id",
"Description": "test description orange juice",
"servicename": "s3",
},
first_seen_at="2024-01-02T00:00:00Z",
muted=True,
@@ -1145,73 +1115,10 @@ def latest_scan_finding(authenticated_client, providers_fixture, resources_fixtu
return finding
@pytest.fixture(scope="function")
def latest_scan_resource(authenticated_client, providers_fixture):
provider = providers_fixture[0]
tenant_id = str(providers_fixture[0].tenant_id)
scan = Scan.objects.create(
name="latest completed scan for resource",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
tenant_id=tenant_id,
)
resource = Resource.objects.create(
tenant_id=tenant_id,
provider=provider,
uid="latest_resource_uid",
name="Latest Resource",
region="us-east-1",
service="ec2",
type="instance",
metadata='{"test": "metadata"}',
details='{"test": "details"}',
)
resource_tag = ResourceTag.objects.create(
tenant_id=tenant_id,
key="environment",
value="test",
)
ResourceTagMapping.objects.create(
tenant_id=tenant_id,
resource=resource,
tag=resource_tag,
)
finding = Finding.objects.create(
tenant_id=tenant_id,
uid="test_finding_uid_latest",
scan=scan,
delta="new",
status=Status.FAIL,
status_extended="test status extended ",
impact=Severity.critical,
impact_extended="test impact extended",
severity=Severity.critical,
raw_result={
"status": Status.FAIL,
"impact": Severity.critical,
"severity": Severity.critical,
},
tags={"test": "latest"},
check_id="test_check_id_latest",
check_metadata={
"CheckId": "test_check_id_latest",
"Description": "test description latest",
},
first_seen_at="2024-01-02T00:00:00Z",
)
finding.add_resources([resource])
backfill_resource_scan_summaries(tenant_id, str(scan.id))
return resource
@pytest.fixture
def saml_setup(tenants_fixture):
tenant_id = tenants_fixture[0].id
domain = "prowler.com"
domain = "example.com"
SAMLDomainIndex.objects.create(email_domain=domain, tenant_id=tenant_id)
+10 -4
View File
@@ -2,10 +2,10 @@ import json
from datetime import datetime, timedelta, timezone
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from rest_framework_json_api.serializers import ValidationError
from tasks.tasks import perform_scheduled_scan_task
from api.db_utils import rls_transaction
from api.exceptions import ConflictException
from api.models import Provider, Scan, StateChoices
@@ -24,9 +24,15 @@ def schedule_provider_scan(provider_instance: Provider):
if PeriodicTask.objects.filter(
interval=schedule, name=task_name, task="scan-perform-scheduled"
).exists():
raise ConflictException(
detail="There is already a scheduled scan for this provider.",
pointer="/data/attributes/provider_id",
raise ValidationError(
[
{
"detail": "There is already a scheduled scan for this provider.",
"status": 400,
"source": {"pointer": "/data/attributes/provider_id"},
"code": "invalid",
}
]
)
with rls_transaction(tenant_id):
-2
View File
@@ -31,7 +31,6 @@ from prowler.lib.outputs.compliance.iso27001.iso27001_gcp import GCPISO27001
from prowler.lib.outputs.compliance.iso27001.iso27001_kubernetes import (
KubernetesISO27001,
)
from prowler.lib.outputs.compliance.iso27001.iso27001_m365 import M365ISO27001
from prowler.lib.outputs.compliance.kisa_ismsp.kisa_ismsp_aws import AWSKISAISMSP
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_aws import AWSMitreAttack
from prowler.lib.outputs.compliance.mitre_attack.mitre_attack_azure import (
@@ -91,7 +90,6 @@ COMPLIANCE_CLASS_MAP = {
"m365": [
(lambda name: name.startswith("cis_"), M365CIS),
(lambda name: name == "prowler_threatscore_m365", ProwlerThreatScoreM365),
(lambda name: name.startswith("iso27001_"), M365ISO27001),
],
}
+15 -93
View File
@@ -5,8 +5,8 @@ from datetime import datetime, timezone
from celery.utils.log import get_task_logger
from config.settings.celery import CELERY_DEADLOCK_ATTEMPTS
from django.db import IntegrityError, OperationalError, connection
from django.db.models import Case, Count, IntegerField, Prefetch, Sum, When
from django.db import IntegrityError, OperationalError
from django.db.models import Case, Count, IntegerField, Sum, When
from tasks.utils import CustomEncoder
from api.compliance import (
@@ -14,11 +14,9 @@ from api.compliance import (
generate_scan_compliance,
)
from api.db_utils import create_objects_in_batches, rls_transaction
from api.exceptions import ProviderConnectionError
from api.models import (
ComplianceRequirementOverview,
Finding,
Processor,
Provider,
Resource,
ResourceScanSummary,
@@ -28,7 +26,7 @@ from api.models import (
StateChoices,
)
from api.models import StatusChoices as FindingStatus
from api.utils import initialize_prowler_provider, return_prowler_provider
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.outputs.finding import Finding as ProwlerFinding
from prowler.lib.scan.scan import Scan as ProwlerScan
@@ -134,28 +132,14 @@ def perform_prowler_scan(
scan_instance.started_at = datetime.now(tz=timezone.utc)
scan_instance.save()
# Find the mutelist processor if it exists
with rls_transaction(tenant_id):
try:
mutelist_processor = Processor.objects.get(
tenant_id=tenant_id, processor_type=Processor.ProcessorChoices.MUTELIST
)
except Processor.DoesNotExist:
mutelist_processor = None
except Exception as e:
logger.error(f"Error processing mutelist rules: {e}")
mutelist_processor = None
try:
with rls_transaction(tenant_id):
try:
prowler_provider = initialize_prowler_provider(
provider_instance, mutelist_processor
)
prowler_provider = initialize_prowler_provider(provider_instance)
provider_instance.connected = True
except Exception as e:
provider_instance.connected = False
exc = ProviderConnectionError(
exc = ValueError(
f"Provider {provider_instance.provider} is not connected: {e}"
)
finally:
@@ -165,8 +149,7 @@ def perform_prowler_scan(
provider_instance.save()
# If the provider is not connected, raise an exception outside the transaction.
# If raised within the transaction, the transaction will be rolled back and the provider will not be marked
# as not connected.
# If raised within the transaction, the transaction will be rolled back and the provider will not be marked as not connected.
if exc:
raise exc
@@ -290,9 +273,6 @@ def perform_prowler_scan(
if not last_first_seen_at:
last_first_seen_at = datetime.now(tz=timezone.utc)
# If the finding is muted at this time the reason must be the configured Mutelist
muted_reason = "Muted by mutelist" if finding.muted else None
# Create the finding
finding_instance = Finding.objects.create(
tenant_id=tenant_id,
@@ -308,7 +288,6 @@ def perform_prowler_scan(
scan=scan_instance,
first_seen_at=last_first_seen_at,
muted=finding.muted,
muted_reason=muted_reason,
compliance=finding.compliance,
)
finding_instance.add_resources([resource_instance])
@@ -376,16 +355,12 @@ def perform_prowler_scan(
def aggregate_findings(tenant_id: str, scan_id: str):
"""
Aggregates findings for a given scan and stores the results in the ScanSummary table.
Also updates the failed_findings_count for each resource based on the latest findings.
This function retrieves all findings associated with a given `scan_id` and calculates various
metrics such as counts of failed, passed, and muted findings, as well as their deltas (new,
changed, unchanged). The results are grouped by `check_id`, `service`, `severity`, and `region`.
These aggregated metrics are then stored in the `ScanSummary` table.
Additionally, it updates the failed_findings_count field for each resource based on the most
recent findings for each finding.uid.
Args:
tenant_id (str): The ID of the tenant to which the scan belongs.
scan_id (str): The ID of the scan for which findings need to be aggregated.
@@ -405,8 +380,6 @@ def aggregate_findings(tenant_id: str, scan_id: str):
- muted_new: Muted findings with a delta of 'new'.
- muted_changed: Muted findings with a delta of 'changed'.
"""
_update_resource_failed_findings_count(tenant_id, scan_id)
with rls_transaction(tenant_id):
findings = Finding.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
@@ -531,48 +504,6 @@ def aggregate_findings(tenant_id: str, scan_id: str):
ScanSummary.objects.bulk_create(scan_aggregations, batch_size=3000)
def _update_resource_failed_findings_count(tenant_id: str, scan_id: str):
"""
Update the failed_findings_count field for resources based on the latest findings.
This function calculates the number of failed findings for each resource by:
1. Getting the latest finding for each finding.uid
2. Counting failed findings per resource
3. Updating the failed_findings_count field for each resource
Args:
tenant_id (str): The ID of the tenant to which the scan belongs.
scan_id (str): The ID of the scan for which to update resource counts.
"""
with rls_transaction(tenant_id):
scan = Scan.objects.get(pk=scan_id)
provider_id = str(scan.provider_id)
with connection.cursor() as cursor:
cursor.execute(
"""
UPDATE resources AS r
SET failed_findings_count = COALESCE((
SELECT COUNT(*) FROM (
SELECT DISTINCT ON (f.uid) f.uid
FROM findings AS f
JOIN resource_finding_mappings AS rfm
ON rfm.finding_id = f.id
WHERE f.tenant_id = %s
AND f.status = %s
AND f.muted = FALSE
AND rfm.resource_id = r.id
ORDER BY f.uid, f.inserted_at DESC
) AS latest_uids
), 0)
WHERE r.tenant_id = %s
AND r.provider_id = %s
""",
[tenant_id, FindingStatus.FAIL, tenant_id, provider_id],
)
def create_compliance_requirements(tenant_id: str, scan_id: str):
"""
Create detailed compliance requirement overview records for a scan.
@@ -595,30 +526,21 @@ def create_compliance_requirements(tenant_id: str, scan_id: str):
with rls_transaction(tenant_id):
scan_instance = Scan.objects.get(pk=scan_id)
provider_instance = scan_instance.provider
prowler_provider = return_prowler_provider(provider_instance)
prowler_provider = initialize_prowler_provider(provider_instance)
# Get check status data by region from findings
findings = (
Finding.all_objects.filter(scan_id=scan_id, muted=False)
.only("id", "check_id", "status")
.prefetch_related(
Prefetch(
"resources",
queryset=Resource.objects.only("id", "region"),
to_attr="small_resources",
)
)
.iterator(chunk_size=1000)
)
check_status_by_region = {}
with rls_transaction(tenant_id):
findings = Finding.objects.filter(scan_id=scan_id, muted=False)
for finding in findings:
for resource in finding.small_resources:
# Get region from resources
for resource in finding.resources.all():
region = resource.region
current_status = check_status_by_region.setdefault(region, {})
if current_status.get(finding.check_id) != "FAIL":
current_status[finding.check_id] = finding.status
region_dict = check_status_by_region.setdefault(region, {})
current_status = region_dict.get(finding.check_id)
if current_status == "FAIL":
continue
region_dict[finding.check_id] = finding.status
try:
# Try to get regions from provider
+19 -25
View File
@@ -37,26 +37,6 @@ from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str):
"""
Helper function to perform tasks after a scan is completed.
Args:
tenant_id (str): The tenant ID under which the scan was performed.
scan_id (str): The ID of the scan that was performed.
provider_id (str): The primary key of the Provider instance that was scanned.
"""
create_compliance_requirements_task.apply_async(
kwargs={"tenant_id": tenant_id, "scan_id": scan_id}
)
chain(
perform_scan_summary_task.si(tenant_id=tenant_id, scan_id=scan_id),
generate_outputs_task.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
@shared_task(base=RLSTask, name="provider-connection-check")
@set_tenant
def check_provider_connection_task(provider_id: str):
@@ -123,7 +103,13 @@ def perform_scan_task(
checks_to_execute=checks_to_execute,
)
_perform_scan_complete_tasks(tenant_id, scan_id, provider_id)
chain(
perform_scan_summary_task.si(tenant_id, scan_id),
create_compliance_requirements_task.si(tenant_id=tenant_id, scan_id=scan_id),
generate_outputs.si(
scan_id=scan_id, provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
return result
@@ -228,12 +214,20 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
scheduler_task_id=periodic_task_instance.id,
)
_perform_scan_complete_tasks(tenant_id, str(scan_instance.id), provider_id)
chain(
perform_scan_summary_task.si(tenant_id, scan_instance.id),
create_compliance_requirements_task.si(
tenant_id=tenant_id, scan_id=str(scan_instance.id)
),
generate_outputs.si(
scan_id=str(scan_instance.id), provider_id=provider_id, tenant_id=tenant_id
),
).apply_async()
return result
@shared_task(name="scan-summary", queue="overview")
@shared_task(name="scan-summary")
def perform_scan_summary_task(tenant_id: str, scan_id: str):
return aggregate_findings(tenant_id=tenant_id, scan_id=scan_id)
@@ -249,7 +243,7 @@ def delete_tenant_task(tenant_id: str):
queue="scan-reports",
)
@set_tenant(keep_tenant=True)
def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
def generate_outputs(scan_id: str, provider_id: str, tenant_id: str):
"""
Process findings in batches and generate output files in multiple formats.
@@ -387,7 +381,7 @@ def backfill_scan_resource_summaries_task(tenant_id: str, scan_id: str):
return backfill_resource_scan_summaries(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(base=RLSTask, name="scan-compliance-overviews", queue="overview")
@shared_task(base=RLSTask, name="scan-compliance-overviews")
def create_compliance_requirements_task(tenant_id: str, scan_id: str):
"""
Creates detailed compliance requirement records for a scan.
+3 -22
View File
@@ -3,9 +3,9 @@ from unittest.mock import patch
import pytest
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from rest_framework_json_api.serializers import ValidationError
from tasks.beat import schedule_provider_scan
from api.exceptions import ConflictException
from api.models import Scan
@@ -48,29 +48,10 @@ class TestScheduleProviderScan:
with patch("tasks.tasks.perform_scheduled_scan_task.apply_async"):
schedule_provider_scan(provider_instance)
# Now, try scheduling again, should raise ConflictException
with pytest.raises(ConflictException) as exc_info:
# Now, try scheduling again, should raise ValidationError
with pytest.raises(ValidationError) as exc_info:
schedule_provider_scan(provider_instance)
assert "There is already a scheduled scan for this provider." in str(
exc_info.value
)
def test_remove_periodic_task(self, providers_fixture):
provider_instance = providers_fixture[0]
assert Scan.objects.count() == 0
with patch("tasks.tasks.perform_scheduled_scan_task.apply_async"):
schedule_provider_scan(provider_instance)
assert Scan.objects.count() == 1
scan = Scan.objects.first()
periodic_task = scan.scheduler_task
assert periodic_task is not None
periodic_task.delete()
scan.refresh_from_db()
# Assert the scan still exists but its scheduler_task is set to None
# Otherwise, Scan.DoesNotExist would be raised
assert Scan.objects.get(id=scan.id).scheduler_task is None
+562 -134
View File
@@ -7,18 +7,16 @@ import pytest
from tasks.jobs.scan import (
_create_finding_delta,
_store_resources,
_update_resource_failed_findings_count,
create_compliance_requirements,
perform_prowler_scan,
)
from tasks.utils import CustomEncoder
from api.exceptions import ProviderConnectionError
from api.models import (
ComplianceRequirementOverview,
Finding,
Provider,
Resource,
Scan,
Severity,
StateChoices,
StatusChoices,
@@ -160,7 +158,6 @@ class TestPerformScan:
assert scan_finding.raw_result == finding.raw
assert scan_finding.muted
assert scan_finding.compliance == finding.compliance
assert scan_finding.muted_reason == "Muted by mutelist"
assert scan_resource.tenant == tenant
assert scan_resource.uid == finding.resource_uid
@@ -206,7 +203,7 @@ class TestPerformScan:
provider_id = str(provider.id)
checks_to_execute = ["check1", "check2"]
with pytest.raises(ProviderConnectionError):
with pytest.raises(ValueError):
perform_prowler_scan(tenant_id, scan_id, provider_id, checks_to_execute)
scan.refresh_from_db()
@@ -401,13 +398,38 @@ class TestCreateComplianceRequirements:
resources_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = [
"us-east-1",
"us-west-2",
]
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
@@ -436,29 +458,108 @@ class TestCreateComplianceRequirements:
},
},
},
"aws_account_security_onboarding_aws": {
"framework": "AWS Account Security Onboarding",
"version": "1.0",
"requirements": {
"requirement1": {
"description": "Basic security requirement",
"checks_status": {
"pass": 1,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
mock_findings_filter.return_value = []
result = create_compliance_requirements(tenant_id, scan_id)
assert "requirements_created" in result
assert "regions_processed" in result
assert "compliance_frameworks" in result
assert result["regions_processed"] == ["us-east-1", "us-west-2"]
assert result["requirements_created"] == 6
assert len(result["compliance_frameworks"]) == 2
mock_create_objects.assert_called_once()
call_args = mock_create_objects.call_args[0]
assert call_args[0] == tenant_id
assert call_args[1] == ComplianceRequirementOverview
assert len(call_args[2]) == 6
compliance_objects = call_args[2]
for obj in compliance_objects:
assert isinstance(obj, ComplianceRequirementOverview)
assert obj.tenant.id == tenant.id
assert obj.scan == scan
assert obj.region in ["us-east-1", "us-west-2"]
assert obj.compliance_id in [
"cis_1.4_aws",
"aws_account_security_onboarding_aws",
]
def test_create_compliance_requirements_with_findings(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding1 = MagicMock()
mock_finding1.check_id = "check1"
mock_finding1.status = "PASS"
mock_resource1 = MagicMock()
mock_resource1.region = "us-east-1"
mock_finding1.resources.all.return_value = [mock_resource1]
mock_finding2 = MagicMock()
mock_finding2.check_id = "check2"
mock_finding2.status = "FAIL"
mock_resource2 = MagicMock()
mock_resource2.region = "us-west-2"
mock_finding2.resources.all.return_value = [mock_resource2]
mock_findings_filter.return_value = [mock_finding1, mock_finding2]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = [
"us-east-1",
"us-west-2",
]
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
@@ -467,6 +568,7 @@ class TestCreateComplianceRequirements:
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 1,
@@ -475,26 +577,45 @@ class TestCreateComplianceRequirements:
},
"status": "FAIL",
},
"req_2": {
"description": "Test Requirement 2",
"checks": {"check_2": None},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
},
},
}
}
result = create_compliance_requirements(tenant_id, scan_id)
assert "requirements_created" in result
mock_findings_filter.assert_called_once_with(scan_id=scan_id, muted=False)
assert mock_generate_compliance.call_count == 2
assert result["requirements_created"] == 4
assert set(result["regions_processed"]) == {"us-east-1", "us-west-2"}
def test_create_compliance_requirements_kubernetes_provider(
def test_create_compliance_requirements_no_provider_regions(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
@@ -508,6 +629,22 @@ class TestCreateComplianceRequirements:
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding = MagicMock()
mock_finding.check_id = "check1"
mock_finding.status = "PASS"
mock_resource = MagicMock()
mock_resource.region = "default"
mock_finding.resources.all.return_value = [mock_resource]
mock_findings_filter.return_value = [mock_finding]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.side_effect = AttributeError(
"No get_regions method"
)
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {
"kubernetes_cis": {
"framework": "CIS Kubernetes Benchmark",
@@ -529,61 +666,205 @@ class TestCreateComplianceRequirements:
result = create_compliance_requirements(tenant_id, scan_id)
assert "regions_processed" in result
assert result["regions_processed"] == ["default"]
def test_create_compliance_requirements_empty_template(
def test_create_compliance_requirements_empty_findings(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
mock_compliance_template.__getitem__.return_value = {}
result = create_compliance_requirements(tenant_id, scan_id)
assert result["requirements_created"] == 0
def test_create_compliance_requirements_error_handling(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with patch("tasks.jobs.scan.return_prowler_provider") as mock_prowler_provider:
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
mock_prowler_provider.side_effect = Exception(
"Provider initialization failed"
)
with pytest.raises(Exception, match="Provider initialization failed"):
create_compliance_requirements(tenant_id, scan_id)
def test_create_compliance_requirements_check_status_priority(
self, tenants_fixture, scans_fixture, providers_fixture, findings_fixture
):
with (
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_findings_filter.return_value = []
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
"framework": "CIS AWS Foundations Benchmark",
"version": "1.4.0",
"requirements": {
"1.1": {
"description": "Test requirement",
"checks_status": {
"pass": 0,
"fail": 0,
"manual": 0,
"total": 1,
},
"status": "PASS",
},
},
},
}
mock_findings_filter.return_value = []
result = create_compliance_requirements(tenant_id, scan_id)
assert result["regions_processed"] == ["us-east-1"]
assert result["requirements_created"] == 1
mock_generate_compliance.assert_not_called()
def test_create_compliance_requirements_error_handling(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_initialize_prowler_provider.side_effect = Exception(
"Provider initialization failed"
)
with pytest.raises(Exception, match="Provider initialization failed"):
create_compliance_requirements(tenant_id, scan_id)
def test_create_compliance_requirements_muted_findings_excluded(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_findings_filter.return_value = []
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {}
mock_findings_filter.return_value = []
create_compliance_requirements(tenant_id, scan_id)
mock_findings_filter.assert_called_once_with(scan_id=scan_id, muted=False)
def test_create_compliance_requirements_check_status_priority(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches"),
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
provider = providers_fixture[0]
provider.provider = Provider.ProviderChoices.AWS
provider.save()
scan.provider = provider
scan.save()
tenant_id = str(tenant.id)
scan_id = str(scan.id)
mock_finding1 = MagicMock()
mock_finding1.check_id = "check1"
mock_finding1.status = "PASS"
mock_resource1 = MagicMock()
mock_resource1.region = "us-east-1"
mock_finding1.resources.all.return_value = [mock_resource1]
mock_finding2 = MagicMock()
mock_finding2.check_id = "check1"
mock_finding2.status = "FAIL"
mock_resource2 = MagicMock()
mock_resource2.region = "us-east-1"
mock_finding2.resources.all.return_value = [mock_resource2]
mock_findings_filter.return_value = [mock_finding1, mock_finding2]
mock_prowler_provider_instance = MagicMock()
mock_prowler_provider_instance.get_regions.return_value = ["us-east-1"]
mock_initialize_prowler_provider.return_value = (
mock_prowler_provider_instance
)
mock_compliance_template.__getitem__.return_value = {
"cis_1.4_aws": {
@@ -608,21 +889,39 @@ class TestCreateComplianceRequirements:
assert mock_generate_compliance.call_count == 1
def test_create_compliance_requirements_multiple_regions(
def test_compliance_overview_aggregation_requirement_fail_priority(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
tenant = tenants_fixture[0]
scan = scans_fixture[0]
providers_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = [
"us-east-1",
"us-west-2",
"eu-west-1",
]
mock_initialize_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
@@ -631,6 +930,95 @@ class TestCreateComplianceRequirements:
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 1,
"manual": 0,
"total": 3,
},
"status": "FAIL",
}
},
}
}
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "FAIL"},
"eu-west-1": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 1,
"manual": 0,
"total": 3,
},
"status": "FAIL",
}
},
}
}
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 3
assert all(obj.requirement_status == "FAIL" for obj in created_objects)
def test_compliance_overview_aggregation_requirement_pass_all_regions(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant = tenants_fixture[0]
scan = scans_fixture[0]
providers_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = ["us-east-1", "us-west-2"]
mock_initialize_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 0,
@@ -643,26 +1031,72 @@ class TestCreateComplianceRequirements:
}
}
result = create_compliance_requirements(tenant_id, scan_id)
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
}
},
}
}
assert "requirements_created" in result
assert len(result["regions_processed"]) >= 0
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
def test_create_compliance_requirements_mixed_status_requirements(
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 2
assert all(obj.requirement_status == "PASS" for obj in created_objects)
def test_compliance_overview_aggregation_multiple_requirements_mixed_status(
self,
tenants_fixture,
scans_fixture,
providers_fixture,
findings_fixture,
):
with (
patch("api.db_utils.rls_transaction"),
patch(
"tasks.jobs.scan.initialize_prowler_provider"
) as mock_initialize_prowler_provider,
patch(
"tasks.jobs.scan.PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE"
) as mock_compliance_template,
patch("tasks.jobs.scan.generate_scan_compliance"),
patch(
"tasks.jobs.scan.generate_scan_compliance"
) as mock_generate_compliance,
patch("tasks.jobs.scan.create_objects_in_batches") as mock_create_objects,
patch("api.models.Finding.objects.filter") as mock_findings_filter,
):
tenant_id = str(tenants_fixture[0].id)
scan_id = str(scans_fixture[0].id)
tenant = tenants_fixture[0]
scan = scans_fixture[0]
providers_fixture[0]
mock_findings_filter.return_value = []
mock_prowler_provider = MagicMock()
mock_prowler_provider.get_regions.return_value = ["us-east-1", "us-west-2"]
mock_initialize_prowler_provider.return_value = mock_prowler_provider
mock_compliance_template.__getitem__.return_value = {
"test_compliance": {
@@ -671,6 +1105,7 @@ class TestCreateComplianceRequirements:
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {"check_1": None},
"checks_status": {
"pass": 2,
"fail": 0,
@@ -681,6 +1116,7 @@ class TestCreateComplianceRequirements:
},
"req_2": {
"description": "Test Requirement 2",
"checks": {"check_2": None},
"checks_status": {
"pass": 1,
"fail": 1,
@@ -693,72 +1129,64 @@ class TestCreateComplianceRequirements:
}
}
result = create_compliance_requirements(tenant_id, scan_id)
mock_generate_compliance.return_value = {
"test_compliance": {
"framework": "Test Framework",
"version": "1.0",
"requirements": {
"req_1": {
"description": "Test Requirement 1",
"checks": {
"check_1": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "PASS"},
}
},
"checks_status": {
"pass": 2,
"fail": 0,
"manual": 0,
"total": 2,
},
"status": "PASS",
},
"req_2": {
"description": "Test Requirement 2",
"checks": {
"check_2": {
"us-east-1": {"status": "PASS"},
"us-west-2": {"status": "FAIL"},
}
},
"checks_status": {
"pass": 1,
"fail": 1,
"manual": 0,
"total": 2,
},
"status": "FAIL",
},
},
}
}
assert "requirements_created" in result
assert result["requirements_created"] >= 0
@pytest.mark.django_db
class TestUpdateResourceFailedFindingsCount:
def test_execute_sql_update(
self, tenants_fixture, scans_fixture, providers_fixture, resources_fixture
):
resource = resources_fixture[0]
tenant_id = resource.tenant_id
scan_id = resource.provider.scans.first().id
# Common kwargs for all failing findings
base_kwargs = {
"tenant_id": tenant_id,
"scan_id": scan_id,
"delta": None,
"status": StatusChoices.FAIL,
"status_extended": "test status extended",
"impact": Severity.critical,
"impact_extended": "test impact extended",
"severity": Severity.critical,
"raw_result": {
"status": StatusChoices.FAIL,
"impact": Severity.critical,
"severity": Severity.critical,
},
"tags": {"test": "dev-qa"},
"check_id": "test_check_id",
"check_metadata": {
"CheckId": "test_check_id",
"Description": "test description apple sauce",
"servicename": "ec2",
},
"first_seen_at": "2024-01-02T00:00:00Z",
}
# UIDs to create (two with same UID, one unique)
uids = ["test_finding_uid_1", "test_finding_uid_1", "test_finding_uid_2"]
# Create findings and associate with the resource
for uid in uids:
finding = Finding.objects.create(uid=uid, **base_kwargs)
finding.add_resources([resource])
resource.refresh_from_db()
assert resource.failed_findings_count == 0
_update_resource_failed_findings_count(tenant_id=tenant_id, scan_id=scan_id)
resource.refresh_from_db()
# Only two since two findings share the same UID
assert resource.failed_findings_count == 2
@patch("tasks.jobs.scan.Scan.objects.get")
def test_scan_not_found(
self,
mock_scan_get,
):
mock_scan_get.side_effect = Scan.DoesNotExist
with pytest.raises(Scan.DoesNotExist):
_update_resource_failed_findings_count(
"8614ca97-8370-4183-a7f7-e96a6c7d2c93",
"4705bed5-8782-4e8b-bab6-55e8043edaa6",
created_objects = []
mock_create_objects.side_effect = (
lambda tenant_id, model, objs, batch_size=500: created_objects.extend(
objs
)
)
create_compliance_requirements(str(tenant.id), str(scan.id))
assert len(created_objects) == 4
req_1_objects = [
obj for obj in created_objects if obj.requirement_id == "req_1"
]
req_2_objects = [
obj for obj in created_objects if obj.requirement_id == "req_2"
]
assert len(req_1_objects) == 2
assert len(req_2_objects) == 2
assert all(obj.requirement_status == "PASS" for obj in req_1_objects)
assert all(obj.requirement_status == "FAIL" for obj in req_2_objects)
+8 -31
View File
@@ -3,10 +3,9 @@ from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from tasks.tasks import _perform_scan_complete_tasks, generate_outputs_task
from tasks.tasks import generate_outputs
# TODO Move this to outputs/reports jobs
@pytest.mark.django_db
class TestGenerateOutputs:
def setup_method(self):
@@ -18,7 +17,7 @@ class TestGenerateOutputs:
with patch("tasks.tasks.ScanSummary.objects.filter") as mock_filter:
mock_filter.return_value.exists.return_value = False
result = generate_outputs_task(
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
@@ -100,7 +99,7 @@ class TestGenerateOutputs:
mock_compress.return_value = "/tmp/zipped.zip"
mock_upload.return_value = "s3://bucket/zipped.zip"
result = generate_outputs_task(
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
@@ -151,7 +150,7 @@ class TestGenerateOutputs:
True,
]
result = generate_outputs_task(
result = generate_outputs(
scan_id="scan",
provider_id="provider",
tenant_id=self.tenant_id,
@@ -209,7 +208,7 @@ class TestGenerateOutputs:
{"aws": [(lambda x: True, MagicMock())]},
),
):
generate_outputs_task(
generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
@@ -277,7 +276,7 @@ class TestGenerateOutputs:
}
},
):
result = generate_outputs_task(
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
@@ -347,7 +346,7 @@ class TestGenerateOutputs:
):
mock_summary.return_value.exists.return_value = True
result = generate_outputs_task(
result = generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
@@ -408,31 +407,9 @@ class TestGenerateOutputs:
),
):
with caplog.at_level("ERROR"):
generate_outputs_task(
generate_outputs(
scan_id=self.scan_id,
provider_id=self.provider_id,
tenant_id=self.tenant_id,
)
assert "Error deleting output files" in caplog.text
class TestScanCompleteTasks:
@patch("tasks.tasks.create_compliance_requirements_task.apply_async")
@patch("tasks.tasks.perform_scan_summary_task.si")
@patch("tasks.tasks.generate_outputs_task.si")
def test_scan_complete_tasks(
self, mock_outputs_task, mock_scan_summary_task, mock_compliance_tasks
):
_perform_scan_complete_tasks("tenant-id", "scan-id", "provider-id")
mock_compliance_tasks.assert_called_once_with(
kwargs={"tenant_id": "tenant-id", "scan_id": "scan-id"},
)
mock_scan_summary_task.assert_called_once_with(
scan_id="scan-id",
tenant_id="tenant-id",
)
mock_outputs_task.assert_called_once_with(
scan_id="scan-id",
provider_id="provider-id",
tenant_id="tenant-id",
)
@@ -1,128 +0,0 @@
import random
from collections import defaultdict
import requests
from locust import events, task
from utils.helpers import APIUserBase, get_api_token, get_auth_headers
GLOBAL = {
"token": None,
"available_scans_info": {},
}
SUPPORTED_COMPLIANCE_IDS = {
"aws": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"gcp": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"azure": ["ens_rd2022", "cis_2.0", "prowler_threatscore", "soc2"],
"m365": ["cis_4.0", "iso27001_2022", "prowler_threatscore"],
}
def _get_random_scan() -> tuple:
provider_type = random.choice(list(GLOBAL["available_scans_info"].keys()))
scan_info = random.choice(GLOBAL["available_scans_info"][provider_type])
return provider_type, scan_info
def _get_random_compliance_id(provider: str) -> str:
return f"{random.choice(SUPPORTED_COMPLIANCE_IDS[provider])}_{provider}"
def _get_compliance_available_scans_by_provider_type(host: str, token: str) -> dict:
excluded_providers = ["kubernetes"]
response_dict = defaultdict(list)
provider_response = requests.get(
f"{host}/providers?fields[providers]=id,provider&filter[connected]=true",
headers=get_auth_headers(token),
)
for provider in provider_response.json()["data"]:
provider_id = provider["id"]
provider_type = provider["attributes"]["provider"]
if provider_type in excluded_providers:
continue
scan_response = requests.get(
f"{host}/scans?fields[scans]=id&filter[provider]={provider_id}&filter[state]=completed",
headers=get_auth_headers(token),
)
scan_data = scan_response.json()["data"]
if not scan_data:
continue
scan_id = scan_data[0]["id"]
response_dict[provider_type].append(scan_id)
return response_dict
def _get_compliance_regions_from_scan(host: str, token: str, scan_id: str) -> list:
response = requests.get(
f"{host}/compliance-overviews/metadata?filter[scan_id]={scan_id}",
headers=get_auth_headers(token),
)
assert response.status_code == 200, f"Failed to get scan: {response.text}"
return response.json()["data"]["attributes"]["regions"]
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
GLOBAL["token"] = get_api_token(environment.host)
scans_by_provider = _get_compliance_available_scans_by_provider_type(
environment.host, GLOBAL["token"]
)
scan_info = defaultdict(list)
for provider, scans in scans_by_provider.items():
for scan in scans:
scan_info[provider].append(
{
"scan_id": scan,
"regions": _get_compliance_regions_from_scan(
environment.host, GLOBAL["token"], scan
),
}
)
GLOBAL["available_scans_info"] = scan_info
class APIUser(APIUserBase):
def on_start(self):
self.token = GLOBAL["token"]
@task(3)
def compliance_overviews_default(self):
provider_type, scan_info = _get_random_scan()
name = f"/compliance-overviews ({provider_type})"
endpoint = f"/compliance-overviews?" f"filter[scan_id]={scan_info['scan_id']}"
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def compliance_overviews_region(self):
provider_type, scan_info = _get_random_scan()
name = f"/compliance-overviews?filter[region] ({provider_type})"
endpoint = (
f"/compliance-overviews"
f"?filter[scan_id]={scan_info['scan_id']}"
f"&filter[region]={random.choice(scan_info['regions'])}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def compliance_overviews_requirements(self):
provider_type, scan_info = _get_random_scan()
compliance_id = _get_random_compliance_id(provider_type)
name = f"/compliance-overviews/requirements ({compliance_id})"
endpoint = (
f"/compliance-overviews/requirements"
f"?filter[scan_id]={scan_info['scan_id']}"
f"&filter[compliance_id]={compliance_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task
def compliance_overviews_attributes(self):
provider_type, _ = _get_random_scan()
compliance_id = _get_random_compliance_id(provider_type)
name = f"/compliance-overviews/attributes ({compliance_id})"
endpoint = (
f"/compliance-overviews/attributes"
f"?filter[compliance_id]={compliance_id}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@@ -0,0 +1,71 @@
from locust import task, events
from utils.helpers import (
APIUserBase,
get_api_token,
get_auth_headers,
get_sort_value,
get_available_resource_filters,
get_next_resource_filter,
)
from utils.config import (
RESOURCES_UI_SORT_VALUES,
)
GLOBAL = {"token": None, "resource_ids": [], "resource_filters": None}
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
GLOBAL["token"] = get_api_token(environment.host)
GLOBAL["resource_filters"] = get_available_resource_filters(
environment.host, GLOBAL["token"]
)
class ResourceUser(APIUserBase):
def on_start(self):
self.token = GLOBAL["token"]
self.headers = get_auth_headers(self.token)
self.available_resource_filters = GLOBAL["resource_filters"]
@task
def resources_default(self):
name = "GET /resources"
page_number = self._next_page(name)
endpoint = (
f"/resources?page[number]={page_number}"
f"&{get_sort_value(RESOURCES_UI_SORT_VALUES)}"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(3)
def resource_with_include(self):
name = "GET /resources (with include)"
page = self._next_page(name)
endpoint = (
f"/resources?page[number]={page}"
f"&{get_sort_value(RESOURCES_UI_SORT_VALUES)}"
f"&include=findings,provider"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def resource_filter(self):
name = "GET /resources (random filter)"
filter_type, filter_value = get_next_resource_filter(
self.available_resource_filters
)
endpoint = f"/resources?filter[{filter_type}]={filter_value}"
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
@task(2)
def resource_filter_with_include(self):
name = "GET /resources (random filter + include)"
filter_type, filter_value = get_next_resource_filter(
self.available_resource_filters
)
endpoint = (
f"/resources?filter[{filter_type}]={filter_value}"
f"&include=findings,provider"
)
self.client.get(endpoint, headers=get_auth_headers(self.token), name=name)
+3
View File
@@ -17,3 +17,6 @@ FINDINGS_RESOURCE_METADATA = {
S_PROVIDER_NAME = "provider-50k"
M_PROVIDER_NAME = "provider-250k"
L_PROVIDER_NAME = "provider-500k"
RESOURCES_UI_SORT_VALUES = ["name", "region", "inserted_at"]
RESOURCE_INSERTED_AT = os.environ.get("RESOURCE_INSERTED_AT", "2025-04-14")
+39
View File
@@ -166,3 +166,42 @@ def get_sort_value(sort_values: list) -> str:
str: A formatted sort query string (e.g., "sort=created_at,-severity").
"""
return f"sort={','.join(sort_values)}"
def get_available_resource_filters(host: str, token: str) -> dict:
"""
Fetches and returns available resource filter values from the API.
Args:
host (str): The host URL of the API.
token (str): Bearer token for authentication.
Returns:
dict: A dictionary containing lists of unique values for each resource filter type.
Example:
{
"service": ["ec2", "s3", "rds"],
"type": ["instance", "bucket"],
"region": ["us-east-1", "us-west-2"]
}
Raises:
AssertionError: If the API request fails or does not return a 200 status code.
"""
url = f"{host}/resources"
params = {"fields[resources]": "type,region,service"}
response = requests.get(url, headers=get_auth_headers(token), params=params)
assert response.status_code == 200, f"Failed to fetch filters: {response.text}"
resources = response.json()["data"]
filters = {"service": set(), "type": set(), "region": set()}
for res in resources:
attr = res["attributes"]
filters["service"].add(attr["service"])
filters["type"].add(attr["type"])
filters["region"].add(attr["region"])
return {k: list(v) for k, v in filters.items()}
@@ -1,23 +0,0 @@
import warnings
from dashboard.common_methods import get_section_container_iso
warnings.filterwarnings("ignore")
def get_table(data):
aux = data[
[
"REQUIREMENTS_ATTRIBUTES_CATEGORY",
"REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID",
"REQUIREMENTS_ATTRIBUTES_OBJETIVE_NAME",
"CHECKID",
"STATUS",
"REGION",
"ACCOUNTID",
"RESOURCEID",
]
]
return get_section_container_iso(
aux, "REQUIREMENTS_ATTRIBUTES_CATEGORY", "REQUIREMENTS_ATTRIBUTES_OBJETIVE_ID"
)
+1 -4
View File
@@ -4,10 +4,7 @@ from dash import html
def create_provider_card(
provider: str,
provider_logo: str,
account_type: str,
filtered_data,
provider: str, provider_logo: str, account_type: str, filtered_data
) -> List[html.Div]:
"""
Card to display the provider's name and icon.
-25
View File
@@ -245,31 +245,6 @@ def create_service_dropdown(services: list) -> html.Div:
)
def create_provider_dropdown(providers: list) -> html.Div:
"""
Dropdown to select the provider.
Args:
providers (list): List of providers.
Returns:
html.Div: Dropdown to select the provider.
"""
return html.Div(
[
html.Label(
"Provider:", className="text-prowler-stone-900 font-bold text-sm"
),
dcc.Dropdown(
id="provider-filter",
options=[{"label": i, "value": i} for i in providers],
value=["All"],
clearable=False,
multi=True,
style={"color": "#000000"},
),
],
)
def create_status_dropdown(status: list) -> html.Div:
"""
Dropdown to select the status.
+2 -5
View File
@@ -9,11 +9,9 @@ def create_layout_overview(
download_button_xlsx: html.Button,
severity_dropdown: html.Div,
service_dropdown: html.Div,
provider_dropdown: html.Div,
table_row_dropdown: html.Div,
status_dropdown: html.Div,
table_div_header: html.Div,
amount_providers: int,
) -> html.Div:
"""
Create the layout of the dashboard.
@@ -49,10 +47,9 @@ def create_layout_overview(
[
html.Div([severity_dropdown], className=""),
html.Div([service_dropdown], className=""),
html.Div([provider_dropdown], className=""),
html.Div([status_dropdown], className=""),
],
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-4",
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-3",
),
html.Div(
[
@@ -62,7 +59,7 @@ def create_layout_overview(
html.Div(className="flex", id="k8s_card", n_clicks=0),
html.Div(className="flex", id="m365_card", n_clicks=0),
],
className=f"grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-{amount_providers}",
className="grid gap-x-4 mb-[30px] sm:grid-cols-2 lg:grid-cols-5",
),
html.H4(
"Count of Findings by severity",
+20 -13
View File
@@ -346,27 +346,34 @@ def display_data(
if item == "nan" or item.__class__.__name__ != "str":
region_filter_options.remove(item)
# Convert ASSESSMENTDATE to datetime
data["ASSESSMENTDATE"] = pd.to_datetime(data["ASSESSMENTDATE"], errors="coerce")
data["ASSESSMENTDAY"] = data["ASSESSMENTDATE"].dt.date
data["ASSESSMENTDATE"] = data["ASSESSMENTDATE"].dt.strftime("%Y-%m-%d %H:%M:%S")
# Find the latest timestamp per account per day
latest_per_account_day = data.groupby(["ACCOUNTID", "ASSESSMENTDAY"])[
"ASSESSMENTDATE"
].transform("max")
# Choosing the date that is the most recent
data_values = data["ASSESSMENTDATE"].unique()
data_values.sort()
data_values = data_values[::-1]
aux = []
# Keep only rows with the latest timestamp for each account and day
data = data[data["ASSESSMENTDATE"] == latest_per_account_day]
data_values = [str(i) for i in data_values]
for value in data_values:
if value.split(" ")[0] not in [aux[i].split(" ")[0] for i in range(len(aux))]:
aux.append(value)
data_values = [str(i) for i in aux]
# Prepare the date filter options (unique days, as strings)
options_date = sorted(data["ASSESSMENTDAY"].astype(str).unique(), reverse=True)
data = data[data["ASSESSMENTDATE"].isin(data_values)]
data["ASSESSMENTDATE"] = data["ASSESSMENTDATE"].apply(lambda x: x.split(" ")[0])
# Filter by selected date (as string)
options_date = data["ASSESSMENTDATE"].unique()
options_date.sort()
options_date = options_date[::-1]
# Filter DATE
if date_filter_analytics in options_date:
data = data[data["ASSESSMENTDAY"].astype(str) == date_filter_analytics]
data = data[data["ASSESSMENTDATE"] == date_filter_analytics]
else:
date_filter_analytics = options_date[0]
data = data[data["ASSESSMENTDAY"].astype(str) == date_filter_analytics]
data = data[data["ASSESSMENTDATE"] == date_filter_analytics]
if data.empty:
fig = px.pie()
+39 -88
View File
@@ -1,4 +1,5 @@
# Standard library imports
import csv
import glob
import json
import os
@@ -19,6 +20,7 @@ from dash.dependencies import Input, Output
# Config import
from dashboard.config import (
critical_color,
encoding_format,
fail_color,
folder_path_overview,
high_color,
@@ -36,7 +38,6 @@ from dashboard.lib.cards import create_provider_card
from dashboard.lib.dropdowns import (
create_account_dropdown,
create_date_dropdown,
create_provider_dropdown,
create_region_dropdown,
create_service_dropdown,
create_severity_dropdown,
@@ -44,7 +45,6 @@ from dashboard.lib.dropdowns import (
create_table_row_dropdown,
)
from dashboard.lib.layouts import create_layout_overview
from prowler.lib.logger import logger
# Suppress warnings
warnings.filterwarnings("ignore")
@@ -54,13 +54,11 @@ warnings.filterwarnings("ignore")
csv_files = []
for file in glob.glob(os.path.join(folder_path_overview, "*.csv")):
try:
df = pd.read_csv(file, sep=";")
num_rows = len(df)
with open(file, "r", newline="", encoding=encoding_format) as csvfile:
reader = csv.reader(csvfile)
num_rows = sum(1 for row in reader)
if num_rows > 1:
csv_files.append(file)
except Exception:
logger.error(f"Error reading file {file}")
# Import logos providers
@@ -192,13 +190,7 @@ else:
data.rename(columns={"RESOURCE_ID": "RESOURCE_UID"}, inplace=True)
# Remove dupplicates on the finding_uid colummn but keep the last one taking into account the timestamp
data["DATE"] = data["TIMESTAMP"].dt.date
data = (
data.sort_values("TIMESTAMP")
.groupby(["DATE", "FINDING_UID"], as_index=False)
.last()
)
data["TIMESTAMP"] = pd.to_datetime(data["TIMESTAMP"])
data = data.sort_values("TIMESTAMP").drop_duplicates("FINDING_UID", keep="last")
data["ASSESSMENT_TIME"] = data["TIMESTAMP"].dt.strftime("%Y-%m-%d")
data_valid = pd.DataFrame()
@@ -306,13 +298,6 @@ else:
service_dropdown = create_service_dropdown(services)
# Provider Dropdown
providers = ["All"] + list(data["PROVIDER"].unique())
providers = [
x for x in providers if str(x) != "nan" and x.__class__.__name__ == "str"
]
provider_dropdown = create_provider_dropdown(providers)
# Create the download button
download_button_csv = html.Button(
"Download this table as CSV",
@@ -494,11 +479,9 @@ else:
download_button_xlsx,
severity_dropdown,
service_dropdown,
provider_dropdown,
table_row_dropdown,
status_dropdown,
table_div_header,
len(data["PROVIDER"].unique()),
)
@@ -525,8 +508,6 @@ else:
Output("severity-filter", "value"),
Output("severity-filter", "options"),
Output("service-filter", "value"),
Output("provider-filter", "value"),
Output("provider-filter", "options"),
Output("service-filter", "options"),
Output("table-rows", "value"),
Output("table-rows", "options"),
@@ -545,7 +526,6 @@ else:
Input("download_link_xlsx", "n_clicks"),
Input("severity-filter", "value"),
Input("service-filter", "value"),
Input("provider-filter", "value"),
Input("table-rows", "value"),
Input("status-filter", "value"),
Input("search-input", "value"),
@@ -569,7 +549,6 @@ def filter_data(
n_clicks_xlsx,
severity_values,
service_values,
provider_values,
table_row_values,
status_values,
search_value,
@@ -895,25 +874,6 @@ def filter_data(
filtered_data["SERVICE_NAME"].isin(updated_service_values)
]
provider_filter_options = ["All"] + list(filtered_data["PROVIDER"].unique())
# Filter Provider
if provider_values == ["All"]:
updated_provider_values = filtered_data["PROVIDER"].unique()
elif "All" in provider_values and len(provider_values) > 1:
# Remove 'All' from the list
provider_values.remove("All")
updated_provider_values = provider_values
elif len(provider_values) == 0:
updated_provider_values = filtered_data["PROVIDER"].unique()
provider_values = ["All"]
else:
updated_provider_values = provider_values
filtered_data = filtered_data[
filtered_data["PROVIDER"].isin(updated_provider_values)
]
# Filter Status
if status_values == ["All"]:
updated_status_values = filtered_data["STATUS"].unique()
@@ -1134,17 +1094,25 @@ def filter_data(
table_row_options = []
# Calculate table row options as percentages
percentages = [0.05, 0.10, 0.25, 0.50, 0.75, 1.0]
total_rows = len(filtered_data)
for pct in percentages:
value = max(1, int(total_rows * pct))
label = f"{int(pct * 100)}%"
table_row_options.append({"label": label, "value": value})
# Default to 25% if not set
# Take the values from the table_row_values
if table_row_values is None or table_row_values == -1:
table_row_values = table_row_options[0]["value"]
if len(filtered_data) < 25:
table_row_values = len(filtered_data)
else:
table_row_values = 25
if len(filtered_data) < 25:
table_row_values = len(filtered_data)
if len(filtered_data) >= 25:
table_row_options.append(25)
if len(filtered_data) >= 50:
table_row_options.append(50)
if len(filtered_data) >= 75:
table_row_options.append(75)
if len(filtered_data) >= 100:
table_row_options.append(100)
table_row_options.append(len(filtered_data))
# For the values that are nan or none, replace them with ""
filtered_data = filtered_data.replace({np.nan: ""})
@@ -1379,36 +1347,21 @@ def filter_data(
]
# Create Provider Cards
if "aws" in list(data["PROVIDER"].unique()):
aws_card = create_provider_card(
"aws", aws_provider_logo, "Accounts", full_filtered_data
)
else:
aws_card = None
if "azure" in list(data["PROVIDER"].unique()):
azure_card = create_provider_card(
"azure", azure_provider_logo, "Subscriptions", full_filtered_data
)
else:
azure_card = None
if "gcp" in list(data["PROVIDER"].unique()):
gcp_card = create_provider_card(
"gcp", gcp_provider_logo, "Projects", full_filtered_data
)
else:
gcp_card = None
if "kubernetes" in list(data["PROVIDER"].unique()):
k8s_card = create_provider_card(
"kubernetes", ks8_provider_logo, "Clusters", full_filtered_data
)
else:
k8s_card = None
if "m365" in list(data["PROVIDER"].unique()):
m365_card = create_provider_card(
"m365", m365_provider_logo, "Accounts", full_filtered_data
)
else:
m365_card = None
aws_card = create_provider_card(
"aws", aws_provider_logo, "Accounts", full_filtered_data
)
azure_card = create_provider_card(
"azure", azure_provider_logo, "Subscriptions", full_filtered_data
)
gcp_card = create_provider_card(
"gcp", gcp_provider_logo, "Projects", full_filtered_data
)
k8s_card = create_provider_card(
"kubernetes", ks8_provider_logo, "Clusters", full_filtered_data
)
m365_card = create_provider_card(
"m365", m365_provider_logo, "Accounts", full_filtered_data
)
# Subscribe to Prowler Cloud card
subscribe_card = [
@@ -1492,8 +1445,6 @@ def filter_data(
severity_values,
severity_filter_options,
service_values,
provider_values,
provider_filter_options,
service_filter_options,
table_row_values,
table_row_options,
-122
View File
@@ -1,122 +0,0 @@
# AWS Provider
In this page you can find all the details about [Amazon Web Services (AWS)](https://aws.amazon.com/) provider implementation in Prowler.
By default, Prowler will audit just one account and organization settings per scan. To configure it, follow the [getting started](../index.md#aws) page.
## AWS Provider Classes Architecture
The AWS provider implementation follows the general [Provider structure](./provider.md). This section focuses on the AWS-specific implementation, highlighting how the generic provider concepts are realized for AWS in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md). In next subsection you can find a list of the main classes of the AWS provider.
### `AwsProvider` (Main Class)
- **Location:** [`prowler/providers/aws/aws_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/aws_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for AWS-specific logic, session management, credential validation, role assumption, region and organization discovery, and configuration.
- **Key AWS Responsibilities:**
- Initializes and manages AWS sessions (with or without role assumption, MFA, etc.).
- Validates credentials and sets up the AWS identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers enabled AWS regions and organization metadata.
- Provides properties and methods for downstream AWS service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/aws/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/models.py)
- **Purpose:** Define structured data for AWS identity, session, credentials, organization info, and more.
- **Key AWS Models:**
- `AWSOrganizationsInfo`: Holds AWS Organizations metadata, to be used by the checks.
- `AWSCredentials`, `AWSAssumeRoleInfo`, `AWSAssumeRoleConfiguration`: Used for role assumption and session management.
- `AWSIdentityInfo`: Stores account, user, partition, and region context for the scan.
- `AWSSession`: Wraps the current and original [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) sessions and config.
### `AWSService` (Service Base Class)
- **Location:** [`prowler/providers/aws/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/lib/service/service.py)
- **Purpose:** Abstract base class that all AWS service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for AWS.
- **Key AWS Responsibilities:**
- Receives an `AwsProvider` instance to access session, identity, and configuration.
- Manages clients for all services by regions.
- Provides `__threading_call__` method to make boto3 calls in parallel. By default, this calls are made by region, but it can be overridden with the first parameter of the method and use by resource.
- Exposes common audit context (`audited_account`, `audited_account_arn`, `audited_partition`, `audited_resources`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/aws/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for AWS-specific error handling, such as credential and role errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/aws/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/lib/)
- **Purpose:** Helpers for session setup, ARN parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in AWS Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the right now implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/aws/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/aws/services)
- In the [Prowler Hub](https://hub.prowler.com/). For a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all AWS services.
### AWS Service Common Patterns
- Services communicate with AWS using boto3, you can find the documentation with all the services [here](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/index.html).
- Every AWS service class inherits from `AWSService`, ensuring access to session, identity, configuration, and threading utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name and provider (e.g. `super().__init__(__class__.__name__, provider))`). Ensure that the service name in boto3 is the same that you use in the constructor. Usually is used the `__class__.__name__` to get the service name because it is the same as the class name.
- Resource containers **must** be initialized in the constructor. They should be dictionaries, with the key being the resource ARN or equivalent unique identifier and the value being the resource object.
- Resource discovery and attribute collection are parallelized using `self.__threading_call__`, typically by region or resource, for performance. The first parameter of the method is the iterator, if not provided, it will be the region; but if present indicate an array of the resources to be processed.
- Resource filtering is consistently enforced using `self.audit_resources` attribute and `is_resource_filtered` function, it is used to see if user has provided some resource that is not in the audit scope, so we can skip it in the service logic. Normally it is used befor storing the resource in the service container as follows: `if not self.audit_resources or (is_resource_filtered(resource["arn"], self.audit_resources)):`.
- All AWS resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- AWS API calls are wrapped in try/except blocks, with specific handling for `ClientError` and generic exceptions, always logging errors.
- If ARN is not present for some resource, it can be constructed using string interpolation, always including partition, service, region, account, and resource ID.
- Tags and additional attributes that cannot be retrieved from the default call, should be collected and stored for each resource using dedicated methods and threading using the resource object list as iterator.
## Specific Patterns in AWS Checks
The AWS checks pattern is described in [checks page](./checks.md). You can find all the right now implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/aws/services/s3/s3_bucket_acl_prohibited/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/aws/services/s3/s3_bucket_acl_prohibited))
- In the [Prowler Hub](https://hub.prowler.com/). For a more human-readable view.
The best reference to understand how to implement a new check is following the [check creation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_AWS` class models a single finding for an AWS resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_AWS` extends the base report structure with AWS-specific fields, enabling detailed tracking of the resource, ARN, and region associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_AWS`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its AWS-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.id` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_arn`**:
- Uses `resource.arn` if present.
- Defaults to an empty string if ARN is not present in the resource object.
- **`region`**:
- Uses `resource.region` if present.
- Defaults to an empty string if region is not present in the resource object.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from that ones you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_AWS(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-121
View File
@@ -1,121 +0,0 @@
# Azure Provider
In this page you can find all the details about [Microsoft Azure](https://azure.microsoft.com/) provider implementation in Prowler.
By default, Prowler will audit all the subscriptions that it is able to list in the Microsoft Entra tenant, and tenant Entra ID service. To configure it, follow the [getting started](../index.md#azure) page.
## Azure Provider Classes Architecture
The Azure provider implementation follows the general [Provider structure](./provider.md). This section focuses on the Azure-specific implementation, highlighting how the generic provider concepts are realized for Azure in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md). In next subsection you can find a list of the main classes of the Azure provider.
### `AzureProvider` (Main Class)
- **Location:** [`prowler/providers/azure/azure_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/azure_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for Azure-specific logic, session management, credential validation, and configuration.
- **Key Azure Responsibilities:**
- Initializes and manages Azure sessions (supports Service Principal, CLI, Browser, and Managed Identity authentication).
- Validates credentials and sets up the Azure identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Retrieves subscription(s) metadata.
- Provides properties and methods for downstream Azure service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/azure/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/models.py)
- **Purpose:** Define structured data for Azure identity, session, region configuration, and subscription info.
- **Key Azure Models:**
- `AzureIdentityInfo`: Holds Azure identity metadata, including tenant ID, domain, subscription names and IDs, and locations.
- `AzureRegionConfig`: Stores the specific region that will be audited. That can be: Global, US Government or China.
- `AzureSubscription`: Represents a subscription with ID, display name, and state.
### `AzureService` (Service Base Class)
- **Location:** [`prowler/providers/azure/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/lib/service/service.py)
- **Purpose:** Abstract base class that all Azure service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for Azure.
- **Key Azure Responsibilities:**
- Receives an `AzureProvider` instance to access session, identity, and configuration.
- Manages clients for all services by subscription.
- Exposes common audit context (`subscriptions`, `locations`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/azure/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for Azure-specific error handling, such as credential, region, and session errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/azure/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/lib/)
- **Purpose:** Helpers for argument parsing, region setup, mutelist management, and other cross-cutting concerns.
## Specific Patterns in Azure Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/azure/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/azure/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all Azure services.
### Azure Service Common Patterns
- Services communicate with Azure using the Azure Python SDK, mainly using the Azure Management Client (except for the Microsoft Entra ID service, that is using the Microsoft Graph API), you can find the documentation with all the management services [here](https://learn.microsoft.com/en-us/python/api/overview/azure/?view=azure-python).
- Every Azure service class inherits from `AzureService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service Azure Management Client and Prowler provider object (e.g `super().__init__(WebSiteManagementClient, provider)`).
- Resource containers **must** be initialized in the constructor, and they should be dictionaries, with the key being the subscription ID, the value being a dictionary with the resource ID as key and the resource object as value.
- All Azure resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes. Some are represented as dataclasses due to legacy reasons, but new resources should be represented as Pydantic `BaseModel` classes.
- Azure SDK functions are wrapped in try/except blocks, with specific handling for errors, always logging errors. It is a best practice to create a custom function for every Azure SDK call, in that way we can handle the errors in a more specific way.
## Specific Patterns in Azure Checks
The Azure checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/azure/services/storage/storage_blob_public_access_level_is_disabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/azure/services/storage/storage_blob_public_access_level_is_disabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is the [Azure check implementation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_Azure` class models a single finding for an Azure resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_Azure` extends the base report structure with Azure-specific fields, enabling detailed tracking of the resource, resource ID, name, subscription, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_Azure`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its Azure-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.id` if present.
- Otherwise, uses `resource.resource_id` if present.
- Defaults to an empty string if not available.
- **`resource_name`**:
- Uses `resource.name` if present.
- Otherwise, uses `resource.resource_name` if present.
- Defaults to an empty string if not available.
- **`subscription`**:
- Defaults to an empty string, it **must** be set in the check logic.
- **`location`**:
- Uses `resource.location` if present.
- Defaults to an empty string if not available.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_Azure(
metadata=check_metadata,
resource=resource_object
)
report.subscription = subscription_id
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+278 -222
View File
@@ -1,314 +1,370 @@
# Prowler Checks
# Create a new Check for a Provider
This guide explains how to create new checks in Prowler.
Here you can find how to create new checks for Prowler.
**To create a check is required to have a Prowler provider service already created, so if the service is not present or the attribute you want to audit is not retrieved by the service, please refer to the [Service](./services.md) documentation.**
## Introduction
Checks are the core component of Prowler. A check is a piece of code designed to validate whether a configuration aligns with cybersecurity best practices. Execution of a check yields a finding, which includes the result and contextual metadata (e.g., outcome, risks, remediation).
The checks are the fundamental piece of Prowler. A check is a simply piece of code that ensures if something is configured against cybersecurity best practices. Then the check generates a finding with the result and includes the check's metadata to give the user more contextual information about the result, the risk and how to remediate it.
### Creating a Check
To create a new check for a supported Prowler provider, you will need to create a folder with the check name inside the specific service for the selected provider.
To create a new check:
- Prerequisites: A Prowler provider and service must exist. Verify support and check for pre-existing checks via [Prowler Hub](https://hub.prowler.com). If the provider or service is not present, please refer to the [Provider](./provider.md) and [Service](./services.md) documentation for creation instructions.
- Navigate to the service directory. The path should be as follows: `prowler/providers/<provider>/services/<service>`.
- Create a check-specific folder. The path should follow this pattern: `prowler/providers/<provider>/services/<service>/<check_name>`. Adhere to the [Naming Format for Checks](#naming-format-for-checks).
- Populate the folder with files as specified in [File Creation](#file-creation).
### Naming Format for Checks
Checks must be named following the format: `service_subservice_resource_action`.
The name components are:
- `service` The main service being audited (e.g., ec2, entra, iam, etc.)
- `subservice` An individual component or subset of functionality within the service that is being audited. This may correspond to a shortened version of the class attribute accessed within the check. If there is no subservice, just omit.
- `resource` The specific resource type being evaluated (e.g., instance, policy, role, etc.)
- `action` The security aspect or configuration being checked (e.g., public, encrypted, enabled, etc.)
### File Creation
Each check in Prowler follows a straightforward structure. Within the newly created folder, three files must be added to implement the check logic:
- `__init__.py` (empty file) Ensures Python treats the check folder as a package.
- `<check_name>.py` (code file) Contains the check logic, following the prescribed format. Please refer to the [prowler's check code structure](./checks.md#prowlers-check-code-structure) for more information.
- `<check_name>.metadata.json` (metadata file) Defines the check's metadata for contextual information. Please refer to the [check metadata](./checks.md#) for more information.
## Prowler's Check Code Structure
Prowler's check structure is designed for clarity and maintainability. It follows a dynamic loading approach based on predefined paths, ensuring seamless integration of new checks into a provider's service without additional manual steps.
Below the code for a generic check is presented. It is strongly recommended to consult other checks from the same provider and service to understand provider-specific details and patterns. This will help ensure consistency and proper implementation of provider-specific requirements.
Report fields are the most dependent on the provider, consult the `CheckReport<Provider>` class for more information on what can be included in the report [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py).
We are going to use the `ec2_ami_public` check from the `AWS` provider as an example. So the folder name will be `prowler/providers/aws/services/ec2/ec2_ami_public` (following the format `prowler/providers/<provider>/services/<service>/<check_name>`), with the name of check following the pattern: `service_subservice_resource_action`.
???+ note
Legacy providers (AWS, Azure, GCP, Kubernetes) follow the `Check_Report_<Provider>` naming convention. This is not recommended for current instances. Newer providers adopt the `CheckReport<Provider>` naming convention. Learn more at [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/check/models.py).
A subservice is an specific component of a service that is gonna be audited. Sometimes it could be the shortened name of the class attribute that is gonna be accessed in the check.
```python title="Generic Check Class"
# Required Imports
# Import the base Check class and the provider-specific CheckReport class
from prowler.lib.check.models import Check, CheckReport<Provider>
# Import the provider service client
from prowler.providers.<provider>.services.<service>.<service>_client import <service>_client
Inside that folder, we need to create three files:
# Defining the Check Class
# Each check must be implemented as a Python class with the same name as its corresponding file.
# The class must inherit from the Check base class.
class <check_name>(Check):
"""Short description of what is being checked"""
- An empty `__init__.py`: to make Python treat this check folder as a package.
- A `check_name.py` with the above format containing the check's logic. Refer to the [check](./checks.md#check)
- A `check_name.metadata.json` containing the check's metadata. Refer to the [check metadata](./checks.md#check-metadata)
## Check
The Prowler's check structure is very simple and following it there is nothing more to do to include a check in a provider's service because the load is done dynamically based on the paths.
The following is the code for the `ec2_ami_public` check:
```python title="Check Class"
# At the top of the file we need to import the following:
# - Check class which is in charge of the following:
# - Retrieve the check metadata and expose the `metadata()`
# to return a JSON representation of the metadata,
# read more at Check Metadata Model down below.
# - Enforce that each check requires to have the `execute()` function
from prowler.lib.check.models import Check, Check_Report_AWS
# Then you have to import the provider service client
# read more at the Service documentation.
from prowler.providers.aws.services.ec2.ec2_client import ec2_client
# For each check we need to create a python class called the same as the
# file which inherits from the Check class.
class ec2_ami_public(Check):
"""ec2_ami_public verifies if an EC2 AMI is publicly shared"""
# Then, within the check's class we need to create the "execute(self)"
# function, which is enforce by the "Check" class to implement
# the Check's interface and let Prowler to run this check.
def execute(self):
"""Execute <check short description>
Returns:
List[CheckReport<Provider>]: A list of reports containing the result of the check.
"""
# Inside the execute(self) function we need to create
# the list of findings initialised to an empty list []
findings = []
# Iterate over the target resources using the provider service client
for resource in <service>_client.<resources>:
# Initialize the provider-specific report class, passing metadata and resource
report = Check_Report_<Provider>(metadata=self.metadata(), resource=resource)
# Set required fields and implement check logic
# Then, using the service client we need to iterate by the resource we
# want to check, in this case EC2 AMIs stored in the
# "ec2_client.images" object.
for image in ec2_client.images:
# Once iterating for the images, we have to intialise
# the Check_Report_AWS class passing the check's metadata
# using the "metadata" function explained above.
report = Check_Report_AWS(self.metadata())
# For each Prowler check we MUST fill the following
# Check_Report_AWS fields:
# - region
# - resource_id
# - resource_arn
# - resource_tags
# - status
# - status_extended
report.region = image.region
report.resource_id = image.id
report.resource_arn = image.arn
# The resource_tags should be filled if the resource has the ability
# of having tags, please check the service first.
report.resource_tags = image.tags
# Then we need to create the business logic for the check
# which always should be simple because the Prowler service
# must do the heavy lifting and the check should be in charge
# of parsing the data provided
report.status = "PASS"
report.status_extended = f"<Description about why the resource is compliant>"
# If some of the information needed for the report is not inside the resource, it can be set it manually here.
# This depends on the provider and the resource that is being audited.
# report.region = resource.region
# report.resource_tags = getattr(resource, "tags", [])
# ...
# Example check logic (replace with actual logic):
if <non_compliant_condition>:
report.status_extended = f"EC2 AMI {image.id} is not public."
# In this example each "image" object has a boolean attribute
# called "public" to set if the AMI is publicly shared
if image.public:
report.status = "FAIL"
report.status_extended = f"<Description about why the resource is not compliant>"
report.status_extended = (
f"EC2 AMI {image.id} is currently public."
)
# Then at the same level as the "report"
# object we need to append it to the findings list.
findings.append(report)
# Last thing to do is to return the findings list to Prowler
return findings
```
### Data Requirements for Checks in Prowler
### Check Status
One of the most important aspects when creating a new check is ensuring that all required data is available from the service client. Often, default API calls are insufficient. Extending the service class with new methods or resource attributes may be required to fetch and store requisite data.
All the checks MUST fill the `report.status` and `report.status_extended` with the following criteria:
### Statuses for Checks in Prowler
- Status -- `report.status`
- `PASS` --> If the check is passing against the configured value.
- `FAIL` --> If the check is failing against the configured value.
- `MANUAL` --> This value cannot be used unless a manual operation is required in order to determine if the `report.status` is whether `PASS` or `FAIL`.
- Status Extended -- `report.status_extended`
- MUST end in a dot `.`
- MUST include the service audited with the resource and a brief explanation of the result generated, e.g.: `EC2 AMI ami-0123456789 is not public.`
Required Fields: status and status\_extended
### Check Region
Each check **must** populate the `report.status` and `report.status_extended` fields according to the following criteria:
All the checks MUST fill the `report.region` with the following criteria:
- Status field: `report.status`
- `PASS` Assigned when the check confirms compliance with the configured value.
- `FAIL` Assigned when the check detects non-compliance with the configured value.
- `MANUAL` This status must not be used unless manual verification is necessary to determine whether the status (`report.status`) passes (`PASS`) or fails (`FAIL`).
- If the audited resource is regional use the `region` (the name changes depending on the provider: `location` in Azure and GCP and `namespace` in K8s) attribute within the resource object.
- If the audited resource is global use the `service_client.region` within the service client object.
- Status extended field: `report.status_extended`
- It **must** end with a period (`.`).
- It **must** include the audited service, the resource, and a concise explanation of the check result, for instance: `EC2 AMI ami-0123456789 is not public.`.
### Check Severity
### Prowler's Check Severity Levels
The severity of the checks are defined in the metadata file with the `Severity` field. The severity is always in lowercase and can be one of the following values:
The severity of each check is defined in the metadata file using the `Severity` field. Severity values are always lowercase and must be one of the predefined categories below.
- `critical`
- `high`
- `medium`
- `low`
- `informational`
- `critical` Issue that must be addressed immediately.
- `high` Issue that should be addressed as soon as possible.
- `medium` Issue that should be addressed within a reasonable timeframe.
- `low` Issue that can be addressed in the future.
- `informational` Not an issue but provides valuable information.
If the check involves multiple scenarios that may alter its severity, adjustments can be made dynamically within the check's logic using the severity `report.check_metadata.Severity` attribute:
You may need to change it in the check's code if the check has different scenarios that could change the severity. This can be done by using the `report.check_metadata.Severity` attribute:
```python
if <generic_condition_1>:
if <valid for more than 6 months>:
report.status = "PASS"
report.check_metadata.Severity = "informational"
report.status_extended = f"<Resource> is compliant with <requirement>."
elif <generic_condition_2>:
report.status = "FAIL"
report.status_extended = f"RDS Instance {db_instance.id} certificate has over 6 months of validity left."
elif <valid for more than 3 months>:
report.status = "PASS"
report.check_metadata.Severity = "low"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
elif <generic_condition_3>:
report.status_extended = f"RDS Instance {db_instance.id} certificate has between 3 and 6 months of validity."
elif <valid for more than 1 month>:
report.status = "FAIL"
report.check_metadata.Severity = "medium"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
elif <generic_condition_4>:
report.status_extended = f"RDS Instance {db_instance.id} certificate less than 3 months of validity."
elif <valid for less than 1 month>:
report.status = "FAIL"
report.check_metadata.Severity = "high"
report.status_extended = f"<Resource> is not compliant with <requirement>: <reason>."
report.status_extended = f"RDS Instance {db_instance.id} certificate less than 1 month of validity."
else:
report.status = "FAIL"
report.check_metadata.Severity = "critical"
report.status_extended = f"<Resource> is not compliant with <requirement>: <critical reason>."
report.status_extended = (
f"RDS Instance {db_instance.id} certificate has expired."
)
```
### Resource Identification in Prowler
Each check **must** populate the report with an unique identifier for the audited resource. This identifier or identifiers are going to depend on the provider and the resource that is being audited. Here are the criteria for each provider:
### Resource ID, Name and ARN
All the checks MUST fill the `report.resource_id` and `report.resource_arn` with the following criteria:
- AWS
- Amazon Resource ID — `report.resource_id`.
- The resource identifier. This is the name of the resource, the ID of the resource, or a resource path. Some resource identifiers include a parent resource (sub-resource-type/parent-resource/sub-resource) or a qualifier such as a version (resource-type:resource-name:qualifier).
- If the resource ID cannot be retrieved directly from the audited resource, it can be extracted from the ARN. It is the last part of the ARN after the last slash (`/`) or colon (`:`).
- If no actual resource to audit exists, this format can be used: `<resource_type>/unknown`
- Amazon Resource Name — `report.resource_arn`.
- The [Amazon Resource Name (ARN)](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) of the audited entity.
- If the ARN cannot be retrieved directly from the audited resource, construct a valid ARN using the `resource_id` component as the audited entity. Examples:
- Bedrock — `arn:<partition>:bedrock:<region>:<account-id>:model-invocation-logging`.
- DirectConnect — `arn:<partition>:directconnect:<region>:<account-id>:dxcon`.
- If no actual resource to audit exists, this format can be used: `arn:<partition>:<service>:<region>:<account-id>:<resource_type>/unknown`.
- Resouce ID and resource ARN:
- If the resource audited is the AWS account:
- `resource_id` -> AWS Account Number
- `resource_arn` -> AWS Account Root ARN
- If we cant get the ARN from the resource audited, we create a valid ARN with the `resource_id` part as the resource audited. Examples:
- Bedrock -> `arn:<partition>:bedrock:<region>:<account-id>:model-invocation-logging`
- DirectConnect -> `arn:<partition>:directconnect:<region>:<account-id>:dxcon`
- If there is no real resource to audit we do the following:
- resource_id -> `resource_type/unknown`
- resource_arn -> `arn:<partition>:<service>:<region>:<account-id>:<resource_type>/unknown`
- Examples:
- AWS Security Hub `arn:<partition>:security-hub:<region>:<account-id>:hub/unknown`.
- Access Analyzer `arn:<partition>:access-analyzer:<region>:<account-id>:analyzer/unknown`.
- GuardDuty `arn:<partition>:guardduty:<region>:<account-id>:detector/unknown`.
- AWS Security Hub -> `arn:<partition>:security-hub:<region>:<account-id>:hub/unknown`
- Access Analyzer -> `arn:<partition>:access-analyzer:<region>:<account-id>:analyzer/unknown`
- GuardDuty -> `arn:<partition>:guardduty:<region>:<account-id>:detector/unknown`
- GCP
- Resource ID — `report.resource_id`.
- Resource ID represents the full, [unambiguous path to a resource](https://google.aip.dev/122#full-resource-names), known as the full resource name. Typically, it follows the format: `//{api_service/resource_path}`.
- If the resource ID cannot be retrieved directly from the audited resource, by default the resource name is used.
- Resource Name — `report.resource_name`.
- Resource Name usually refers to the name of a resource within its service.
- Resource ID -- `report.resource_id`
- GCP Resource --> Resource ID
- Resource Name -- `report.resource_name`
- GCP Resource --> Resource Name
- Azure
- Resource ID -- `report.resource_id`
- Azure Resource --> Resource ID
- Resource Name -- `report.resource_name`
- Azure Resource --> Resource Name
- Resource ID — `report.resource_id`.
- Resource ID represents the full Azure Resource Manager path to a resource, which follows the format: `/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}`.
- Resource Name — `report.resource_name`.
- Resource Name usually refers to the name of a resource within its service.
- If the [resource name](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules) cannot be retrieved directly from the audited resource, the last part of the resource ID can be used.
### Python Model
The following is the Python model for the check's class.
- Kubernetes
As per April 11th 2024 the `Check_Metadata_Model` can be found [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py#L36-L82).
- Resource ID — `report.resource_id`.
- The UID of the Kubernetes object. This is a system-generated string that uniquely identifies the object within the cluster for its entire lifetime. See [Kubernetes Object Names and IDs - UIDs](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids).
- Resource Name — `report.resource_name`.
- The name of the Kubernetes object. This is a client-provided string that must be unique for the resource type within a namespace (for namespaced resources) or cluster (for cluster-scoped resources). Names typically follow DNS subdomain or label conventions. See [Kubernetes Object Names and IDs - Names](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names).
```python
class Check(ABC, Check_Metadata_Model):
"""Prowler Check"""
- M365
def __init__(self, **data):
"""Check's init function. Calls the CheckMetadataModel init."""
# Parse the Check's metadata file
metadata_file = (
os.path.abspath(sys.modules[self.__module__].__file__)[:-3]
+ ".metadata.json"
)
# Store it to validate them with Pydantic
data = Check_Metadata_Model.parse_file(metadata_file).dict()
# Calls parents init function
super().__init__(**data)
- Resource ID — `report.resource_id`.
- If the audited resource has a globally unique identifier such as a `guid`, use it as the `resource_id`.
- If no `guid` exists, use another unique and relevant identifier for the resource, such as the tenant domain, the internal policy ID, or a representative string following the format `<resource_type>/<name_or_id>`.
- Resource Name — `report.resource_name`.
- Use the visible or descriptive name of the audited resource. If no explicit name is available, use a clear description of the resource or configuration being evaluated.
- Examples:
- For an organization:
- `resource_id`: Organization GUID
- `resource_name`: Organization name
- For a policy:
- `resource_id`: Unique policy ID
- `resource_name`: Policy display name
- For global configurations:
- `resource_id`: Tenant domain or representative string (e.g., "userSettings")
- `resource_name`: Description of the configuration (e.g., "SharePoint Settings")
def metadata(self) -> dict:
"""Return the JSON representation of the check's metadata"""
return self.json()
- GitHub
@abstractmethod
def execute(self):
"""Execute the check's logic"""
```
- Resource ID — `report.resource_id`.
- The ID of the Github resource. This is a system-generated integer that uniquely identifies the resource within the Github platform.
- Resource Name — `report.resource_name`.
- The name of the Github resource. In the case of a repository, this is just the repository name. For full repository names use the resource `full_name`.
### Using the audit config
### Configurable Checks in Prowler
Prowler has a [configuration file](../tutorials/configuration_file.md) which is used to pass certain configuration values to the checks, like the following:
See [Configurable Checks](./configurable-checks.md) for detailed information on making checks configurable using the `audit_config` object and configuration file.
```python title="ec2_securitygroup_with_many_ingress_egress_rules.py"
class ec2_securitygroup_with_many_ingress_egress_rules(Check):
def execute(self):
findings = []
## Metadata Structure for Prowler Checks
# max_security_group_rules, default: 50
max_security_group_rules = ec2_client.audit_config.get(
"max_security_group_rules", 50
)
for security_group_arn, security_group in ec2_client.security_groups.items():
```
Each Prowler check must include a metadata file named `<check_name>.metadata.json` that must be located in its directory. This file supplies crucial information for execution, reporting, and context.
```yaml title="config.yaml"
# AWS Configuration
aws:
# AWS EC2 Configuration
### Example Metadata File
# aws.ec2_securitygroup_with_many_ingress_egress_rules
# The default value is 50 rules
max_security_group_rules: 50
```
Below is a generic example of a check metadata file. **Do not include comments in actual JSON files.**
As you can see in the above code, within the service client, in this case the `ec2_client`, there is an object called `audit_config` which is a Python dictionary containing the values read from the configuration file.
In order to use it, you have to check first if the value is present in the configuration file. If the value is not present, you can create it in the `config.yaml` file and then, read it from the check.
???+ note
It is mandatory to always use the `dictionary.get(value, default)` syntax to set a default value in the case the configuration value is not present.
## Check Metadata
Each Prowler check has metadata associated which is stored at the same level of the check's folder in a file called A `check_name.metadata.json` containing the check's metadata.
???+ note
We are going to include comments in this example metadata JSON but they cannot be included because the JSON format does not allow comments.
```json
{
# Provider holds the Prowler provider which the checks belongs to
"Provider": "aws",
"CheckID": "example_check_id",
"CheckTitle": "Example Check Title",
"CheckType": ["Infrastructure Security"],
# CheckID holds check name
"CheckID": "ec2_ami_public",
# CheckTitle holds the title of the check
"CheckTitle": "Ensure there are no EC2 AMIs set as Public.",
# CheckType holds Software and Configuration Checks, check more here
# https://docs.aws.amazon.com/securityhub/latest/userguide/asff-required-attributes.html#Types
"CheckType": [
"Infrastructure Security"
],
# ServiceName holds the provider service name
"ServiceName": "ec2",
# SubServiceName holds the service's subservice or resource used by the check
"SubServiceName": "ami",
# ResourceIdTemplate holds the unique ID for the resource used by the check
"ResourceIdTemplate": "arn:partition:service:region:account-id:resource-id",
# Severity holds the check's severity, always in lowercase (critical, high, medium, low or informational)
"Severity": "critical",
# ResourceType only for AWS, holds the type from here
# https://docs.aws.amazon.com/securityhub/latest/userguide/asff-resources.html
# In case of not existing, use CloudFormation type but removing the "::" and using capital letters only at the beginning of each word. Example: "AWS::EC2::Instance" -> "AwsEc2Instance"
# CloudFormation type reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html
# If the resource type does not exist in the CloudFormation types, use "Other".
"ResourceType": "Other",
"Description": "Example description of the check.",
"Risk": "Example risk if the check fails.",
"RelatedUrl": "https://example.com",
# Description holds the title of the check, for now is the same as CheckTitle
"Description": "Ensure there are no EC2 AMIs set as Public.",
# Risk holds the check risk if the result is FAIL
"Risk": "When your AMIs are publicly accessible, they are available in the Community AMIs where everyone with an AWS account can use them to launch EC2 instances. Your AMIs could contain snapshots of your applications (including their data), therefore exposing your snapshots in this manner is not advised.",
# RelatedUrl holds an URL with more information about the check purpose
"RelatedUrl": "",
# Remediation holds the information to help the practitioner to fix the issue in the case of the check raise a FAIL
"Remediation": {
# Code holds different methods to remediate the FAIL finding
"Code": {
"CLI": "example CLI command",
# CLI holds the command in the provider native CLI to remediate it
"CLI": "aws ec2 modify-image-attribute --region <REGION> --image-id <EC2_AMI_ID> --launch-permission {\"Remove\":[{\"Group\":\"all\"}]}",
# NativeIaC holds the native IaC code to remediate it, use "https://docs.bridgecrew.io/docs"
"NativeIaC": "",
"Other": "",
# Other holds the other commands, scripts or code to remediate it, use "https://www.trendmicro.com/cloudoneconformity"
"Other": "https://docs.prowler.com/checks/public_8#aws-console",
# Terraform holds the Terraform code to remediate it, use "https://docs.bridgecrew.io/docs"
"Terraform": ""
},
# Recommendation holds the recommendation for this check with a description and a related URL
"Recommendation": {
"Text": "Example recommendation text.",
"Url": "https://example.com/remediation"
"Text": "We recommend your EC2 AMIs are not publicly accessible, or generally available in the Community AMIs.",
"Url": "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/cancel-sharing-an-AMI.html"
}
},
"Categories": ["example-category"],
# Categories holds the category or categories where the check can be included, if applied
"Categories": [
"internet-exposed"
],
# DependsOn is not actively used for the moment but it will hold other
# checks wich this check is dependant to
"DependsOn": [],
# RelatedTo is not actively used for the moment but it will hold other
# checks wich this check is related to
"RelatedTo": [],
# Notes holds additional information not covered in this file
"Notes": ""
}
```
### Metadata Fields and Their Purpose
### Remediation Code
- **Provider** — The Prowler provider related to the check. The name **must** be lowercase and match the provider folder name. For supported providers refer to [Prowler Hub](https://hub.prowler.com/check) or directly to [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
For the Remediation Code we use the following knowledge base to fill it:
- **CheckID** — The unique identifier for the check inside the provider, this field **must** match the check's folder and python file and json metadata file name. For more information about the naming refer to the [Naming Format for Checks](#naming-format-for-checks) section.
- Official documentation for the provider
- https://docs.prowler.com/checks/checks-index
- https://www.trendmicro.com/cloudoneconformity
- https://github.com/cloudmatos/matos/tree/master/remediations
- **CheckTitle** — A concise, descriptive title for the check.
### RelatedURL and Recommendation
- **CheckType***For now this field is only standardized for the AWS provider*.
- For AWS this field must follow the [AWS Security Hub Types](https://docs.aws.amazon.com/securityhub/latest/userguide/asff-required-attributes.html#Types) format. So the common pattern to follow is `namespace/category/classifier`, refer to the attached documentation for the valid values for this fields.
The RelatedURL field must be filled with an URL from the provider's official documentation like https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/sharingamis-intro.html
- **ServiceName** — The name of the provider service being audited. This field **must** be in lowercase and match with the service folder name. For supported services refer to [Prowler Hub](https://hub.prowler.com/check) or directly to [Prowler Code](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
Also, if not present you can use the Risk and Recommendation texts from the TrendMicro [CloudConformity](https://www.trendmicro.com/cloudoneconformity) guide.
- **SubServiceName** — The subservice or resource within the service, if applicable. For more information refer to the [Naming Format for Checks](#naming-format-for-checks) section.
- **ResourceIdTemplate** — A template for the unique resource identifier. For more information refer to the [Prowler's Resource Identification](#prowlers-resource-identification) section.
### Python Model
The following is the Python model for the check's metadata model. We use the Pydantic's [BaseModel](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel) as the parent class.
- **Severity** — The severity of the finding if the check fails. Must be one of: `critical`, `high`, `medium`, `low`, or `informational`, this field **must** be in lowercase. To get more information about the severity levels refer to the [Prowler's Check Severity Levels](#prowlers-check-severity-levels) section.
As per August 5th 2023 the `Check_Metadata_Model` can be found [here](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py#L34-L56).
```python
class Check_Metadata_Model(BaseModel):
"""Check Metadata Model"""
- **ResourceType** — The type of resource being audited. *For now this field is only standardized for the AWS provider*.
- For AWS use the [Security Hub resource types](https://docs.aws.amazon.com/securityhub/latest/userguide/asff-resources.html) or, if not available, the PascalCase version of the [CloudFormation type](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html) (e.g., `AwsEc2Instance`). Use "Other" if no match exists.
- **Description** — A short description of what the check does.
- **Risk** — The risk or impact if the check fails, explaining why the finding matters.
- **RelatedUrl** — A URL to official documentation or further reading about the check's purpose. If no official documentation is available, use the risk and recommendation text from trusted third-party sources.
- **Remediation** — Guidance for fixing a failed check, including:
- **Code** — Remediation commands or code snippets for CLI, Terraform, native IaC, or other tools like the Web Console.
- **Recommendation** — A textual human readable recommendation. Here it is not necessary to include actual steps, but rather a general recommendation about what to do to fix the check.
- **Categories** — One or more categories for grouping checks in execution (e.g., `internet-exposed`). For the current list of categories, refer to the [Prowler Hub](https://hub.prowler.com/check).
- **DependsOn** — Currently not used.
- **RelatedTo** — Currently not used.
- **Notes** — Any additional information not covered by other fields.
### Remediation Code Guidelines
When providing remediation steps, reference the following sources:
- Official provider documentation.
- [Prowler Checks Remediation Index](https://docs.prowler.com/checks/checks-index)
- [TrendMicro Cloud One Conformity](https://www.trendmicro.com/cloudoneconformity)
- [CloudMatos Remediation Repository](https://github.com/cloudmatos/matos/tree/master/remediations)
### Python Model Reference
The metadata structure is enforced in code using a Pydantic model. For reference, see the [`CheckMetadata`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py).
Provider: str
CheckID: str
CheckTitle: str
CheckType: list[str]
ServiceName: str
SubServiceName: str
ResourceIdTemplate: str
Severity: str
ResourceType: str
Description: str
Risk: str
RelatedUrl: str
Remediation: Remediation
Categories: list[str]
DependsOn: list[str]
RelatedTo: list[str]
Notes: str
# We set the compliance to None to
# store the compliance later if supplied
Compliance: list = None
```
@@ -1,46 +0,0 @@
# Configurable Checks in Prowler
Prowler empowers users to extend and adapt cloud security coverage by making checks configurable through the use of the `audit_config` object. This approach enables customization of checks to meet specific requirements through a configuration file.
## Understanding the `audit_config` Object
The `audit_config` object is a dictionary attached to each provider's service client (for example, `<service_name>_client.audit_config`). This object loads configuration values from the main configuration file (`prowler/config/config.yaml`). Use `audit_config` to make checks flexible and user-configurable.
## Using `audit_config` to Configure Checks
Retrieve configuration values in a check by using the `.get()` method on the `audit_config` object. For example, to get the minimum number of Availability Zones for Lambda from the configuration file, use the following code. If the value is not set in the configuration, the check defaults to 2:
```python
LAMBDA_MIN_AZS = awslambda_client.audit_config.get("lambda_min_azs", 2)
```
Always provide a default value in `.get()` to ensure the check works even if the configuration is missing the variable.
### Example: Security Group Rule Limit
```python title="ec2_securitygroup_with_many_ingress_egress_rules.py"
class ec2_securitygroup_with_many_ingress_egress_rules(Check):
def execute(self):
findings = []
max_security_group_rules = ec2_client.audit_config.get(
"max_security_group_rules", 50
)
for security_group_arn, security_group in ec2_client.security_groups.items():
# ... check logic ...
```
## Required File Updates for Configurable Variables
When adding a new configurable check to Prowler, update the following files:
- **Configuration File:** Add the new variable under the relevant provider or service section in `prowler/config/config.yaml`.
```yaml
# aws.awslambda_function_vpc_multi_az
lambda_min_azs: 2
```
- **Test Fixtures:** If tests depend on this configuration, add the variable to `tests/config/fixtures/config.yaml`.
- **Documentation:** Document the new variable in the list of configurable checks in `docs/tutorials/configuration_file.md`.
For a complete list of checks that already support configuration, see the [Configuration File Tutorial](../tutorials/configuration_file.md).
This approach ensures that checks are easily configurable, making Prowler highly adaptable to different environments and requirements.
+7 -9
View File
@@ -1,16 +1,14 @@
# Debugging in Prowler
# Debugging
Debugging in Prowler simplifies the development process, allowing developers to efficiently inspect and resolve unexpected issues during execution.
Debugging in Prowler make things easier!
If you are developing Prowler, it's possible that you will encounter some situations where you have to inspect the code in depth to fix some unexpected issues during the execution.
## Debugging with Visual Studio Code
## VSCode
Visual Studio Code (also referred to as VSCode) provides an integrated debugger for executing and analyzing Prowler code. Refer to the official VSCode debugger [documentation](https://code.visualstudio.com/docs/editor/debugging) for detailed instructions.
In VSCode you can run the code using the integrated debugger. Please, refer to this [documentation](https://code.visualstudio.com/docs/editor/debugging) for guidance about the debugger in VSCode.
The following file is an example of the [debugging configuration](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) file that you can add to [Virtual Studio Code](https://code.visualstudio.com/).
### Debugging Configuration Example
The following file is an example of a [debugging configuration](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) file for [Virtual Studio Code](https://code.visualstudio.com/).
This file must be placed inside the *.vscode* directory and named *launch.json*:
This file should inside the *.vscode* folder and its name has to be *launch.json*:
```json
{
+6 -26
View File
@@ -1,28 +1,8 @@
## Contributing to Documentation
## Contribute with documentation
Prowler documentation is built using `mkdocs`, allowing contributors to easily add or enhance documentation.
We use `mkdocs` to build this Prowler documentation site so you can easily contribute back with new docs or improving them. To install all necessary dependencies use `poetry install --with docs`.
### Installation and Setup
Install all necessary dependencies using: `poetry install --with docs`.
1. Install `mkdocs` using your preferred package manager.
2. Running the Documentation Locally
Navigate to the `prowler` repository folder.
Start the local documentation server by running: `mkdocs serve`.
Open `http://localhost:8000` in your browser to view live updates.
3. Making Documentation Changes
Make all needed changes to docs or add new documents. Edit existing Markdown (.md) files inside `prowler/docs`.
To add new sections or files, update the `mkdocs.yaml` file located in the root directory of Prowlers repository.
4. Submitting Changes
Once documentation updates are complete:
Submit a pull request for review.
The Prowler team will assess and merge contributions.
Your efforts help improve Prowler documentation—thank you for contributing!
1. Install `mkdocs` with your favorite package manager.
2. Inside the `prowler` repository folder run `mkdocs serve` and point your browser to `http://localhost:8000` and you will see live changes to your local copy of this documentation site.
3. Make all needed changes to docs or add new documents. To do so just edit existing md files inside `prowler/docs` and if you are adding a new section or file please make sure you add it to `mkdocs.yaml` file in the root folder of the Prowler repo.
4. Once you are done with changes, please send a pull request to us for review and merge. Thank you in advance!
-133
View File
@@ -1,133 +0,0 @@
# Google Cloud Provider
This page details the [Google Cloud Platform (GCP)](https://cloud.google.com/) provider implementation in Prowler.
By default, Prowler will audit all the GCP projects that the authenticated identity can access. To configure it, follow the [getting started](../index.md#google-cloud) page.
## GCP Provider Classes Architecture
The GCP provider implementation follows the general [Provider structure](./provider.md). This section focuses on the GCP-specific implementation, highlighting how the generic provider concepts are realized for GCP in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### Main Class
- **Location:** [`prowler/providers/gcp/gcp_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/gcp_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for GCP-specific logic, session management, credential validation, project and organization discovery, and configuration.
- **Key GCP Responsibilities:**
- Initializes and manages GCP sessions (supports Application Default Credentials, Service Account, OAuth, and impersonation).
- Validates credentials and sets up the GCP identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers accessible GCP projects and organization metadata.
- Provides properties and methods for downstream GCP service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/gcp/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/models.py)
- **Purpose:** Define structured data for GCP identity, project, and organization info.
- **Key GCP Models:**
- `GCPIdentityInfo`: Holds GCP identity metadata, such as the profile name.
- `GCPOrganization`: Represents a GCP organization with ID, name, and display name.
- `GCPProject`: Represents a GCP project with number, ID, name, organization, labels, and lifecycle state.
### `GCPService` (Service Base Class)
- **Location:** [`prowler/providers/gcp/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/lib/service/service.py)
- **Purpose:** Abstract base class that all GCP service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for GCP.
- **Key GCP Responsibilities:**
- Receives a `GcpProvider` instance to access session, identity, and configuration.
- Manages clients for all services by project.
- Filters projects to only those with the relevant API enabled.
- Provides `__threading_call__` method to make API calls in parallel by project or resource.
- Exposes common audit context (`project_ids`, `projects`, `default_project_id`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/gcp/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for GCP-specific error handling, such as credential, session, and project access errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/gcp/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in GCP Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/gcp/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/gcp/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other services already implemented as reference. In next subsection you can find a list of common patterns that are used accross all GCP services.
### GCP Service Common Patterns
- Services communicate with GCP using the Google Cloud Python SDK, you can find the documentation with all the services [here](https://cloud.google.com/python/docs/reference).
- Every GCP service class inherits from `GCPService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name, provider, region (default "global"), and API version (default "v1"). Usually, the service name is the class name in lowercase, so it is called like `super().__init__(__class__.__name__, provider)`.
- Resource containers **must** be initialized in the constructor, typically as dictionaries keyed by resource ID and the value is the resource object.
- Only projects with the API enabled are included in the audit scope.
- Resource discovery and attribute collection can be parallelized using `self.__threading_call__`, typically by region/zone or resource.
- All GCP resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Each GCP API calls are wrapped in try/except blocks, always logging errors.
- Tags and additional attributes that cannot be retrieved from the default call should be collected and stored for each resource using dedicated methods and threading.
## Specific Patterns in GCP Checks
The GCP checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/gcp/services/iam/iam_sa_user_managed_key_unused/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/gcp/services/iam/iam_sa_user_managed_key_unused))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [GCP check implementation documentation](./checks.md#creating-a-check) and taking other similar checks as reference.
### Check Report Class
The `Check_Report_GCP` class models a single finding for a GCP resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_GCP` extends the base report structure with GCP-specific fields, enabling detailed tracking of the resource, project, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_GCP`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its GCP-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses the explicit `resource_id` argument if provided.
- Otherwise, uses `resource.id` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_name`**:
- Uses the explicit `resource_name` argument if provided.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string.
- **`project_id`**:
- Uses the explicit `project_id` argument if provided.
- Otherwise, uses `resource.project_id` if present.
- Defaults to an empty string.
- **`location`**:
- Uses the explicit `location` argument if provided.
- Otherwise, uses `resource.location` if present.
- Otherwise, uses `resource.region` if present.
- Defaults to "global" if none are available.
All these attributes can be overridden by passing the corresponding argument to the constructor. If the resource object does not contain the required attributes, you must set them manually.
Others attributes are inherited from the `Check_Report` class, from that ones you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_GCP(
metadata=check_metadata,
resource=resource_object,
resource_id="custom-id", # Optional override
resource_name="custom-name", # Optional override
project_id="my-gcp-project", # Optional override
location="us-central1" # Optional override
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-116
View File
@@ -1,116 +0,0 @@
# GitHub Provider
This page details the [GitHub](https://github.com/) provider implementation in Prowler.
By default, Prowler will audit the GitHub account - scanning all repositories, organizations, and applications that your configured credentials can access. To configure it, follow the [getting started](../index.md#github) page.
## GitHub Provider Classes Architecture
The GitHub provider implementation follows the general [Provider structure](./provider.md). This section focuses on the GitHub-specific implementation, highlighting how the generic provider concepts are realized for GitHub in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `GithubProvider` (Main Class)
- **Location:** [`prowler/providers/github/github_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/github_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for GitHub-specific logic, session management, credential validation, and configuration.
- **Key GitHub Responsibilities:**
- Initializes and manages GitHub sessions (supports Personal Access Token, OAuth App, and GitHub App authentication).
- Validates credentials and sets up the GitHub identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Provides properties and methods for downstream GitHub service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/github/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/models.py)
- **Purpose:** Define structured data for GitHub identity, session, and output options.
- **Key GitHub Models:**
- `GithubSession`: Holds authentication tokens and keys for the session.
- `GithubIdentityInfo`, `GithubAppIdentityInfo`: Store account or app identity metadata.
### `GithubService` (Service Base Class)
- **Location:** [`prowler/providers/github/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/lib/service/service.py)
- **Purpose:** Abstract base class for all GitHub service-specific classes.
- **Key GitHub Responsibilities:**
- Receives a `GithubProvider` instance to access session, identity, and configuration.
- Manages GitHub API clients for the authenticated user or app.
- Exposes common audit context (`audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/github/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for GitHub-specific error handling, such as credential and session errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/github/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in GitHub Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/github/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/github/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and by taking other already implemented services as reference.
### GitHub Service Common Patterns
- Services communicate with GitHub using the PyGithub Python SDK. See the [official documentation](https://pygithub.readthedocs.io/).
- Every GitHub service class inherits from `GithubService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the service name and provider (e.g. `super().__init__(__class__.__name__, provider))`). Ensure that the service name in PyGithub is the same that you use in the constructor. Usually is used the `__class__.__name__` to get the service name because it is the same as the class name.
- Resource containers **must** be initialized in the constructor, typically as dictionaries keyed by resource ID or name.
- All GitHub resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- GitHub API calls are wrapped in try/except blocks, always logging errors.
## Specific Patterns in GitHub Checks
The GitHub checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/github/services/repository/repository_secret_scanning_enabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/github/services/repository/repository_secret_scanning_enabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is the [GitHub check implementation documentation](./checks.md#creating-a-check) and by taking other checks as reference.
### Check Report Class
The `CheckReportGithub` class models a single finding for a GitHub resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`CheckReportGithub` extends the base report structure with GitHub-specific fields, enabling detailed tracking of the resource, name, and owner associated with each finding.
#### Constructor and Attribute Population
When you instantiate `CheckReportGithub`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its GitHub-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses the explicit `resource_id` argument if provided.
- Otherwise, uses `resource.id` if present.
- Defaults to an empty string if not available.
- **`resource_name`**:
- Uses the explicit `resource_name` argument if provided.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if not available.
- **`owner`**:
- Uses the explicit `owner` argument if provided.
- Otherwise, uses `resource.owner` for repositories and `resource.name` for organizations.
- Defaults to an empty string if not available.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = CheckReportGithub(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+1 -1
View File
@@ -1,3 +1,3 @@
# Integration Tests
Coming soon ...
Coming soon ...
+43 -83
View File
@@ -2,89 +2,66 @@
## Introduction
Integrating Prowler with external tools enhances its functionality and enables seamless workflow automation. Prowler supports a variety of integrations to optimize security assessments and reporting.
Integrating Prowler with external tools enhances its functionality and seamlessly embeds it into your workflows. Prowler supports a wide range of integrations to streamline security assessments and reporting. Common integration targets include messaging platforms like Slack, project management tools like Jira, and cloud services such as AWS Security Hub.
### Supported Integration Targets
- Messaging Platforms Example: Slack
- Project Management Tools Example: Jira
- Cloud Services Example: AWS Security Hub
### Integration Guidelines
To integrate Prowler with a specific product:
Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand its architecture and integration mechanisms.
* Identify the most suitable integration method for the intended platform.
* Consult the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand how Prowler works and the way that you can integrate it with the desired product!
* Identify the best approach for the specific platform youre targeting.
## Steps to Create an Integration
### Defining the Integration Purpose
### Identify the Integration Purpose
* Before implementing an integration, clearly define its objective. Common purposes include:
* Clearly define the objective of the integration. For example:
* Sending Prowler findings to a platform for alerts, tracking, or further analysis.
* Review existing integrations in the [`prowler/lib/outputs`](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/outputs) folder for inspiration and implementation examples.
* Sending Prowler findings to a platform for alerting, tracking, or further analysis.
* For inspiration and implementation examples, please review the existing integrations in the [`prowler/lib/outputs`](https://github.com/prowler-cloud/prowler/tree/master/prowler/lib/outputs) folder.
### Developing the Integration
### Develop the Integration
* Script Development:
* Write a script to process Prowlers output and interact with the target platforms API.
* If the goal is to send findings, parse Prowlers results and use the platforms API to create entries or notifications.
* For example, to send findings, parse Prowlers results and use the platforms API to create entries or notifications.
* Configuration:
* Ensure the script supports environment-specific settings, such as:
- API endpoints
- Authentication tokens
- Any necessary configurable parameters.
* Ensure your script includes configurable options for environment-specific settings, such as API endpoints and authentication tokens.
### Fundamental Structure
* Integration Class:
* To implement an integration, create a class that encapsulates the required attributes and methods for interacting with the target platform. Example: Jira Integration
* Create a class that encapsulates attributes and methods for the integration.
Here is an example with Jira integration:
```python title="Jira Class"
class Jira:
"""
Jira class to interact with the Jira API
[Note]
This integration is limited to a single Jira Cloud instance, meaning all issues will be created under the same Jira Cloud ID. Future improvements will include the ability to specify a Jira Cloud ID for users associated with multiple accounts.
This integration is limited to a single Jira Cloud, therefore all the issues will be created for same Jira Cloud ID. We will need to work on the ability of providing a Jira Cloud ID if the user is present in more than one.
Attributes
- _redirect_uri: The redirect URI used
- _client_id: The client identifier
Attributes:
- _redirect_uri: The redirect URI
- _client_id: The client ID
- _client_secret: The client secret
- _access_token: The access token
- _refresh_token: The refresh token
- _expiration_date: The authentication expiration
- _cloud_id: The cloud identifier
- _cloud_id: The cloud ID
- _scopes: The scopes needed to authenticate, read:jira-user read:jira-work write:jira-work
- AUTH_URL: The URL to authenticate with Jira
- PARAMS_TEMPLATE: The template for the parameters to authenticate with Jira
- TOKEN_URL: The URL to get the access token from Jira
- API_TOKEN_URL: The URL to get the accessible resources from Jira
Methods
__init__: Initializes the Jira object
- input_authorization_code: Inputs the authorization code
- auth_code_url: Generates the URL to authorize the application
- get_auth: Gets the access token and refreshes it
- get_cloud_id: Gets the cloud identifier from Jira
- get_access_token: Gets the access token
- refresh_access_token: Refreshes the access token from Jira
- test_connection: Tests the connection to Jira and returns a Connection object
- get_projects: Gets the projects from Jira
- get_available_issue_types: Gets the available issue types for a project
- send_findings: Sends the findings to Jira and creates an issue
Methods:
- __init__: Initialize the Jira object
- input_authorization_code: Input the authorization code
- auth_code_url: Generate the URL to authorize the application
- get_auth: Get the access token and refresh token
- get_cloud_id: Get the cloud ID from Jira
- get_access_token: Get the access token
- refresh_access_token: Refresh the access token from Jira
- test_connection: Test the connection to Jira and return a Connection object
- get_projects: Get the projects from Jira
- get_available_issue_types: Get the available issue types for a project
- send_findings: Send the findings to Jira and create an issue
Raises:
- JiraGetAuthResponseError: Failed to get the access token and refresh token
@@ -151,17 +128,9 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
# More properties and methods
```
* Test Connection Method:
* Validating Credentials or Tokens
To ensure a successful connection to the target platform, implement a method that validates authentication credentials or tokens.
#### Method Implementation
The following example demonstrates the `test_connection` method for the `Jira` class:
* Implement a method to validate credentials or tokens, ensuring the connection to the target platform is successful.
The following is the code for the `test_connection` method for the `Jira` class:
```python title="Test connection"
@staticmethod
def test_connection(
@@ -173,8 +142,8 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
"""Test the connection to Jira
Args:
- redirect_uri: The redirect URI used
- client_id: The client identifier
- redirect_uri: The redirect URI
- client_id: The client ID
- client_secret: The client secret
- raise_on_exception: Whether to raise an exception or not
@@ -246,15 +215,9 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
)
return Connection(is_connected=False, error=error)
```
* Send Findings Method:
* Add a method to send Prowler findings to the target platform, adhering to its API specifications.
#### Method Implementation
The following example demonstrates the `send_findings` method for the `Jira` class:
The following is the code for the `send_findings` method for the `Jira` class:
```python title="Send findings method"
def send_findings(
self,
@@ -358,19 +321,16 @@ Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler
)
```
### Testing the Integration
### Testing
* Conduct integration testing in a controlled environment to validate expected behavior. Ensure the following:
* Transmission Accuracy Verify that Prowler findings are correctly sent and processed by the target platform.
* Error Handling Simulate edge cases to assess robustness and failure recovery mechanisms.
* Test the integration in a controlled environment to confirm it behaves as expected.
* Verify that Prowlers findings are accurately transmitted and correctly processed by the target platform.
* Simulate edge cases to ensure robust error handling.
### Documentation
* Ensure the following elements are included:
* Setup Instructions List all necessary dependencies and installation steps.
* Configuration Details Specify required environment variables, authentication steps, etc.
* Example Use Cases Provide practical scenarios demonstrating functionality.
* Troubleshooting Guide Document common issues and resolution steps.
* Comprehensive and clear documentation improves maintainability and simplifies onboarding.
* Provide clear, detailed documentation for your integration:
* Setup instructions, including any required dependencies.
* Configuration details, such as environment variables or authentication steps.
* Example use cases and troubleshooting tips.
* Good documentation ensures maintainability and simplifies onboarding for team members.
+36 -137
View File
@@ -1,176 +1,75 @@
# Introduction to developing in Prowler
# Developer Guide
Extending Prowler
You can extend Prowler Open Source in many different ways, in most cases you will want to create your own checks and compliance security frameworks, here is where you can learn about how to get started with it. We also include how to create custom outputs, integrations and more.
Prowler can be extended in various ways, with common use cases including:
## Get the code and install all dependencies
- New security checks
- New compliance frameworks
- New output formats
- New integrations
- New proposed features
First of all, you need a version of Python 3.9 or higher and also `pip` installed to be able to install all dependencies required.
All the relevant information for these cases is included in this guide.
Then, to start working with the Prowler Github repository you need to fork it to be able to propose changes for new features, bug fixing, etc. To fork the Prowler repo please refer to [this guide](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo?tool=webui#forking-a-repository).
## Getting the Code and Installing All Dependencies
### Prerequisites
Before proceeding, ensure the following:
- Git is installed.
- Python 3.9 or higher is installed.
- `poetry` is installed to manage dependencies.
### Forking the Prowler Repository
To contribute to Prowler, fork the Prowler GitHub repository. This allows you to propose changes, submit new features, and fix bugs. For guidance on forking, refer to the [official GitHub documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo?tool=webui#forking-a-repository).
### Cloning Your Forked Repository
Once your fork is created, clone it using the following commands:
Once that is satisfied go ahead and clone your forked repo:
```
git clone https://github.com/<your-github-user>/prowler
cd prowler
```
For isolation and to avoid conflicts with other environments, we recommend using `poetry`, a Python dependency management tool. You can install it by following the instructions [here](https://python-poetry.org/docs/#installation).
### Dependency Management and Environment Isolation
To prevent conflicts between environments, we recommend using `poetry`, a Python dependency management solution. Install it by following the [instructions](https://python-poetry.org/docs/#installation).
### Installing Dependencies
To install all required dependencies, including those needed for development, run:
Then install all dependencies including the ones for developers:
```
poetry install --with dev
eval $(poetry env activate)
eval $(poetry env activate) \
```
> [!IMPORTANT]
> Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
>
> If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
> In case you have any doubts, consult the Poetry environment activation guide: https://python-poetry.org/docs/managing-environments/#activating-the-environment
???+ important
Starting from Poetry v2.0.0, `poetry shell` has been deprecated in favor of `poetry env activate`.
If your poetry version is below 2.0.0 you must keep using `poetry shell` to activate your environment.
In case you have any doubts, consult the [Poetry environment activation guide](https://python-poetry.org/docs/managing-environments/#activating-the-environment).
## Contributing to Prowler
### Ways to Contribute
Here are some ideas for collaborating with Prowler:
1. **Review Current Issues**: Check out our [GitHub Issues](https://github.com/prowler-cloud/prowler/issues) page. We often tag issues as `good first issue` - these are perfect for new contributors as they are typically well-defined and manageable in scope.
2. **Expand Prowler's Capabilities**: Prowler is constantly evolving, and you can be a part of its growth. Whether you are adding checks, supporting new services, or introducing integrations, your contributions help improve the tool for everyone. Here is how you can get involved:
- **Adding New Checks**
Want to improve Prowler's detection capabilities for your favorite cloud provider? You can contribute by writing new checks. To get started, follow the [create a new check guide](./checks.md).
- **Adding New Services**
One key service for your favorite cloud provider is missing? Add it to Prowler! To add a new service, check out the [create a new service guide](./services.md). Do not forget to include relevant checks to validate functionality.
- **Adding New Providers**
If you would like to extend Prowler to work with a new cloud provider, follow the [create a new provider guide](./provider.md). This typically involves setting up new services and checks to ensure compatibility.
- **Adding New Output Formats**
Want to tailor how results are displayed or exported? You can add custom output formats by following the [create a new output format guide](./outputs.md).
- **Adding New Integrations**
Prowler can work with other tools and platforms through integrations. If you would like to add one, see the [create a new integration guide](./integrations.md).
- **Proposing or Implementing Features**
Got an idea to make Prowler better? Whether it is a brand-new feature or an enhancement to an existing one, you are welcome to propose it or help implement community-requested improvements.
3. **Improve Documentation**: Help make Prowler more accessible by enhancing our documentation, fixing typos, or adding examples/tutorials. See the tutorial of how we write our documentation [here](./documentation.md).
4. **Bug Fixes**: If you find any issues or bugs, you can report them in the [GitHub Issues](https://github.com/prowler-cloud/prowler/issues) page and if you want you can also fix them.
Remember, our community is here to help! If you need guidance, do not hesitate to ask questions in the issues or join our [Slack workspace](https://goto.prowler.com/slack).
### Pre-Commit Hooks
This repository uses Git pre-commit hooks managed by the [pre-commit](https://pre-commit.com/) tool, it is installed with `poetry install --with dev`. Next, run the following command in the root of this repository:
## Contributing with your code or fixes to Prowler
This repo has git pre-commit hooks managed via the [pre-commit](https://pre-commit.com/) tool. [Install](https://pre-commit.com/#install) it how ever you like, then in the root of this repo run:
```shell
pre-commit install
```
Successful installation should produce the following output:
You should get an output like the following:
```shell
pre-commit installed at .git/hooks/pre-commit
```
### Code Quality and Security Checks
Before merging pull requests, several automated checks and utilities ensure code security and updated dependencies:
Before we merge any of your pull requests we pass checks to the code, we use the following tools and automation to make sure the code is secure and dependencies up-to-dated:
???+ note
These should have been already installed if `poetry install --with dev` was already run.
These should have been already installed if you ran `poetry install --with dev`
- [`bandit`](https://pypi.org/project/bandit/) for code security review.
- [`safety`](https://pypi.org/project/safety/) and [`dependabot`](https://github.com/features/security) for dependencies.
- [`hadolint`](https://github.com/hadolint/hadolint) and [`dockle`](https://github.com/goodwithtech/dockle) for container security.
- [`Snyk`](https://docs.snyk.io/integrations/snyk-container-integrations/container-security-with-docker-hub-integration) for container security in Docker Hub.
- [`clair`](https://github.com/quay/clair) for container security in Amazon ECR.
- [`vulture`](https://pypi.org/project/vulture/), [`flake8`](https://pypi.org/project/flake8/), [`black`](https://pypi.org/project/black/), and [`pylint`](https://pypi.org/project/pylint/) for formatting and best practices.
- [`hadolint`](https://github.com/hadolint/hadolint) and [`dockle`](https://github.com/goodwithtech/dockle) for our containers security.
- [`Snyk`](https://docs.snyk.io/integrations/snyk-container-integrations/container-security-with-docker-hub-integration) in Docker Hub.
- [`clair`](https://github.com/quay/clair) in Amazon ECR.
- [`vulture`](https://pypi.org/project/vulture/), [`flake8`](https://pypi.org/project/flake8/), [`black`](https://pypi.org/project/black/) and [`pylint`](https://pypi.org/project/pylint/) for formatting and best practices.
Additionally, ensure the latest version of [`TruffleHog`](https://github.com/trufflesecurity/trufflehog) is installed to scan for sensitive data in the code. Follow the official [installation guide](https://github.com/trufflesecurity/trufflehog?tab=readme-ov-file#floppy_disk-installation) for setup.
You can see all dependencies in file `pyproject.toml`.
### Dependency Management
Moreover, you would need to install [`TruffleHog`](https://github.com/trufflesecurity/trufflehog) on the latest version to check for secrets in the code. You can install it using the official installation guide [here](https://github.com/trufflesecurity/trufflehog?tab=readme-ov-file#floppy_disk-installation).
All dependencies are listed in the `pyproject.toml` file.
For proper code documentation, refer to the following and follow the code documentation practices presented there: [Google Python Style Guide - Comments and Docstrings](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings).
Additionally, please ensure to follow the code documentation practices outlined in this guide: [Google Python Style Guide - Comments and Docstrings](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings).
???+ note
If you encounter issues when committing to the Prowler repository, use the `--no-verify` flag with the `git commit` command.
### Repository Folder Structure
Understanding the layout of the Prowler codebase will help you quickly find where to add new features, checks, or integrations. The following is a high-level overview from the root of the repository:
```
prowler/
├── prowler/ # Main source code for Prowler SDK (CLI, providers, services, checks, compliances, config, etc.)
├── api/ # API server and related code
├── dashboard/ # Local Dashboard extracted from the CLI output
├── ui/ # Web UI components
├── util/ # Utility scripts and helpers
├── tests/ # Prowler SDK test suite
├── docs/ # Documentation, including this guide
├── examples/ # Example output formats for providers and scripts
├── permissions/ # Permission-related files and policies
├── contrib/ # Community-contributed scripts or modules
├── kubernetes/ # Kubernetes deployment files
├── .github/ # GitHub related files (workflows, issue templates, etc.)
├── pyproject.toml # Python project configuration (Poetry)
├── poetry.lock # Poetry lock file
├── README.md # Project overview and getting started
├── Makefile # Common development commands
├── Dockerfile # SDK Docker container
├── docker-compose.yml # Prowler App Docker compose
└── ... # Other supporting files
```
If you have any trouble when committing to the Prowler repository, add the `--no-verify` flag to the `git commit` command.
## Pull Request Checklist
When creating or reviewing a pull request in https://github.com/prowler-cloud/prowler, follow [this checklist](https://github.com/prowler-cloud/prowler/blob/master/.github/pull_request_template.md#checklist).
If you create or review a PR in https://github.com/prowler-cloud/prowler please follow this checklist:
## Contribution Appreciation
- [ ] Make sure you've read the Prowler Developer Guide at https://docs.prowler.cloud/en/latest/developer-guide/introduction/
- [ ] Are we following the style guide, hence installed all the linters and formatters? Please check https://docs.prowler.cloud/en/latest/developer-guide/introduction/#contributing-with-your-code-or-fixes-to-prowler
- [ ] Are we increasing/decreasing the test coverage? Please, review if we need to include/modify tests for the new code.
- [ ] Are we modifying outputs? Please review it carefully.
- [ ] Do we need to modify the Prowler documentation to reflect the changes introduced?
- [ ] Are we introducing possible breaking changes? Are we modifying a core feature?
If you enjoy swag, wed love to thank you for your contribution with laptop stickers or other Prowler merchandise!
To request swag: Share your pull request details in our [Slack workspace](https://goto.prowler.com/slack).
## Want some swag as appreciation for your contribution?
You can also reach out to Toni de la Fuente on [Twitter](https://twitter.com/ToniBlyx)his DMs are open!
# Testing a Pull Request from a Specific Branch
To test Prowler from a specific branch (for example, to try out changes from a pull request before it is merged), you can use `pipx` to install directly from GitHub:
```sh
pipx install "git+https://github.com/prowler-cloud/prowler.git@branch-name"
```
Replace `branch-name` with the name of the branch you want to test. This will install Prowler in an isolated environment, allowing you to try out the changes safely.
If you are like us and you love swag, we are happy to thank you for your contribution with some laptop stickers or whatever other swag we may have at that time. Please, tell us more details and your pull request link in our [Slack workspace here](https://goto.prowler.com/slack). You can also reach out to Toni de la Fuente on Twitter [here](https://twitter.com/ToniBlyx), his DMs are open.
-117
View File
@@ -1,117 +0,0 @@
# Kubernetes Provider
This page details the [Kubernetes](https://kubernetes.io/) provider implementation in Prowler.
By default, Prowler will audit all namespaces in the Kubernetes cluster accessible by the configured context. To configure it, follow the [getting started](../index.md#kubernetes) page.
## Kubernetes Provider Classes Architecture
The Kubernetes provider implementation follows the general [Provider structure](./provider.md). This section focuses on the Kubernetes-specific implementation, highlighting how the generic provider concepts are realized for Kubernetes in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `KubernetesProvider` (Main Class)
- **Location:** [`prowler/providers/kubernetes/kubernetes_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/kubernetes_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for Kubernetes-specific logic, session management, context and namespace discovery, credential validation, and configuration.
- **Key Kubernetes Responsibilities:**
- Initializes and manages Kubernetes sessions (supports kubeconfig file or content, context selection, and namespace scoping).
- Validates credentials and sets up the Kubernetes identity context.
- Loads and manages configuration, mutelist, and fixer settings.
- Discovers accessible namespaces and cluster metadata.
- Provides properties and methods for downstream Kubernetes service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/kubernetes/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/models.py)
- **Purpose:** Define structured data for Kubernetes identity and session info.
- **Key Kubernetes Models:**
- `KubernetesIdentityInfo`: Holds Kubernetes identity metadata, such as context, cluster, and user.
- `KubernetesSession`: Stores the Kubernetes API client and context information.
### `KubernetesService` (Service Base Class)
- **Location:** [`prowler/providers/kubernetes/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/lib/service/service.py)
- **Purpose:** Abstract base class that all Kubernetes service-specific classes inherit from. This implements the generic service pattern (described in [service page](./services.md#service-base-class)) specifically for Kubernetes.
- **Key Kubernetes Responsibilities:**
- Receives a `KubernetesProvider` instance to access session, identity, and configuration.
- Manages the Kubernetes API client and context.
- Provides a `__threading_call__` method to make API calls in parallel by resource.
- Exposes common audit context (`context`, `api_client`, `audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/kubernetes/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for Kubernetes-specific error handling, such as session, API, and configuration errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/kubernetes/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/lib/)
- **Purpose:** Helpers for argument parsing, mutelist management, and other cross-cutting concerns.
## Specific Patterns in Kubernetes Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/kubernetes/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/kubernetes/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is following the [service implementation documentation](./services.md#adding-a-new-service) and taking other already implemented services as reference.
### Kubernetes Service Common Patterns
- Services communicate with Kubernetes using the Kubernetes Python SDK. See the [official documentation](https://github.com/kubernetes-client/python/blob/master/kubernetes/README.md/).
- Every Kubernetes service class inherits from `KubernetesService`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the provider object, and initializes resource containers (typically as dictionaries keyed by resource UID or name).
- Resource discovery and attribute collection can be parallelized using `self.__threading_call__`.
- All Kubernetes resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Kubernetes API calls are wrapped in try/except blocks, always logging errors.
- Additional attributes that cannot be retrieved from the default call should be collected and stored for each resource using dedicated methods and threading.
## Specific Patterns in Kubernetes Checks
The Kubernetes checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/kubernetes/services/rbac/rbac_minimize_wildcard_use_roles/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/kubernetes/services/rbac/rbac_minimize_wildcard_use_roles))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [Kubernetes check implementation documentation](./checks.md#creating-a-check) and taking other checks as reference.
### Check Report Class
The `Check_Report_Kubernetes` class models a single finding for a Kubernetes resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`Check_Report_Kubernetes` extends the base report structure with Kubernetes-specific fields, enabling detailed tracking of the resource, name, and namespace associated with each finding.
#### Constructor and Attribute Population
When you instantiate `Check_Report_Kubernetes`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its Kubernetes-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**:
- Uses `resource.uid` if present.
- Otherwise, uses `resource.name` if present.
- Defaults to an empty string if none are available.
- **`resource_name`**:
- Uses `resource.name` if present.
- Defaults to an empty string if not available.
- **`namespace`**:
- Uses `resource.namespace` if present.
- Defaults to "cluster-wide" for cluster-scoped resources.
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = Check_Report_Kubernetes(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
-134
View File
@@ -1,134 +0,0 @@
# Extending Prowler Lighthouse AI
This guide helps developers customize and extend Prowler Lighthouse AI by adding or modifying AI agents.
## Understanding AI Agents
AI agents combine Large Language Models (LLMs) with specialized tools that provide environmental context. These tools can include API calls, system command execution, or any function-wrapped capability.
### Types of AI Agents
AI agents fall into two main categories:
- **Autonomous Agents**: Freely chooses from available tools to complete tasks, adapting their approach based on context. They decide which tools to use and when.
- **Workflow Agents**: Follows structured paths with predefined logic. They execute specific tool sequences and can include conditional logic.
Prowler Lighthouse AI is an autonomous agent - selecting the right tool(s) based on the users query.
???+ note
To learn more about AI agents, read [Anthropic's blog post on building effective agents](https://www.anthropic.com/engineering/building-effective-agents).
### LLM Dependency
The autonomous nature of agents depends on the underlying LLM. Autonomous agents using identical system prompts and tools but powered by different LLM providers might approach user queries differently. Agent with one LLM might solve a problem efficiently, while with another it might take a different route or fail entirely.
After evaluating multiple LLM providers (OpenAI, Gemini, Claude, LLama) based on tool calling features and response accuracy, we recommend using the `gpt-4o` model.
## Prowler Lighthouse AI Architecture
Prowler Lighthouse AI uses a multi-agent architecture orchestrated by the [Langgraph-Supervisor](https://www.npmjs.com/package/@langchain/langgraph-supervisor) library.
### Architecture Components
<img src="../../tutorials/img/lighthouse-architecture.png" alt="Prowler Lighthouse architecture">
Prowler Lighthouse AI integrates with the NextJS application:
- The [Langgraph-Supervisor](https://www.npmjs.com/package/@langchain/langgraph-supervisor) library integrates directly with NextJS
- The system uses the authenticated user session to interact with the Prowler API server
- Agents only access data the current user is authorized to view
- Session management operates automatically, ensuring Role-Based Access Control (RBAC) is maintained
## Available Prowler AI Agents
The following specialized AI agents are available in Prowler:
### Agent Overview
- **provider_agent**: Fetches information about cloud providers connected to Prowler
- **user_info_agent**: Retrieves information about Prowler users
- **scans_agent**: Fetches information about Prowler scans
- **compliance_agent**: Retrieves compliance overviews across scans
- **findings_agent**: Fetches information about individual findings across scans
- **overview_agent**: Retrieves overview information (providers, findings by status and severity, etc.)
## How to Add New Capabilities
### Updating the Supervisor Prompt
The supervisor agent controls system behavior, tone, and capabilities. You can find the supervisor prompt at: [https://github.com/prowler-cloud/prowler/blob/master/ui/lib/lighthouse/prompts.ts](https://github.com/prowler-cloud/prowler/blob/master/ui/lib/lighthouse/prompts.ts)
#### Supervisor Prompt Modifications
Modifying the supervisor prompt allows you to:
- Change personality or response style
- Add new high-level capabilities
- Modify task delegation to specialized agents
- Set up guardrails (query types to answer or decline)
???+ note
The supervisor agent should not have its own tools. This design keeps the system modular and maintainable.
### How to Create New Specialized Agents
The supervisor agent and all specialized agents are defined in the `route.ts` file. The supervisor agent uses [langgraph-supervisor](https://www.npmjs.com/package/@langchain/langgraph-supervisor), while other agents use the prebuilt [create-react-agent](https://langchain-ai.github.io/langgraphjs/how-tos/create-react-agent/).
To add new capabilities or all Lighthouse AI to interact with other APIs, create additional specialized agents:
1. First determine what the new agent would do. Create a detailed prompt defining the agent's purpose and capabilities. You can see an example from [here](https://github.com/prowler-cloud/prowler/blob/master/ui/lib/lighthouse/prompts.ts#L359-L385).
???+ note
Ensure that the new agent's capabilities don't collide with existing agents. For example, if there's already a *findings_agent* that talks to findings APIs don't create a new agent to do the same.
2. Create necessary tools for the agents to access specific data or perform actions. A tool is a specialized function that extends the capabilities of LLM by allowing it to access external data or APIs. A tool is triggered by LLM based on the description of the tool and the user's query.
For example, the description of `getScanTool` is "Fetches detailed information about a specific scan by its ID." If the description doesn't convey what the tool is capable of doing, LLM will not invoke the function. If the description of `getScanTool` was set to something random or not set at all, LLM will not answer queries like "Give me the critical issues from the scan ID xxxxxxxxxxxxxxx"
???+ note
Ensure that one tool is added to one agent only. Adding tools is optional. There can be agents with no tools at all.
3. Use the `createReactAgent` function to define a new agent. For example, the rolesAgent name is "roles_agent" and has access to call tools "*getRolesTool*" and "*getRoleTool*"
```js
const rolesAgent = createReactAgent({
llm: llm,
tools: [getRolesTool, getRoleTool],
name: "roles_agent",
prompt: rolesAgentPrompt,
});
```
4. Create a detailed prompt defining the agent's purpose and capabilities.
5. Add the new agent to the available agents list:
```js
const agents = [
userInfoAgent,
providerAgent,
overviewAgent,
scansAgent,
complianceAgent,
findingsAgent,
rolesAgent, // New agent added here
];
// Create supervisor workflow
const workflow = createSupervisor({
agents: agents,
llm: supervisorllm,
prompt: supervisorPrompt,
outputMode: "last_message",
});
```
6. Update the supervisor's system prompt to summarize the new agent's capabilities.
### Best Practices for Agent Development
When developing new agents or capabilities:
- **Clear Responsibility Boundaries**: Each agent should have a defined purpose with minimal overlap. No two agents should access the same tools or different tools accessing the same Prowler APIs.
- **Minimal Data Access**: Agents should only request the data they need, keeping requests specific to minimize context window usage, cost, and response time.
- **Thorough Prompting:** Ensure agent prompts include clear instructions about:
- The agent's purpose and limitations
- How to use its tools
- How to format responses for the supervisor
- Error handling procedures (Optional)
- **Security Considerations:** Agents should never modify data or access sensitive information like secrets or credentials.
- **Testing:** Thoroughly test new agents with various queries before deploying to production.
-131
View File
@@ -1,131 +0,0 @@
# Microsoft 365 (M365) Provider
This page details the [Microsoft 365 (M365)](https://www.microsoft.com/en-us/microsoft-365) provider implementation in Prowler.
By default, Prowler will audit the Microsoft Entra ID tenant and its supported services. To configure it, follow the [getting started](../index.md#microsoft-365) page.
---
## PowerShell Requirements for M365 Checks
> **Most Microsoft 365 checks in Prowler require PowerShell, not just the Microsoft Graph API.**
- **PowerShell is essential** for retrieving data from Exchange Online, Teams, Defender, Purview, and other M365 services. Many checks cannot be performed using only the Graph API.
- **PowerShell 7.4 or higher is required** (7.5 recommended). PowerShell 5.1 and earlier versions are not supported for M365 checks.
- **Required modules:**
- [ExchangeOnlineManagement](https://www.powershellgallery.com/packages/ExchangeOnlineManagement/3.6.0) (≥ 3.6.0)
- [MicrosoftTeams](https://www.powershellgallery.com/packages/MicrosoftTeams/6.6.0) (≥ 6.6.0)
- If you use Prowler Cloud or the official containers, PowerShell is pre-installed. For local or pip installations, you must install PowerShell and the modules yourself. See [Requirements: Supported PowerShell Versions](../getting-started/requirements.md#supported-powershell-versions) and [Needed PowerShell Modules](../getting-started/requirements.md#needed-powershell-modules).
- For more details and troubleshooting, see [Use of PowerShell in M365](../tutorials/microsoft365/use-of-powershell.md).
---
## M365 Provider Classes Architecture
The M365 provider implementation follows the general [Provider structure](./provider.md). This section focuses on the M365-specific implementation, highlighting how the generic provider concepts are realized for M365 in Prowler. For a full overview of the provider pattern, base classes, and extension guidelines, see [Provider documentation](./provider.md).
### `M365Provider` (Main Class)
- **Location:** [`prowler/providers/m365/m365_provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/m365_provider.py)
- **Base Class:** Inherits from `Provider` (see [base class details](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py)).
- **Purpose:** Central orchestrator for M365-specific logic, session management, credential validation, region/authority configuration, and identity context.
- **Key M365 Responsibilities:**
- Initializes and manages M365 sessions (supports Service Principal, environment variables, Azure CLI, browser, and user/password authentication).
- Validates credentials and sets up the M365 identity context.
- Manages the Microsoft Graph API client and the PowerShell client.
- Loads and manages configuration, mutelist, and fixer settings.
- Provides properties and methods for downstream M365 service classes to access session, identity, and configuration data.
### Data Models
- **Location:** [`prowler/providers/m365/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/models.py)
- **Purpose:** Define structured data for M365 identity, session, region configuration, and credentials.
- **Key M365 Models:**
- `M365IdentityInfo`: Holds M365 identity metadata, including tenant ID, domain(s), user, and location.
- `M365RegionConfig`: Stores the specific region/authority and API base URL for the tenant.
- `M365Credentials`: Represents credentials for authentication (user, password, client ID, client secret, tenant ID, etc.).
### `M365Service` (Service Base Class)
- **Location:** [`prowler/providers/m365/lib/service/service.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/service/service.py)
- **Purpose:** Abstract base class for all M365 service-specific classes.
- **Key M365 Responsibilities:**
- Receives an `M365Provider` instance to access session, identity, and configuration.
- Manages the Microsoft Graph API client for the service.
- Initializes a PowerShell client for most services if credentials and identity are available.
- Exposes common audit context (`audit_config`, `fixer_config`) to subclasses.
### Exception Handling
- **Location:** [`prowler/providers/m365/exceptions/exceptions.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/exceptions/exceptions.py)
- **Purpose:** Custom exception classes for M365-specific error handling, such as credential, session, region, and argument errors.
### Session and Utility Helpers
- **Location:** [`prowler/providers/m365/lib/`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/)
- **Purpose:** Helpers for argument parsing, region/authority setup, mutelist management, PowerShell integration, and other cross-cutting concerns.
> **Key File: [`m365_powershell.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/lib/powershell/m365_powershell.py)**
>
> This is the core module for Microsoft 365 PowerShell integration. It manages authentication, session handling, and provides a comprehensive set of methods for interacting with Microsoft Teams, Exchange Online, and Defender policies via PowerShell.
>
> This module provides secure credential management and authentication using MSAL and PowerShell. It handles automated installation and initialization of required PowerShell modules. The module offers a rich set of methods for retrieving and managing Teams, Exchange, and Defender configurations. It serves as the central component for all M365 provider operations that require PowerShell automation.
## Specific Patterns in M365 Services
The generic service pattern is described in [service page](./services.md#service-structure-and-initialisation). You can find all the currently implemented services in the following locations:
- Directly in the code, in location [`prowler/providers/m365/services/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/m365/services)
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new service is by following the [service implementation documentation](./services.md#adding-a-new-service) and by taking other already implemented services as reference.
### M365 Service Common Patterns
- Services communicate with Microsoft 365 using the Microsoft Graph API **and/or PowerShell**. See the [official documentation](https://learn.microsoft.com/en-us/graph/api/overview) and [PowerShell reference](https://learn.microsoft.com/en-us/powershell/).
- Every M365 service class inherits from `M365Service`, ensuring access to session, identity, configuration, and client utilities.
- The constructor (`__init__`) always calls `super().__init__` with the provider object, and initializes the Graph client and the PowerShell client.
- Resource containers **must** be initialized in the constructor, typically as objects that represent the different settings of the service.
- All M365 resources are represented as Pydantic `BaseModel` classes, providing type safety and structured access to resource attributes.
- Microsoft Graph API and PowerShell calls are wrapped in try/except blocks, always logging errors.
- To retrieve some data in the services, it is so common that you have to create a new method also in the `m365_powershell.py` file to later be called in the service.
## Specific Patterns in M365 Checks
The M365 checks pattern is described in [checks page](./checks.md). You can find all the currently implemented checks in:
- Directly in the code, within each service folder, each check has its own folder named after the name of the check. (e.g. [`prowler/providers/m365/services/entra/entra_users_mfa_enabled/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers/m365/services/entra/entra_users_mfa_enabled))
- In the [Prowler Hub](https://hub.prowler.com/) for a more human-readable view.
The best reference to understand how to implement a new check is following the [M365 check implementation documentation](./checks.md#creating-a-check) and by taking other checks as reference.
### Check Report Class
The `CheckReportM365` class models a single finding for a Microsoft 365 resource in a check report. It is defined in [`prowler/lib/check/models.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/check/models.py) and inherits from the generic `Check_Report` base class.
#### Purpose
`CheckReportM365` extends the base report structure with M365-specific fields, enabling detailed tracking of the resource, name, and location associated with each finding.
#### Constructor and Attribute Population
When you instantiate `CheckReportM365`, you must provide the check metadata and a resource object. The class will attempt to automatically populate its M365-specific attributes from the resource, using the following logic (in order of precedence):
- **`resource_id`**: A required field that **must** be explicitly set in the constructor to identify the resource being checked.
- **`resource_name`**: A required field that **must** be explicitly set in the constructor to provide a human-readable name for the resource.
- **`location`**: A required field that can be explicitly set in the constructor to indicate where the resource is located. If not specified, defaults to "global".
If the resource object does not contain the required attributes, you must set them manually in the check logic.
Other attributes are inherited from the `Check_Report` class, from which you **always** have to set the `status` and `status_extended` attributes in the check logic.
#### Example Usage
```python
report = CheckReportM365(
metadata=check_metadata,
resource=resource_object
)
report.status = "PASS"
report.status_extended = "Resource is compliant."
```
+122 -162
View File
@@ -2,38 +2,21 @@
## Introduction
Prowler supports multiple output formats, allowing users to tailor findings presentation to their needs. Custom output formats are valuable when integrating Prowler with third-party tools, generating specialized reports, or adapting data for specific workflows. By defining a custom output format, users can refine how findings are structured, extracting and displaying only the most relevant information.
Prowler can generate outputs in multiple formats, allowing users to customize the way findings are presented. This is particularly useful when integrating Prowler with third-party tools, creating specialized reports, or simply tailoring the data to meet specific requirements. A custom output format gives you the flexibility to extract and display only the most relevant information in the way you need it.
- Output Organization in Prowler
Prowler outputs are managed within the `/lib/outputs` directory. Each format—such as JSON, CSV, HTML—is implemented as a Python class.
- Outputs are generated based on scan findings, which are stored as structured dictionaries containing details such as:
- Resource IDs
- Severities
- Descriptions
- Other relevant metadata
- Creation Guidelines
Refer to the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) for insights into Prowlers architecture and best practices for creating custom outputs.
- Identify the most suitable integration method for the output being targeted.
* Prowler organizes its outputs in the `/lib/outputs` directory. Each format (e.g., JSON, CSV, HTML) is implemented as a Python class.
* Outputs are generated based on findings collected during a scan. Each finding is represented as a structured dictionary containing details like resource IDs, severities, descriptions, and more.
* Consult the [Prowler Developer Guide](https://docs.prowler.com/projects/prowler-open-source/en/latest/) to understand how Prowler works and the way that you can create it with the desired output!
* Identify the best approach for the specific output youre targeting.
## Steps to Create a Custom Output Format
### Schema
- Output Class:
- The class must inherit from `Output`. Review the [Output Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/outputs/output.py).
- Create a class that encapsulates the required attributes and methods for interacting with the target platform. Below the code for the `CSV` class is presented:
* Output Class:
* The class must inherit from `Output`. Review the [Output Class](https://github.com/prowler-cloud/prowler/blob/master/prowler/lib/outputs/output.py).
* Create a class that encapsulates attributes and methods for the output.
The following is the code for the `CSV` class:
```python title="CSV Class"
class CSV(Output):
def transform(self, findings: List[Finding]) -> None:
@@ -45,137 +28,118 @@ Prowler supports multiple output formats, allowing users to tailor findings pres
"""
...
```
* Transform Method:
* This method will transform the findings provided by Prowler to a specific format.
The following is the code for the `transform` method for the `CSV` class:
```python title="Transform"
def transform(self, findings: List[Finding]) -> None:
"""Transforms the findings into the CSV format.
Args:
findings (list[Finding]): a list of Finding objects
- Transform Method:
- This method will transform the findings provided by Prowler to a specific format.
#### Method Implementation
The following example demonstrates the `transform` method for the `CSV` class:
```python title="Transform"
def transform(self, findings: List[Finding]) -> None:
"""Transforms the findings into the CSV format.
Args:
findings (list[Finding]): a list of Finding objects
"""
try:
for finding in findings:
finding_dict = {}
finding_dict["AUTH_METHOD"] = finding.auth_method
finding_dict["TIMESTAMP"] = finding.timestamp
finding_dict["ACCOUNT_UID"] = finding.account_uid
finding_dict["ACCOUNT_NAME"] = finding.account_name
finding_dict["ACCOUNT_EMAIL"] = finding.account_email
finding_dict["ACCOUNT_ORGANIZATION_UID"] = (
finding.account_organization_uid
)
finding_dict["ACCOUNT_ORGANIZATION_NAME"] = (
finding.account_organization_name
)
finding_dict["ACCOUNT_TAGS"] = unroll_dict(
finding.account_tags, separator=":"
)
finding_dict["FINDING_UID"] = finding.uid
finding_dict["PROVIDER"] = finding.metadata.Provider
finding_dict["CHECK_ID"] = finding.metadata.CheckID
finding_dict["CHECK_TITLE"] = finding.metadata.CheckTitle
finding_dict["CHECK_TYPE"] = unroll_list(finding.metadata.CheckType)
finding_dict["STATUS"] = finding.status.value
finding_dict["STATUS_EXTENDED"] = finding.status_extended
finding_dict["MUTED"] = finding.muted
finding_dict["SERVICE_NAME"] = finding.metadata.ServiceName
finding_dict["SUBSERVICE_NAME"] = finding.metadata.SubServiceName
finding_dict["SEVERITY"] = finding.metadata.Severity.value
finding_dict["RESOURCE_TYPE"] = finding.metadata.ResourceType
finding_dict["RESOURCE_UID"] = finding.resource_uid
finding_dict["RESOURCE_NAME"] = finding.resource_name
finding_dict["RESOURCE_DETAILS"] = finding.resource_details
finding_dict["RESOURCE_TAGS"] = unroll_dict(finding.resource_tags)
finding_dict["PARTITION"] = finding.partition
finding_dict["REGION"] = finding.region
finding_dict["DESCRIPTION"] = finding.metadata.Description
finding_dict["RISK"] = finding.metadata.Risk
finding_dict["RELATED_URL"] = finding.metadata.RelatedUrl
finding_dict["REMEDIATION_RECOMMENDATION_TEXT"] = (
finding.metadata.Remediation.Recommendation.Text
)
finding_dict["REMEDIATION_RECOMMENDATION_URL"] = (
finding.metadata.Remediation.Recommendation.Url
)
finding_dict["REMEDIATION_CODE_NATIVEIAC"] = (
finding.metadata.Remediation.Code.NativeIaC
)
finding_dict["REMEDIATION_CODE_TERRAFORM"] = (
finding.metadata.Remediation.Code.Terraform
)
finding_dict["REMEDIATION_CODE_CLI"] = (
finding.metadata.Remediation.Code.CLI
)
finding_dict["REMEDIATION_CODE_OTHER"] = (
finding.metadata.Remediation.Code.Other
)
finding_dict["COMPLIANCE"] = unroll_dict(
finding.compliance, separator=": "
)
finding_dict["CATEGORIES"] = unroll_list(finding.metadata.Categories)
finding_dict["DEPENDS_ON"] = unroll_list(finding.metadata.DependsOn)
finding_dict["RELATED_TO"] = unroll_list(finding.metadata.RelatedTo)
finding_dict["NOTES"] = finding.metadata.Notes
finding_dict["PROWLER_VERSION"] = finding.prowler_version
self._data.append(finding_dict)
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
"""
try:
for finding in findings:
finding_dict = {}
finding_dict["AUTH_METHOD"] = finding.auth_method
finding_dict["TIMESTAMP"] = finding.timestamp
finding_dict["ACCOUNT_UID"] = finding.account_uid
finding_dict["ACCOUNT_NAME"] = finding.account_name
finding_dict["ACCOUNT_EMAIL"] = finding.account_email
finding_dict["ACCOUNT_ORGANIZATION_UID"] = (
finding.account_organization_uid
)
```
- Batch Write Data To File Method:
- This method will write the modeled object to a file.
#### Method Implementation
The following example demonstrates the `batch_write_data_to_file` method for the `CSV` class:
```python title="Batch Write Data To File"
def batch_write_data_to_file(self) -> None:
"""Writes the findings to a file using the CSV format using the `Output._file_descriptor`."""
try:
if (
getattr(self, "_file_descriptor", None)
and not self._file_descriptor.closed
and self._data
):
csv_writer = DictWriter(
self._file_descriptor,
fieldnames=self._data[0].keys(),
delimiter=";",
)
csv_writer.writeheader()
for finding in self._data:
csv_writer.writerow(finding)
self._file_descriptor.close()
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
finding_dict["ACCOUNT_ORGANIZATION_NAME"] = (
finding.account_organization_name
)
```
finding_dict["ACCOUNT_TAGS"] = unroll_dict(
finding.account_tags, separator=":"
)
finding_dict["FINDING_UID"] = finding.uid
finding_dict["PROVIDER"] = finding.metadata.Provider
finding_dict["CHECK_ID"] = finding.metadata.CheckID
finding_dict["CHECK_TITLE"] = finding.metadata.CheckTitle
finding_dict["CHECK_TYPE"] = unroll_list(finding.metadata.CheckType)
finding_dict["STATUS"] = finding.status.value
finding_dict["STATUS_EXTENDED"] = finding.status_extended
finding_dict["MUTED"] = finding.muted
finding_dict["SERVICE_NAME"] = finding.metadata.ServiceName
finding_dict["SUBSERVICE_NAME"] = finding.metadata.SubServiceName
finding_dict["SEVERITY"] = finding.metadata.Severity.value
finding_dict["RESOURCE_TYPE"] = finding.metadata.ResourceType
finding_dict["RESOURCE_UID"] = finding.resource_uid
finding_dict["RESOURCE_NAME"] = finding.resource_name
finding_dict["RESOURCE_DETAILS"] = finding.resource_details
finding_dict["RESOURCE_TAGS"] = unroll_dict(finding.resource_tags)
finding_dict["PARTITION"] = finding.partition
finding_dict["REGION"] = finding.region
finding_dict["DESCRIPTION"] = finding.metadata.Description
finding_dict["RISK"] = finding.metadata.Risk
finding_dict["RELATED_URL"] = finding.metadata.RelatedUrl
finding_dict["REMEDIATION_RECOMMENDATION_TEXT"] = (
finding.metadata.Remediation.Recommendation.Text
)
finding_dict["REMEDIATION_RECOMMENDATION_URL"] = (
finding.metadata.Remediation.Recommendation.Url
)
finding_dict["REMEDIATION_CODE_NATIVEIAC"] = (
finding.metadata.Remediation.Code.NativeIaC
)
finding_dict["REMEDIATION_CODE_TERRAFORM"] = (
finding.metadata.Remediation.Code.Terraform
)
finding_dict["REMEDIATION_CODE_CLI"] = (
finding.metadata.Remediation.Code.CLI
)
finding_dict["REMEDIATION_CODE_OTHER"] = (
finding.metadata.Remediation.Code.Other
)
finding_dict["COMPLIANCE"] = unroll_dict(
finding.compliance, separator=": "
)
finding_dict["CATEGORIES"] = unroll_list(finding.metadata.Categories)
finding_dict["DEPENDS_ON"] = unroll_list(finding.metadata.DependsOn)
finding_dict["RELATED_TO"] = unroll_list(finding.metadata.RelatedTo)
finding_dict["NOTES"] = finding.metadata.Notes
finding_dict["PROWLER_VERSION"] = finding.prowler_version
self._data.append(finding_dict)
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
```
* Batch Write Data To File Method:
* This method will write the modeled object to a file.
The following is the code for the `batch_write_data_to_file` method for the `CSV` class:
```python title="Batch Write Data To File"
def batch_write_data_to_file(self) -> None:
"""Writes the findings to a file using the CSV format using the `Output._file_descriptor`."""
try:
if (
getattr(self, "_file_descriptor", None)
and not self._file_descriptor.closed
and self._data
):
csv_writer = DictWriter(
self._file_descriptor,
fieldnames=self._data[0].keys(),
delimiter=";",
)
csv_writer.writeheader()
for finding in self._data:
csv_writer.writerow(finding)
self._file_descriptor.close()
except Exception as error:
logger.error(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
```
### Integrating the Custom Output Format into Prowler
Once the custom output format is created, it must be integrated into Prowler to ensure compatibility with the existing architecture.
#### Reviewing Current Supported Outputs
Before implementing the new output format, examine the usage of currently supported formats to understand their structure and integration approach. Example: CSV Output Creation in Prowler
Below is an example of how Prowler generates and processes CSV output within its [codebase](https://github.com/prowler-cloud/prowler/blob/master/prowler/__main__.py):
### Integration With The Current Code
Once that the desired output format is created it has to be integrated with Prowler. Take a look at the the usage from the current supported output in order to add the new one.
Here is an example of the CSV output creation inside [prowler code](https://github.com/prowler-cloud/prowler/blob/master/prowler/__main__.py):
```python title="CSV creation"
if mode == "csv":
csv_output = CSV(
@@ -184,23 +148,19 @@ if mode == "csv":
file_path=f"{filename}{csv_file_suffix}",
)
generated_outputs["regular"].append(csv_output)
# Write CSV Finding Object to file.
# Write CSV Finding Object to file
csv_output.batch_write_data_to_file()
```
### Testing
* Verify that Prowlers findings are accurately typed in the desired output format.
* Error Handling Simulate edge cases to assess robustness and failure recovery mechanisms.
* Verify that Prowlers findings are accurately writed in the desired output format.
* Simulate edge cases to ensure robust error handling.
### Documentation
* Ensure the following elements are included:
* Setup Instructions List all necessary dependencies and installation steps.
* Provide clear, detailed documentation for your output:
* Setup instructions, including any required dependencies.
* Configuration details.
* Example Use Cases Provide practical scenarios demonstrating functionality.
* Troubleshooting Guide Document common issues and resolution steps.
* Comprehensive and clear documentation improves maintainability and simplifies onboarding of new users.
* Example use cases and troubleshooting tips.
* Good documentation ensures maintainability and simplifies onboarding for new users.
+173 -71
View File
@@ -1,78 +1,187 @@
# Prowler Providers
# Create a new Provider for Prowler
Here you can find how to create a new Provider in Prowler to give support for making all security checks needed and make your cloud safer!
## Introduction
Providers form the backbone of Prowler, enabling security assessments across various cloud environments.
Providers are the foundation on which Prowler is built, a simple definition for a cloud provider could be "third-party company that offers a platform where any IT resource you need is available at any time upon request". The most well-known cloud providers are Amazon Web Services, Azure from Microsoft and Google Cloud which are already supported by Prowler.
A provider is any platform or service that offers resources, data, or functionality that can be audited for security and compliance. This includes:
To create a new provider that is not supported now by Prowler and add your security checks you must create a new folder to store all the related files within it (services, checks, etc.). It must be store in route `prowler/providers/<new_provider_name>/`.
- Cloud Infrastructure Providers (like Amazon Web Services, Microsoft Azure, and Google Cloud)
- Software as a Service (SaaS) Platforms (like Microsoft 365)
- Development Platforms (like GitHub)
- Container Orchestration Platforms (like Kubernetes)
Inside that folder, you MUST create the following files and folders:
For providers supported by Prowler, refer to [Prowler Hub](https://hub.prowler.com/).
- A `lib` folder: to store all extra functions.
- A `services` folder: to store all [services](./services.md) to audit.
- An empty `__init__.py`: to make Python treat this service folder as a package.
- A `<new_provider_name>_provider.py`, containing all the provider's logic necessary to get authenticated in the provider, configurations and extra data useful for final report.
- A `models.py`, containing all the models necessary for the new provider.
???+ important
There are some custom providers added by the community, like [NHN Cloud](https://www.nhncloud.com/), that are not maintained by the Prowler team, but can be used in the Prowler CLI. They can be checked directly at the [Prowler GitHub repository](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
## Provider
## Adding a New Provider
To integrate an unsupported Prowler provider and implement its security checks, create a dedicated folder for all related files (e.g., services, checks)."
This folder must be placed within [`prowler/providers/<new_provider_name>/`](https://github.com/prowler-cloud/prowler/tree/master/prowler/providers).
Within this folder the following folders are also to be created:
- `lib` Stores additional utility functions and core files required by every provider. The following files and subfolders are commonly found in every provider's `lib` folder:
- `service/service.py` Provides a generic service class to be inherited by all services.
- `arguments/arguments.py` Handles provider-specific argument parsing.
- `mutelist/mutelist.py` Manages the mutelist functionality for the provider.
- `services` Stores all [services](./services.md) that the provider offers and want to be audited by [Prowler checks](./checks.md).
- `__init__.py` (empty) Ensures Python recognizes this folder as a package.
- `<new_provider_name>_provider.py` Defines authentication logic, configurations, and other provider-specific data.
- `models.py` Contains necessary models for the new provider.
By adhering to this structure, Prowler can effectively support services and security checks for additional providers.
???+ important
If your new provider requires a Python library (such as an official SDK or API client) to connect to its services, make sure to add it as a dependency in the `pyproject.toml` file. This ensures that all contributors and users have the necessary packages installed when working with your provider.
## Provider Structure in Prowler
Prowler's provider architecture is designed to facilitate security audits through a generic service tailored to each provider. This is accomplished by passing the necessary parameters to the constructor, which initializes all required session values.
The structure for Prowler's providers is set up in such a way that they can be utilized through a generic service specific to each provider. This is achieved by passing the required parameters to the constructor, which in turn initializes all the necessary session values.
### Base Class
All Prowler providers inherit from the same base class located in [`prowler/providers/common/provider.py`](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py). It is an [abstract base class](https://docs.python.org/3/library/abc.html) that defines the interface for all provider classes.
All the providers in Prowler inherits from the same [base class](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/common/provider.py). It is an [abstract base class](https://docs.python.org/3/library/abc.html) that defines the interface for all provider classes. The code of the class is the next:
```python title="Provider Base Class"
from abc import ABC, abstractmethod
from typing import Any
class Provider(ABC):
"""
The Provider class is an abstract base class that defines the interface for all provider classes in the auditing system.
Attributes:
type (property): The type of the provider.
identity (property): The identity of the provider for auditing.
session (property): The session of the provider for auditing.
audit_config (property): The audit configuration of the provider.
output_options (property): The output configuration of the provider for auditing.
Methods:
print_credentials(): Displays the provider's credentials used for auditing in the command-line interface.
setup_session(): Sets up the session for the provider.
validate_arguments(): Validates the arguments for the provider.
get_checks_to_execute_by_audit_resources(): Returns a set of checks based on the input resources to scan.
Note:
This is an abstract base class and should not be instantiated directly. Each provider should implement its own
version of the Provider class by inheriting from this base class and implementing the required methods and properties.
"""
@property
@abstractmethod
def type(self) -> str:
"""
type method stores the provider's type.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def identity(self) -> str:
"""
identity method stores the provider's identity to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@abstractmethod
def setup_session(self) -> Any:
"""
setup_session sets up the session for the provider.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def session(self) -> str:
"""
session method stores the provider's session to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def audit_config(self) -> str:
"""
audit_config method stores the provider's audit configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@abstractmethod
def print_credentials(self) -> None:
"""
print_credentials is used to display in the CLI the provider's credentials used to audit.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@property
@abstractmethod
def output_options(self) -> str:
"""
output_options method returns the provider's audit output configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@output_options.setter
@abstractmethod
def output_options(self, value: str) -> Any:
"""
output_options.setter sets the provider's audit output configuration.
This method needs to be created in each provider.
"""
raise NotImplementedError()
def validate_arguments(self) -> None:
"""
validate_arguments validates the arguments for the provider.
This method can be overridden in each provider if needed.
"""
raise NotImplementedError()
def get_checks_to_execute_by_audit_resources(self) -> set:
"""
get_checks_to_execute_by_audit_resources returns a set of checks based on the input resources to scan.
This is a fallback that returns None if the service has not implemented this function.
"""
return set()
@property
@abstractmethod
def mutelist(self):
"""
mutelist method returns the provider's mutelist.
This method needs to be created in each provider.
"""
raise NotImplementedError()
@mutelist.setter
@abstractmethod
def mutelist(self, path: str):
"""
mutelist.setter sets the provider's mutelist.
This method needs to be created in each provider.
"""
raise NotImplementedError()
```
### Provider Class
#### Provider Implementation Guidance
Given the complexity and variability of providers, use existing provider implementations as templates when developing new integrations.
Due to the complexity and differences of each provider use the rest of the providers as a template for the implementation.
- [AWS](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/aws/aws_provider.py)
- [GCP](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/gcp/gcp_provider.py)
- [Azure](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/azure/azure_provider.py)
- [Kubernetes](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/kubernetes/kubernetes_provider.py)
- [Microsoft365](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/microsoft365/microsoft365_provider.py)
- [GitHub](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/github/github_provider.py)
- [M365](https://github.com/prowler-cloud/prowler/blob/master/prowler/providers/m365/m365_provider.py)
### Basic Provider Implementation: Pseudocode Example
To simplify understanding, the following pseudocode outlines the fundamental structure of a provider, including library imports necessary for authentication.
To facilitate understanding here is a pseudocode of how the most basic provider could be with examples.
```python title="Provider Example Class"
# Library Imports for Authentication
# When implementing authentication for a provider, import the required libraries.
# Library imports to authenticate in the Provider
from prowler.config.config import load_and_validate_config_file
from prowler.lib.logger import logger
@@ -81,14 +190,14 @@ from prowler.lib.utils.utils import print_boxes
from prowler.providers.common.models import Audit_Metadata
from prowler.providers.common.provider import Provider
from prowler.providers.<new_provider_name>.models import (
# All provider models needed.
# All providers models needed
ProviderSessionModel,
ProviderIdentityModel,
ProviderOutputOptionsModel
)
class NewProvider(Provider):
# All properties from the class, some of which are properties in the base class.
# All properties from the class, some of this are properties in the base class
_type: str = "<provider_name>"
_session: <ProviderSessionModel>
_identity: <ProviderIdentityModel>
@@ -104,30 +213,20 @@ class NewProvider(Provider):
arguments (dict): A dictionary containing configuration arguments.
"""
logger.info("Setting <NewProviderName> provider ...")
# First get from arguments the necessary from the cloud account (subscriptions or projects or whatever the provider use for storing services)
# Initializing the Provider Session
# Steps:
# - Retrieve Account Information
# - Extract relevant account identifiers (subscriptions, projects, or other service references) from the provided arguments.
# Establish a Session
# Use the method enforced by the parent class to set up the session:
# Set the session with the method enforced by parent class
self._session = self.setup_session(credentials_file)
# Define Provider Identity
# Assign the identity class, typically provided by the Python provider library:
# Set the Identity class normaly the provider class give by Python provider library
self._identity = <ProviderIdentityModel>()
# Configure the Provider
# Set the provider-specific configuration.
# Set the provider configuration
self._audit_config = load_and_validate_config_file(
self._type, arguments.config_file
)
# All the enforced properties by the parent class.
# All enforced properties by the parent class
@property
def identity(self):
return self._identity
@@ -153,7 +252,7 @@ class NewProvider(Provider):
Sets up the Provider session.
Args:
<all_needed_for_auth> Can include all necessary arguments to set up the session
<all_needed_for_auth> Can include all necessary arguments to setup the session
Returns:
Credentials necessary to communicate with the provider.
@@ -163,8 +262,11 @@ class NewProvider(Provider):
"""
This method is enforced by parent class and is used to print all relevant
information during the prowler execution as a header of execution.
Displaying Account Information with Color Formatting. In Prowler, Account IDs, usernames, and other identifiers are typically displayed using color formatting provided by the colorama module (Fore).
Normally the Account ID, User name or stuff like this is displayed in colors using the colorama module (Fore).
"""
def print_credentials(self):
pass
```
@@ -1,25 +1,20 @@
# Creating a New Security Compliance Framework in Prowler
# Create a new security compliance framework
## Introduction
To create or contribute a custom security framework for Prowler—or to integrate a public framework—you must ensure the necessary checks are available. If they are missing, they must be implemented before proceeding.
Each framework is defined in a compliance file per provider. The file should follow the structure used in `prowler/compliance/<provider>/` and be named `<framework>_<version>_<provider>.json`. Follow the format below to create your own.
If you want to create or contribute with your own security frameworks or add public ones to Prowler you need to make sure the checks are available if not you have to create your own. Then create a compliance file per provider like in `prowler/compliance/<provider>/` and name it as `<framework>_<version>_<provider>.json` then follow the following format to create yours.
## Compliance Framework
Each file version of a framework will have the following structure at high level with the case that each framework needs to be generally identified, one requirement can be also called one control but one requirement can be linked to multiple prowler checks.:
### Compliance Framework Structure
Each compliance framework file consists of structured metadata that identifies the framework and maps security checks to requirements or controls. Please note that a single requirement can be linked to multiple Prowler checks:
- `Framework`: string The distinguished name of the framework (e.g., CIS).
- `Provider`: string The cloud provider where the framework applies (AWS, Azure, OCI).
- `Version`: string The framework version (e.g., 1.4 for CIS).
- `Requirements`: array of objects. Defines security requirements and their mapping to Prowler checks. All requirements or controls are to be included with the mapping to Prowler.
- `Requirements_Id`: string A unique identifier for each requirement within the framework
- `Requirements_Description`: string The requirement description as specified in the framework.
- `Requirements_Attributes`: array of objects. Contains relevant metadata such as security levels, sections, and any additional data needed for reporting with the result of the findings. Attributes should be derived directly from the frameworks own terminology, ensuring consistency with its established definitions.
- `Requirements_Checks`: array. The Prowler checks that are needed to prove this requirement. It can be one or multiple checks. In case automation is not feasible, this can be empty.
- `Framework`: string. Distinguish name of the framework, like CIS
- `Provider`: string. Provider where the framework applies, such as AWS, Azure, OCI,...
- `Version`: string. Version of the framework itself, like 1.4 for CIS.
- `Requirements`: array of objects. Include all requirements or controls with the mapping to Prowler.
- `Requirements_Id`: string. Unique identifier per each requirement in the specific framework
- `Requirements_Description`: string. Description as in the framework.
- `Requirements_Attributes`: array of objects. Includes all needed attributes per each requirement, like levels, sections, etc. Whatever helps to create a dedicated report with the result of the findings. Attributes would be taken as closely as possible from the framework's own terminology directly.
- `Requirements_Checks`: array. Prowler checks that are needed to prove this requirement. It can be one or multiple checks. In case of no automation possible this can be empty.
```
{
@@ -28,9 +23,9 @@ Each compliance framework file consists of structured metadata that identifies t
"Requirements": [
{
"Id": "<unique-id>",
"Description": "Full description of the requirement",
"Description": "Requirement full description",
"Checks": [
"Here is the prowler check or checks that will be executed"
"Here is the prowler check or checks that is going to be executed"
],
"Attributes": [
{
@@ -43,4 +38,4 @@ Each compliance framework file consists of structured metadata that identifies t
}
```
Finally, to have a proper output file for your reports, your framework data model has to be created in `prowler/lib/outputs/models.py` and also the CLI table output in `prowler/lib/outputs/compliance.py`. Also, you need to add a new conditional in `prowler/lib/outputs/file_descriptors.py` if creating a new CSV model.
Finally, to have a proper output file for your reports, your framework data model has to be created in `prowler/lib/outputs/models.py` and also the CLI table output in `prowler/lib/outputs/compliance.py`. Also, you need to add a new conditional in `prowler/lib/outputs/file_descriptors.py` if you create a new CSV model.

Some files were not shown because too many files have changed in this diff Show More