Compare commits

..

4 Commits

Author SHA1 Message Date
Andoni A. b48b381624 feat(image): add private registry authentication support
Add registry_username, registry_password, and registry_token params
to ImageProvider following the IaC provider pattern (explicit params
with env var fallback). Credentials are injected as environment
variables into Trivy subprocess calls.
2026-02-06 08:18:05 +01:00
Andoni A. 9d5e3f4758 fix(image): replace sys.exit calls with exceptions, fix mutable defaults, add tests
- Create exceptions module (codes 9000-9005) following OCI provider pattern
- Replace all sys.exit(1) calls with typed exceptions
- Fix mutable default arguments in ImageProvider and CheckReportImage
- Add return type hints to all properties and methods
- Add ResourceGroup field to metadata dict
- Add test suite with 23 test cases covering initialization, finding
  processing, scan execution, error handling, and connection testing
- Update CHANGELOG with container image provider entry
2026-02-05 20:37:29 +01:00
Andoni A. beb74a6459 Merge remote-tracking branch 'origin/master' into image-scan-poc 2026-02-05 19:20:34 +01:00
Andoni A. f42de0d21b feat(image): add container image provider POC
Add initial proof of concept for a container image security scanning
provider that uses Trivy for vulnerability detection in container images.
2026-01-29 08:41:11 +01:00
569 changed files with 5589 additions and 35533 deletions
+1 -20
View File
@@ -48,26 +48,6 @@ POSTGRES_DB=prowler_db
# POSTGRES_REPLICA_MAX_ATTEMPTS=3
# POSTGRES_REPLICA_RETRY_BASE_DELAY=0.5
# Neo4j auth
NEO4J_HOST=neo4j
NEO4J_PORT=7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=neo4j_password
# Neo4j settings
NEO4J_DBMS_MAX__DATABASES=1000
NEO4J_SERVER_MEMORY_PAGECACHE_SIZE=1G
NEO4J_SERVER_MEMORY_HEAP_INITIAL__SIZE=1G
NEO4J_SERVER_MEMORY_HEAP_MAX__SIZE=1G
NEO4J_POC_EXPORT_FILE_ENABLED=true
NEO4J_APOC_IMPORT_FILE_ENABLED=true
NEO4J_APOC_IMPORT_FILE_USE_NEO4J_CONFIG=true
NEO4J_PLUGINS=["apoc"]
NEO4J_DBMS_SECURITY_PROCEDURES_ALLOWLIST=apoc.*
NEO4J_DBMS_SECURITY_PROCEDURES_UNRESTRICTED=apoc.*
NEO4J_DBMS_CONNECTOR_BOLT_LISTEN_ADDRESS=0.0.0.0:7687
# Neo4j Prowler settings
ATTACK_PATHS_FINDINGS_BATCH_SIZE=1000
# Celery-Prowler task settings
TASK_RETRY_DELAY_SECONDS=0.1
TASK_RETRY_ATTEMPTS=5
@@ -137,6 +117,7 @@ SENTRY_ENVIRONMENT=local
SENTRY_RELEASE=local
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.16.0
@@ -29,7 +29,7 @@ runs:
run: |
BRANCH_NAME="${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}"
echo "Using branch: $BRANCH_NAME"
sed -i "s|\(git+https://github.com/prowler-cloud/prowler[^@]*\)@master|\1@$BRANCH_NAME|g" pyproject.toml
sed -i "s|@master|@$BRANCH_NAME|g" pyproject.toml
- name: Install poetry
shell: bash
+2 -13
View File
@@ -46,17 +46,12 @@ provider/oci:
- changed-files:
- any-glob-to-any-file: "prowler/providers/oraclecloud/**"
- any-glob-to-any-file: "tests/providers/oraclecloud/**"
provider/alibabacloud:
- changed-files:
- any-glob-to-any-file: "prowler/providers/alibabacloud/**"
- any-glob-to-any-file: "tests/providers/alibabacloud/**"
provider/cloudflare:
- changed-files:
- any-glob-to-any-file: "prowler/providers/cloudflare/**"
- any-glob-to-any-file: "tests/providers/cloudflare/**"
github_actions:
- changed-files:
- any-glob-to-any-file: ".github/workflows/*"
@@ -72,21 +67,15 @@ mutelist:
- any-glob-to-any-file: "prowler/providers/azure/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/gcp/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/kubernetes/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/m365/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/mongodbatlas/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/oraclecloud/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/alibabacloud/lib/mutelist/**"
- any-glob-to-any-file: "prowler/providers/cloudflare/lib/mutelist/**"
- any-glob-to-any-file: "tests/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/aws/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/azure/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/gcp/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/kubernetes/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/m365/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/mongodbatlas/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/oraclecloud/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/oci/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/alibabacloud/lib/mutelist/**"
- any-glob-to-any-file: "tests/providers/cloudflare/lib/mutelist/**"
integration/s3:
- changed-files:
-1
View File
@@ -46,7 +46,6 @@ jobs:
api/docs/**
api/README.md
api/CHANGELOG.md
api/AGENTS.md
- name: Setup Python with Poetry
if: steps.check-changes.outputs.any_changed == 'true'
@@ -74,7 +74,6 @@ jobs:
api/docs/**
api/README.md
api/CHANGELOG.md
api/AGENTS.md
- name: Set up Docker Buildx
if: steps.check-changes.outputs.any_changed == 'true'
+1 -3
View File
@@ -46,7 +46,6 @@ jobs:
api/docs/**
api/README.md
api/CHANGELOG.md
api/AGENTS.md
- name: Setup Python with Poetry
if: steps.check-changes.outputs.any_changed == 'true'
@@ -61,8 +60,7 @@ jobs:
- name: Safety
if: steps.check-changes.outputs.any_changed == 'true'
run: poetry run safety check --ignore 79023,79027
# TODO: 79023 & 79027 knack ReDoS until `azure-cli-core` (via `cartography`) allows `knack` >=0.13.0
run: poetry run safety check
- name: Vulture
if: steps.check-changes.outputs.any_changed == 'true'
-1
View File
@@ -86,7 +86,6 @@ jobs:
api/docs/**
api/README.md
api/CHANGELOG.md
api/AGENTS.md
- name: Setup Python with Poetry
if: steps.check-changes.outputs.any_changed == 'true'
+1 -19
View File
@@ -42,16 +42,14 @@ jobs:
ui/**
prowler/**
mcp_server/**
poetry.lock
pyproject.toml
- name: Check for folder changes and changelog presence
id: check-folders
run: |
missing_changelogs=""
# Check api folder
if [[ "${{ steps.changed-files.outputs.any_changed }}" == "true" ]]; then
# Check monitored folders
for folder in $MONITORED_FOLDERS; do
# Get files changed in this folder
changed_in_folder=$(echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | grep "^${folder}/" || true)
@@ -66,22 +64,6 @@ jobs:
fi
fi
done
# Check root-level dependency files (poetry.lock, pyproject.toml)
# These are associated with the prowler folder changelog
root_deps_changed=$(echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | grep -E "^(poetry\.lock|pyproject\.toml)$" || true)
if [ -n "$root_deps_changed" ]; then
echo "Detected changes in root dependency files: $root_deps_changed"
# Check if prowler/CHANGELOG.md was already updated (might have been caught above)
prowler_changelog_updated=$(echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | grep "^prowler/CHANGELOG.md$" || true)
if [ -z "$prowler_changelog_updated" ]; then
# Only add if prowler wasn't already flagged
if ! echo "$missing_changelogs" | grep -q "prowler"; then
echo "No changelog update found for root dependency changes"
missing_changelogs="${missing_changelogs}- \`prowler\` (root dependency files changed)"$'\n'
fi
fi
fi
fi
{
+1 -3
View File
@@ -47,7 +47,6 @@ jobs:
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
@@ -56,7 +55,6 @@ jobs:
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
@@ -85,7 +83,7 @@ jobs:
- name: Check format with black
if: steps.check-changes.outputs.any_changed == 'true'
run: poetry run black --exclude "api|ui|skills" --check .
run: poetry run black --exclude api ui skills --check .
- name: Lint with pylint
if: steps.check-changes.outputs.any_changed == 'true'
@@ -78,7 +78,6 @@ jobs:
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
@@ -87,7 +86,6 @@ jobs:
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Set up Docker Buildx
if: steps.check-changes.outputs.any_changed == 'true'
-2
View File
@@ -42,7 +42,6 @@ jobs:
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
@@ -51,7 +50,6 @@ jobs:
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
-2
View File
@@ -47,7 +47,6 @@ jobs:
ui/**
dashboard/**
mcp_server/**
skills/**
README.md
mkdocs.yml
.backportrc.json
@@ -56,7 +55,6 @@ jobs:
examples/**
.gitignore
contrib/**
**/AGENTS.md
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
@@ -73,7 +73,6 @@ jobs:
files_ignore: |
ui/CHANGELOG.md
ui/README.md
ui/AGENTS.md
- name: Set up Docker Buildx
if: steps.check-changes.outputs.any_changed == 'true'
+6 -10
View File
@@ -116,7 +116,7 @@ jobs:
- name: Setup Node.js environment
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
with:
node-version: '24.13.0'
node-version: '20.x'
- name: Setup pnpm
uses: pnpm/action-setup@v4
with:
@@ -125,20 +125,16 @@ jobs:
- name: Get pnpm store directory
shell: bash
run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
- name: Setup pnpm and Next.js cache
- name: Setup pnpm cache
uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
with:
path: |
${{ env.STORE_PATH }}
./ui/node_modules
./ui/.next/cache
key: ${{ runner.os }}-pnpm-nextjs-${{ hashFiles('ui/pnpm-lock.yaml') }}-${{ hashFiles('ui/**/*.ts', 'ui/**/*.tsx', 'ui/**/*.js', 'ui/**/*.jsx') }}
path: ${{ env.STORE_PATH }}
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('ui/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-nextjs-${{ hashFiles('ui/pnpm-lock.yaml') }}-
${{ runner.os }}-pnpm-nextjs-
${{ runner.os }}-pnpm-store-
- name: Install UI dependencies
working-directory: ./ui
run: pnpm install --frozen-lockfile --prefer-offline
run: pnpm install --frozen-lockfile
- name: Build UI application
working-directory: ./ui
run: pnpm run build
+6 -11
View File
@@ -16,7 +16,7 @@ concurrency:
env:
UI_WORKING_DIR: ./ui
NODE_VERSION: '24.13.0'
NODE_VERSION: '20.x'
jobs:
ui-tests:
@@ -42,7 +42,6 @@ jobs:
files_ignore: |
ui/CHANGELOG.md
ui/README.md
ui/AGENTS.md
- name: Setup Node.js ${{ env.NODE_VERSION }}
if: steps.check-changes.outputs.any_changed == 'true'
@@ -62,22 +61,18 @@ jobs:
shell: bash
run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
- name: Setup pnpm and Next.js cache
- name: Setup pnpm cache
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
with:
path: |
${{ env.STORE_PATH }}
${{ env.UI_WORKING_DIR }}/node_modules
${{ env.UI_WORKING_DIR }}/.next/cache
key: ${{ runner.os }}-pnpm-nextjs-${{ hashFiles('ui/pnpm-lock.yaml') }}-${{ hashFiles('ui/**/*.ts', 'ui/**/*.tsx', 'ui/**/*.js', 'ui/**/*.jsx') }}
path: ${{ env.STORE_PATH }}
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('ui/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-nextjs-${{ hashFiles('ui/pnpm-lock.yaml') }}-
${{ runner.os }}-pnpm-nextjs-
${{ runner.os }}-pnpm-store-
- name: Install dependencies
if: steps.check-changes.outputs.any_changed == 'true'
run: pnpm install --frozen-lockfile --prefer-offline
run: pnpm install --frozen-lockfile
- name: Run healthcheck
if: steps.check-changes.outputs.any_changed == 'true'
+1 -3
View File
@@ -150,10 +150,8 @@ node_modules
# Persistent data
_data/
# AI Instructions (generated by skills/setup.sh from AGENTS.md)
# Claude
CLAUDE.md
GEMINI.md
.github/copilot-instructions.md
# Compliance report
*.pdf
+2 -3
View File
@@ -42,7 +42,7 @@ repos:
"--remove-unused-variable",
]
- repo: https://github.com/pycqa/isort
- repo: https://github.com/timothycrosley/isort
rev: 5.13.2
hooks:
- id: isort
@@ -120,8 +120,7 @@ repos:
name: safety
description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities"
# TODO: Botocore needs urllib3 1.X so we need to ignore these vulnerabilities 77744,77745. Remove this once we upgrade to urllib3 2.X
# TODO: 79023 & 79027 knack ReDoS until `azure-cli-core` (via `cartography`) allows `knack` >=0.13.0
entry: bash -c 'safety check --ignore 70612,66963,74429,76352,76353,77744,77745,79023,79027'
entry: bash -c 'safety check --ignore 70612,66963,74429,76352,76353,77744,77745'
language: system
- id: vulture
-57
View File
@@ -36,68 +36,11 @@ Use these skills for detailed patterns on-demand:
| `prowler-test-api` | API testing (pytest-django + RLS) | [SKILL.md](skills/prowler-test-api/SKILL.md) |
| `prowler-test-ui` | E2E testing (Playwright) | [SKILL.md](skills/prowler-test-ui/SKILL.md) |
| `prowler-compliance` | Compliance framework structure | [SKILL.md](skills/prowler-compliance/SKILL.md) |
| `prowler-compliance-review` | Review compliance framework PRs | [SKILL.md](skills/prowler-compliance-review/SKILL.md) |
| `prowler-provider` | Add new cloud providers | [SKILL.md](skills/prowler-provider/SKILL.md) |
| `prowler-changelog` | Changelog entries (keepachangelog.com) | [SKILL.md](skills/prowler-changelog/SKILL.md) |
| `prowler-ci` | CI checks and PR gates (GitHub Actions) | [SKILL.md](skills/prowler-ci/SKILL.md) |
| `prowler-pr` | Pull request conventions | [SKILL.md](skills/prowler-pr/SKILL.md) |
| `prowler-docs` | Documentation style guide | [SKILL.md](skills/prowler-docs/SKILL.md) |
| `skill-creator` | Create new AI agent skills | [SKILL.md](skills/skill-creator/SKILL.md) |
### Auto-invoke Skills
When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Adding new providers | `prowler-provider` |
| Adding services to existing providers | `prowler-provider` |
| After creating/modifying a skill | `skill-sync` |
| App Router / Server Actions | `nextjs-15` |
| Building AI chat features | `ai-sdk-5` |
| Create PR that requires changelog entry | `prowler-changelog` |
| Create a PR with gh pr create | `prowler-pr` |
| Creating Zod schemas | `zod-4` |
| Creating new checks | `prowler-sdk-check` |
| Creating new skills | `skill-creator` |
| Creating/modifying Prowler UI components | `prowler-ui` |
| Creating/modifying models, views, serializers | `prowler-api` |
| Creating/updating compliance frameworks | `prowler-compliance` |
| Debug why a GitHub Actions job is failing | `prowler-ci` |
| Fill .github/pull_request_template.md (Context/Description/Steps to review/Checklist) | `prowler-pr` |
| General Prowler development questions | `prowler` |
| Generic DRF patterns | `django-drf` |
| Inspect PR CI checks and gates (.github/workflows/*) | `prowler-ci` |
| Inspect PR CI workflows (.github/workflows/*): conventional-commit, pr-check-changelog, pr-conflict-checker, labeler | `prowler-pr` |
| Mapping checks to compliance controls | `prowler-compliance` |
| Mocking AWS with moto in tests | `prowler-test-sdk` |
| Regenerate AGENTS.md Auto-invoke tables (sync.sh) | `skill-sync` |
| Review PR requirements: template, title conventions, changelog gate | `prowler-pr` |
| Review changelog format and conventions | `prowler-changelog` |
| Reviewing compliance framework PRs | `prowler-compliance-review` |
| Testing RLS tenant isolation | `prowler-test-api` |
| Troubleshoot why a skill is missing from AGENTS.md auto-invoke | `skill-sync` |
| Understand CODEOWNERS/labeler-based automation | `prowler-ci` |
| Understand PR title conventional-commit validation | `prowler-ci` |
| Understand changelog gate and no-changelog label behavior | `prowler-ci` |
| Understand review ownership with CODEOWNERS | `prowler-pr` |
| Update CHANGELOG.md in any component | `prowler-changelog` |
| Updating existing checks and metadata | `prowler-sdk-check` |
| Using Zustand stores | `zustand-5` |
| Working on MCP server tools | `prowler-mcp` |
| Working on Prowler UI structure (actions/adapters/types/hooks) | `prowler-ui` |
| Working with Prowler UI test helpers/pages | `prowler-test-ui` |
| Working with Tailwind classes | `tailwind-4` |
| Writing Playwright E2E tests | `playwright` |
| Writing Prowler API tests | `prowler-test-api` |
| Writing Prowler SDK tests | `prowler-test-sdk` |
| Writing Prowler UI E2E tests | `prowler-test-ui` |
| Writing Python tests with pytest | `pytest` |
| Writing React components | `react-19` |
| Writing TypeScript types/interfaces | `typescript` |
| Writing documentation | `prowler-docs` |
---
## Project Overview
-17
View File
@@ -80,23 +80,6 @@ prowler dashboard
```
![Prowler Dashboard](docs/images/products/dashboard.png)
## Attack Paths
Attack Paths automatically extends every completed AWS scan with a Neo4j graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan and therefore requires:
- An accessible Neo4j instance (the Docker Compose files already ship a `neo4j` service).
- The following environment variables so Django and Celery can connect:
| Variable | Description | Default |
| --- | --- | --- |
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
Every AWS provider scan will enqueue an Attack Paths ingestion job automatically. Other cloud providers will be added in future iterations.
# Prowler at a Glance
> [!Tip]
> For the most accurate and up-to-date information about checks, services, frameworks, and categories, visit [**Prowler Hub**](https://hub.prowler.com).
-18
View File
@@ -6,24 +6,6 @@
> - [`django-drf`](../skills/django-drf/SKILL.md) - Generic DRF patterns
> - [`pytest`](../skills/pytest/SKILL.md) - Generic pytest patterns
### Auto-invoke Skills
When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Create PR that requires changelog entry | `prowler-changelog` |
| Creating/modifying models, views, serializers | `prowler-api` |
| Generic DRF patterns | `django-drf` |
| Review changelog format and conventions | `prowler-changelog` |
| Testing RLS tenant isolation | `prowler-test-api` |
| Update CHANGELOG.md in any component | `prowler-changelog` |
| Writing Prowler API tests | `prowler-test-api` |
| Writing Python tests with pytest | `pytest` |
---
## CRITICAL RULES - NON-NEGOTIABLE
### Models
+8 -82
View File
@@ -2,49 +2,28 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.18.1] (Prowler v5.17.1)
### Fixed
- Improve API startup process by `manage.py` argument detection [(#9856)](https://github.com/prowler-cloud/prowler/pull/9856)
- Deleting a provider no longer tries to delete a `None` Neo4j database when an Attack Paths scan is scheduled [(#9858)](https://github.com/prowler-cloud/prowler/pull/9858)
- Use replica database for reading Findings to add them to the Attack Paths graph [(#9861)](https://github.com/prowler-cloud/prowler/pull/9861)
- Attack paths findings loading query to use streaming generator for O(batch_size) memory instead of O(total_findings) [(#9862)](https://github.com/prowler-cloud/prowler/pull/9862)
- Lazy load Neo4j driver [(#9868)](https://github.com/prowler-cloud/prowler/pull/9868)
- Use `Findings.all_objects` to avoid the `ActiveProviderPartitionedManager` [(#9869)](https://github.com/prowler-cloud/prowler/pull/9869)
- Lazy load Neo4j driver for workers only [(#9872)](https://github.com/prowler-cloud/prowler/pull/9872)
- Improve Cypher query for inserting Findings into Attack Paths scan graphs [(#9874)](https://github.com/prowler-cloud/prowler/pull/9874)
- Clear Neo4j database cache after Attack Paths scan and each API query [(#9877)](https://github.com/prowler-cloud/prowler/pull/9877)
- Deduplicated scheduled scans for long-running providers [(#9829)](https://github.com/prowler-cloud/prowler/pull/9829)
## [1.18.0] (Prowler v5.17.0)
## [1.18.0] (Prowler UNRELEASED)
### Added
- `/api/v1/overviews/compliance-watchlist` endpoint to retrieve the compliance watchlist [(#9596)](https://github.com/prowler-cloud/prowler/pull/9596)
- AlibabaCloud provider support [(#9485)](https://github.com/prowler-cloud/prowler/pull/9485)
- `/api/v1/overviews/resource-groups` endpoint to retrieve an overview of resource groups based on finding severities [(#9694)](https://github.com/prowler-cloud/prowler/pull/9694)
- `group` filter for `GET /findings` and `GET /findings/metadata/latest` endpoints [(#9694)](https://github.com/prowler-cloud/prowler/pull/9694)
- `/api/v1/overviews/compliance-watchlist` to retrieve the compliance watchlist [(#9596)](https://github.com/prowler-cloud/prowler/pull/9596)
- Support AlibabaCloud provider [(#9485)](https://github.com/prowler-cloud/prowler/pull/9485)
- `provider_id` and `provider_id__in` filter aliases for findings endpoints to enable consistent frontend parameter naming [(#9701)](https://github.com/prowler-cloud/prowler/pull/9701)
- Attack Paths: `/api/v1/attack-paths-scans` for AWS providers backed by Neo4j [(#9805)](https://github.com/prowler-cloud/prowler/pull/9805)
---
## [1.17.2] (Prowler v5.16.2)
### Security
- Django 5.1.15 (CVE-2025-64460, CVE-2025-13372), Werkzeug 3.1.4 (CVE-2025-66221), sqlparse 0.5.5 (PVE-2025-82038), fonttools 4.60.2 (CVE-2025-66034) [(#9730)](https://github.com/prowler-cloud/prowler/pull/9730)
- `safety` to `3.7.0` and `filelock` to `3.20.3` due to [Safety vulnerability 82754 (CVE-2025-68146)](https://data.safetycli.com/v/82754/97c/) [(#9816)](https://github.com/prowler-cloud/prowler/pull/9816)
- `pyasn1` to v0.6.2 to address [CVE-2026-23490](https://nvd.nist.gov/vuln/detail/CVE-2026-23490) [(#9818)](https://github.com/prowler-cloud/prowler/pull/9818)
- `django-allauth[saml]` to v65.13.0 to address [CVE-2025-65431](https://nvd.nist.gov/vuln/detail/CVE-2025-65431) [(#9575)](https://github.com/prowler-cloud/prowler/pull/9575)
- Updated dependencies to patch security vulnerabilities: Django 5.1.15 (CVE-2025-64460, CVE-2025-13372), Werkzeug 3.1.4 (CVE-2025-66221), sqlparse 0.5.5 (PVE-2025-82038), fonttools 4.60.2 (CVE-2025-66034) [(#9730)](https://github.com/prowler-cloud/prowler/pull/9730)
---
## [1.17.1] (Prowler v5.16.1)
### Changed
- Security Hub integration error when no regions [(#9635)](https://github.com/prowler-cloud/prowler/pull/9635)
### Fixed
- Orphan scheduled scans caused by transaction isolation during provider creation [(#9633)](https://github.com/prowler-cloud/prowler/pull/9633)
---
@@ -52,19 +31,16 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.17.0] (Prowler v5.16.0)
### Added
- New endpoint to retrieve an overview of the categories based on finding severities [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
- Endpoints `GET /findings` and `GET /findings/latest` can now use the category filter [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
- Account id, alias and provider name to PDF reporting table [(#9574)](https://github.com/prowler-cloud/prowler/pull/9574)
### Changed
- Endpoint `GET /overviews/attack-surfaces` no longer returns the related check IDs [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
- OpenAI provider to only load chat-compatible models with tool calling support [(#9523)](https://github.com/prowler-cloud/prowler/pull/9523)
- Increased execution delay for the first scheduled scan tasks to 5 seconds [(#9558)](https://github.com/prowler-cloud/prowler/pull/9558)
### Fixed
- Made `scan_id` a required filter in the compliance overview endpoint [(#9560)](https://github.com/prowler-cloud/prowler/pull/9560)
- Reduced unnecessary UPDATE resources operations by only saving when tag mappings change, lowering write load during scans [(#9569)](https://github.com/prowler-cloud/prowler/pull/9569)
@@ -73,13 +49,11 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.16.1] (Prowler v5.15.1)
### Fixed
- Race condition in scheduled scan creation by adding countdown to task [(#9516)](https://github.com/prowler-cloud/prowler/pull/9516)
## [1.16.0] (Prowler v5.15.0)
### Added
- New endpoint to retrieve an overview of the attack surfaces [(#9309)](https://github.com/prowler-cloud/prowler/pull/9309)
- New endpoint `GET /api/v1/overviews/findings_severity/timeseries` to retrieve daily aggregated findings by severity level [(#9363)](https://github.com/prowler-cloud/prowler/pull/9363)
- Lighthouse AI support for Amazon Bedrock API key [(#9343)](https://github.com/prowler-cloud/prowler/pull/9343)
@@ -87,7 +61,6 @@ All notable changes to the **Prowler API** are documented in this file.
- Support to use admin credentials through the read replica database [(#9440)](https://github.com/prowler-cloud/prowler/pull/9440)
### Changed
- Error messages from Lighthouse celery tasks [(#9165)](https://github.com/prowler-cloud/prowler/pull/9165)
- Restore the compliance overview endpoint's mandatory filters [(#9338)](https://github.com/prowler-cloud/prowler/pull/9338)
@@ -96,7 +69,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.15.2] (Prowler v5.14.2)
### Fixed
- Unique constraint violation during compliance overviews task [(#9436)](https://github.com/prowler-cloud/prowler/pull/9436)
- Division by zero error in ENS PDF report when all requirements are manual [(#9443)](https://github.com/prowler-cloud/prowler/pull/9443)
@@ -105,7 +77,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.15.1] (Prowler v5.14.1)
### Fixed
- Fix typo in PDF reporting [(#9345)](https://github.com/prowler-cloud/prowler/pull/9345)
- Fix IaC provider initialization failure when mutelist processor is configured [(#9331)](https://github.com/prowler-cloud/prowler/pull/9331)
- Match logic for ThreatScore when counting findings [(#9348)](https://github.com/prowler-cloud/prowler/pull/9348)
@@ -115,7 +86,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.15.0] (Prowler v5.14.0)
### Added
- IaC (Infrastructure as Code) provider support for remote repositories [(#8751)](https://github.com/prowler-cloud/prowler/pull/8751)
- Extend `GET /api/v1/providers` with provider-type filters and optional pagination disable to support the new Overview filters [(#8975)](https://github.com/prowler-cloud/prowler/pull/8975)
- New endpoint to retrieve the number of providers grouped by provider type [(#8975)](https://github.com/prowler-cloud/prowler/pull/8975)
@@ -135,12 +105,10 @@ All notable changes to the **Prowler API** are documented in this file.
- New endpoint `GET /api/v1/overview/regions` to retrieve aggregated findings data by region [(#9273)](https://github.com/prowler-cloud/prowler/pull/9273)
### Changed
- Optimized database write queries for scan related tasks [(#9190)](https://github.com/prowler-cloud/prowler/pull/9190)
- Date filters are now optional for `GET /api/v1/overviews/services` endpoint; returns latest scan data by default [(#9248)](https://github.com/prowler-cloud/prowler/pull/9248)
### Fixed
- Scans no longer fail when findings have UIDs exceeding 300 characters; such findings are now skipped with detailed logging [(#9246)](https://github.com/prowler-cloud/prowler/pull/9246)
- Updated unique constraint for `Provider` model to exclude soft-deleted entries, resolving duplicate errors when re-deleting providers [(#9054)](https://github.com/prowler-cloud/prowler/pull/9054)
- Removed compliance generation for providers without compliance frameworks [(#9208)](https://github.com/prowler-cloud/prowler/pull/9208)
@@ -149,7 +117,6 @@ All notable changes to the **Prowler API** are documented in this file.
- Fixed discrepancy between ThreatScore PDF report values and database calculations [(#9296)](https://github.com/prowler-cloud/prowler/pull/9296)
### Security
- Django updated to the latest 5.1 security release, 5.1.14, due to problems with potential [SQL injection](https://github.com/prowler-cloud/prowler/security/dependabot/113) and [denial-of-service vulnerability](https://github.com/prowler-cloud/prowler/security/dependabot/114) [(#9176)](https://github.com/prowler-cloud/prowler/pull/9176)
---
@@ -157,7 +124,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.14.1] (Prowler v5.13.1)
### Fixed
- `/api/v1/overviews/providers` collapses data by provider type so the UI receives a single aggregated record per cloud family even when multiple accounts exist [(#9053)](https://github.com/prowler-cloud/prowler/pull/9053)
- Added retry logic to database transactions to handle Aurora read replica connection failures during scale-down events [(#9064)](https://github.com/prowler-cloud/prowler/pull/9064)
- Security Hub integrations stop failing when they read relationships via the replica by allowing replica relations and saving updates through the primary [(#9080)](https://github.com/prowler-cloud/prowler/pull/9080)
@@ -167,7 +133,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.14.0] (Prowler v5.13.0)
### Added
- Default JWT keys are generated and stored if they are missing from configuration [(#8655)](https://github.com/prowler-cloud/prowler/pull/8655)
- `compliance_name` for each compliance [(#7920)](https://github.com/prowler-cloud/prowler/pull/7920)
- Support C5 compliance framework for the AWS provider [(#8830)](https://github.com/prowler-cloud/prowler/pull/8830)
@@ -181,12 +146,10 @@ All notable changes to the **Prowler API** are documented in this file.
- Add `provider_id__in` filter support to findings and findings severity overview endpoints [(#8951)](https://github.com/prowler-cloud/prowler/pull/8951)
### Changed
- Now the MANAGE_ACCOUNT permission is required to modify or read user permissions instead of MANAGE_USERS [(#8281)](https://github.com/prowler-cloud/prowler/pull/8281)
- Now at least one user with MANAGE_ACCOUNT permission is required in the tenant [(#8729)](https://github.com/prowler-cloud/prowler/pull/8729)
### Security
- Django updated to the latest 5.1 security release, 5.1.13, due to problems with potential [SQL injection](https://github.com/prowler-cloud/prowler/security/dependabot/104) and [directory traversals](https://github.com/prowler-cloud/prowler/security/dependabot/103) [(#8842)](https://github.com/prowler-cloud/prowler/pull/8842)
---
@@ -194,7 +157,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.13.2] (Prowler v5.12.3)
### Fixed
- 500 error when deleting user [(#8731)](https://github.com/prowler-cloud/prowler/pull/8731)
---
@@ -202,11 +164,9 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.13.1] (Prowler v5.12.2)
### Changed
- Renamed compliance overview task queue to `compliance` [(#8755)](https://github.com/prowler-cloud/prowler/pull/8755)
### Security
- Django updated to the latest 5.1 security release, 5.1.12, due to [problems](https://www.djangoproject.com/weblog/2025/sep/03/security-releases/) with potential SQL injection in FilteredRelation column aliases [(#8693)](https://github.com/prowler-cloud/prowler/pull/8693)
---
@@ -214,7 +174,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.13.0] (Prowler v5.12.0)
### Added
- Integration with JIRA, enabling sending findings to a JIRA project [(#8622)](https://github.com/prowler-cloud/prowler/pull/8622), [(#8637)](https://github.com/prowler-cloud/prowler/pull/8637)
- `GET /overviews/findings_severity` now supports `filter[status]` and `filter[status__in]` to aggregate by specific statuses (`FAIL`, `PASS`) [(#8186)](https://github.com/prowler-cloud/prowler/pull/8186)
- Throttling options for `/api/v1/tokens` using the `DJANGO_THROTTLE_TOKEN_OBTAIN` environment variable [(#8647)](https://github.com/prowler-cloud/prowler/pull/8647)
@@ -224,13 +183,11 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.12.0] (Prowler v5.11.0)
### Added
- Lighthouse support for OpenAI GPT-5 [(#8527)](https://github.com/prowler-cloud/prowler/pull/8527)
- Integration with Amazon Security Hub, enabling sending findings to Security Hub [(#8365)](https://github.com/prowler-cloud/prowler/pull/8365)
- Generate ASFF output for AWS providers with SecurityHub integration enabled [(#8569)](https://github.com/prowler-cloud/prowler/pull/8569)
### Fixed
- GitHub provider always scans user instead of organization when using provider UID [(#8587)](https://github.com/prowler-cloud/prowler/pull/8587)
---
@@ -238,12 +195,10 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.11.0] (Prowler v5.10.0)
### Added
- GitHub provider support [(#8271)](https://github.com/prowler-cloud/prowler/pull/8271)
- Integration with Amazon S3, enabling storage and retrieval of scan data via S3 buckets [(#8056)](https://github.com/prowler-cloud/prowler/pull/8056)
### Fixed
- Avoid sending errors to Sentry in M365 provider when user authentication fails [(#8420)](https://github.com/prowler-cloud/prowler/pull/8420)
---
@@ -251,7 +206,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [1.10.2] (Prowler v5.9.2)
### Changed
- Optimized queries for resources views [(#8336)](https://github.com/prowler-cloud/prowler/pull/8336)
---
@@ -259,7 +213,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.10.1] (Prowler v5.9.1)
### Fixed
- Calculate failed findings during scans to prevent heavy database queries [(#8322)](https://github.com/prowler-cloud/prowler/pull/8322)
---
@@ -267,28 +220,23 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.10.0] (Prowler v5.9.0)
### Added
- SSO with SAML support [(#8175)](https://github.com/prowler-cloud/prowler/pull/8175)
- `GET /resources/metadata`, `GET /resources/metadata/latest` and `GET /resources/latest` to expose resource metadata and latest scan results [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
### Changed
- `/processors` endpoints to post-process findings. Currently, only the Mutelist processor is supported, which allows muting findings.
- Optimized the underlying queries for resources endpoints [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
- Optimized include parameters for resources view [(#8229)](https://github.com/prowler-cloud/prowler/pull/8229)
- Optimized overview background tasks [(#8300)](https://github.com/prowler-cloud/prowler/pull/8300)
### Fixed
- Search filter for findings and resources [(#8112)](https://github.com/prowler-cloud/prowler/pull/8112)
- RBAC is now applied to `GET /overviews/providers` [(#8277)](https://github.com/prowler-cloud/prowler/pull/8277)
### Changed
- `POST /schedules/daily` returns a `409 CONFLICT` if already created [(#8258)](https://github.com/prowler-cloud/prowler/pull/8258)
### Security
- Enhanced password validation to enforce 12+ character passwords with special characters, uppercase, lowercase, and numbers [(#8225)](https://github.com/prowler-cloud/prowler/pull/8225)
---
@@ -296,20 +244,16 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.9.1] (Prowler v5.8.1)
### Added
- Custom exception for provider connection errors during scans [(#8234)](https://github.com/prowler-cloud/prowler/pull/8234)
### Changed
- Summary and overview tasks now use a dedicated queue and no longer propagate errors to compliance tasks [(#8214)](https://github.com/prowler-cloud/prowler/pull/8214)
### Fixed
- Scan with no resources will not trigger legacy code for findings metadata [(#8183)](https://github.com/prowler-cloud/prowler/pull/8183)
- Invitation email comparison case-insensitive [(#8206)](https://github.com/prowler-cloud/prowler/pull/8206)
### Removed
- Validation of the provider's secret type during updates [(#8197)](https://github.com/prowler-cloud/prowler/pull/8197)
---
@@ -317,18 +261,15 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.9.0] (Prowler v5.8.0)
### Added
- Support GCP Service Account key [(#7824)](https://github.com/prowler-cloud/prowler/pull/7824)
- `GET /compliance-overviews` endpoints to retrieve compliance metadata and specific requirements statuses [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Lighthouse configuration support [(#7848)](https://github.com/prowler-cloud/prowler/pull/7848)
### Changed
- Reworked `GET /compliance-overviews` to return proper requirement metrics [(#7877)](https://github.com/prowler-cloud/prowler/pull/7877)
- Optional `user` and `password` for M365 provider [(#7992)](https://github.com/prowler-cloud/prowler/pull/7992)
### Fixed
- Scheduled scans are no longer deleted when their daily schedule run is disabled [(#8082)](https://github.com/prowler-cloud/prowler/pull/8082)
---
@@ -336,7 +277,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.5] (Prowler v5.7.5)
### Fixed
- Normalize provider UID to ensure safe and unique export directory paths [(#8007)](https://github.com/prowler-cloud/prowler/pull/8007)
- Blank resource types in `/metadata` endpoints [(#8027)](https://github.com/prowler-cloud/prowler/pull/8027)
@@ -345,7 +285,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.4] (Prowler v5.7.4)
### Removed
- Reverted RLS transaction handling and DB custom backend [(#7994)](https://github.com/prowler-cloud/prowler/pull/7994)
---
@@ -353,15 +292,12 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.3] (Prowler v5.7.3)
### Added
- Database backend to handle already closed connections [(#7935)](https://github.com/prowler-cloud/prowler/pull/7935)
### Changed
- Renamed field encrypted_password to password for M365 provider [(#7784)](https://github.com/prowler-cloud/prowler/pull/7784)
### Fixed
- Transaction persistence with RLS operations [(#7916)](https://github.com/prowler-cloud/prowler/pull/7916)
- Reverted the change `get_with_retry` to use the original `get` method for retrieving tasks [(#7932)](https://github.com/prowler-cloud/prowler/pull/7932)
@@ -370,7 +306,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.2] (Prowler v5.7.2)
### Fixed
- Task lookup to use task_kwargs instead of task_args for scan report resolution [(#7830)](https://github.com/prowler-cloud/prowler/pull/7830)
- Kubernetes UID validation to allow valid context names [(#7871)](https://github.com/prowler-cloud/prowler/pull/7871)
- Connection status verification before launching a scan [(#7831)](https://github.com/prowler-cloud/prowler/pull/7831)
@@ -382,7 +317,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.1] (Prowler v5.7.1)
### Fixed
- Added database index to improve performance on finding lookup [(#7800)](https://github.com/prowler-cloud/prowler/pull/7800)
---
@@ -390,7 +324,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.8.0] (Prowler v5.7.0)
### Added
- Huge improvements to `/findings/metadata` and resource related filters for findings [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
- Improvements to `/overviews` endpoints [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
- Queue to perform backfill background tasks [(#7690)](https://github.com/prowler-cloud/prowler/pull/7690)
@@ -424,7 +357,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.5.4] (Prowler v5.4.4)
### Fixed
- Bug with periodic tasks when trying to delete a provider [(#7466)](https://github.com/prowler-cloud/prowler/pull/7466)
---
@@ -432,7 +364,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.5.3] (Prowler v5.4.3)
### Fixed
- Duplicated scheduled scans handling [(#7401)](https://github.com/prowler-cloud/prowler/pull/7401)
- Environment variable to configure the deletion task batch size [(#7423)](https://github.com/prowler-cloud/prowler/pull/7423)
@@ -441,7 +372,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.5.2] (Prowler v5.4.2)
### Changed
- Refactored deletion logic and implemented retry mechanism for deletion tasks [(#7349)](https://github.com/prowler-cloud/prowler/pull/7349)
---
@@ -449,7 +379,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.5.1] (Prowler v5.4.1)
### Fixed
- Handle response in case local files are missing [(#7183)](https://github.com/prowler-cloud/prowler/pull/7183)
- Race condition when deleting export files after the S3 upload [(#7172)](https://github.com/prowler-cloud/prowler/pull/7172)
- Handle exception when a provider has no secret in test connection [(#7283)](https://github.com/prowler-cloud/prowler/pull/7283)
@@ -459,13 +388,11 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.5.0] (Prowler v5.4.0)
### Added
- Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)
- API scan report system, now all scans launched from the API will generate a compressed file with the report in OCSF, CSV and HTML formats [(#6878)](https://github.com/prowler-cloud/prowler/pull/6878)
- Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874)
### Changed
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019)
---
@@ -473,7 +400,6 @@ All notable changes to the **Prowler API** are documented in this file.
## [v1.4.0] (Prowler v5.3.0)
### Changed
- Daily scheduled scan instances are now created beforehand with `SCHEDULED` state [(#6700)](https://github.com/prowler-cloud/prowler/pull/6700)
- Findings endpoints now require at least one date filter [(#6800)](https://github.com/prowler-cloud/prowler/pull/6800)
- Findings metadata endpoint received a performance improvement [(#6863)](https://github.com/prowler-cloud/prowler/pull/6863)
+1 -1
View File
@@ -32,7 +32,7 @@ start_prod_server() {
start_worker() {
echo "Starting the worker..."
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion,backfill,overview,integrations,compliance,attack-paths-scans -E --max-tasks-per-child 1
poetry run python -m celery -A config.celery worker -l "${DJANGO_LOGGING_LEVEL:-info}" -Q celery,scans,scan-reports,deletion,backfill,overview,integrations,compliance -E --max-tasks-per-child 1
}
start_worker_beat() {
+366 -1832
View File
File diff suppressed because it is too large Load Diff
+5 -8
View File
@@ -8,7 +8,7 @@ dependencies = [
"celery[pytest] (>=5.4.0,<6.0.0)",
"dj-rest-auth[with_social,jwt] (==7.0.1)",
"django (==5.1.15)",
"django-allauth[saml] (>=65.13.0,<66.0.0)",
"django-allauth[saml] (>=65.8.0,<66.0.0)",
"django-celery-beat (>=2.7.0,<3.0.0)",
"django-celery-results (>=2.5.1,<3.0.0)",
"django-cors-headers==4.4.0",
@@ -24,7 +24,7 @@ dependencies = [
"drf-spectacular-jsonapi==0.5.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.17",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (>=1.0.1,<2.0.0)",
"sentry-sdk[django] (>=2.20.0,<3.0.0)",
@@ -36,8 +36,6 @@ dependencies = [
"drf-simple-apikey (==2.2.1)",
"matplotlib (>=3.10.6,<4.0.0)",
"reportlab (>=4.4.4,<5.0.0)",
"neo4j (<6.0.0)",
"cartography @ git+https://github.com/prowler-cloud/cartography@master",
"gevent (>=25.9.1,<26.0.0)",
"werkzeug (>=3.1.4)",
"sqlparse (>=0.5.4)",
@@ -49,7 +47,7 @@ name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.18.2"
version = "1.18.0"
[project.scripts]
celery = "src.backend.config.settings.celery"
@@ -70,7 +68,6 @@ pytest-env = "1.1.3"
pytest-randomly = "3.15.0"
pytest-xdist = "3.6.1"
ruff = "0.5.0"
safety = "3.7.0"
filelock = "3.20.3"
vulture = "2.14"
safety = "3.2.9"
tqdm = "4.67.1"
vulture = "2.14"
+3 -38
View File
@@ -30,50 +30,15 @@ class ApiConfig(AppConfig):
def ready(self):
from api import schema_extensions # noqa: F401
from api import signals # noqa: F401
from api.attack_paths import database as graph_database
from api.compliance import load_prowler_compliance
# Generate required cryptographic keys if not present, but only if:
# `"manage.py" not in sys.argv[0]`: If an external server (e.g., Gunicorn) is running the app
# `"manage.py" not in sys.argv`: If an external server (e.g., Gunicorn) is running the app
# `os.environ.get("RUN_MAIN")`: If it's not a Django command or using `runserver`,
# only the main process will do it
if (len(sys.argv) >= 1 and "manage.py" not in sys.argv[0]) or os.environ.get(
"RUN_MAIN"
):
if "manage.py" not in sys.argv or os.environ.get("RUN_MAIN"):
self._ensure_crypto_keys()
# Commands that don't need Neo4j
SKIP_NEO4J_DJANGO_COMMANDS = [
"makemigrations",
"migrate",
"pgpartition",
"check",
"help",
"showmigrations",
"check_and_fix_socialaccount_sites_migration",
]
# Skip Neo4j initialization during tests, some Django commands, and Celery
if getattr(settings, "TESTING", False) or (
len(sys.argv) > 1
and (
(
"manage.py" in sys.argv[0]
and sys.argv[1] in SKIP_NEO4J_DJANGO_COMMANDS
)
or "celery" in sys.argv[0]
)
):
logger.info(
"Skipping Neo4j initialization because tests, some Django commands or Celery"
)
else:
graph_database.init_driver()
# Neo4j driver is initialized at API startup (see api.attack_paths.database)
# It remains lazy for Celery workers and selected Django commands
load_prowler_compliance()
def _ensure_crypto_keys(self):
@@ -89,7 +54,7 @@ class ApiConfig(AppConfig):
global _keys_initialized
# Skip key generation if running tests
if getattr(settings, "TESTING", False):
if hasattr(settings, "TESTING") and settings.TESTING:
return
# Skip if already initialized in this process
@@ -1,13 +0,0 @@
from api.attack_paths.query_definitions import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
get_queries_for_provider,
get_query_by_id,
)
__all__ = [
"AttackPathsQueryDefinition",
"AttackPathsQueryParameterDefinition",
"get_queries_for_provider",
"get_query_by_id",
]
@@ -1,161 +0,0 @@
import atexit
import logging
import threading
from contextlib import contextmanager
from typing import Iterator
from uuid import UUID
import neo4j
import neo4j.exceptions
from django.conf import settings
from api.attack_paths.retryable_session import RetryableSession
# Without this Celery goes crazy with Neo4j logging
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False
SERVICE_UNAVAILABLE_MAX_RETRIES = 3
# Module-level process-wide driver singleton
_driver: neo4j.Driver | None = None
_lock = threading.Lock()
# Base Neo4j functions
def get_uri() -> str:
    """Build the Bolt connection URI from the Django ``neo4j`` database settings.

    Returns:
        A string of the form ``bolt://<HOST>:<PORT>`` using the ``HOST`` and
        ``PORT`` entries of ``settings.DATABASES["neo4j"]``.
    """
    neo4j_settings = settings.DATABASES["neo4j"]
    host, port = neo4j_settings["HOST"], neo4j_settings["PORT"]
    return f"bolt://{host}:{port}"
def init_driver() -> neo4j.Driver:
    """Create (once per process) and return the shared Neo4j driver.

    The first caller builds the driver from ``settings.DATABASES["neo4j"]``,
    verifies connectivity and registers ``close_driver`` with ``atexit``.
    Later callers get the already-initialized instance.

    The driver is only published to the module-level ``_driver`` after
    ``verify_connectivity()`` succeeds. If verification fails, the freshly
    created driver is closed and the exception propagates, so a later call
    retries from scratch instead of returning an unverified driver.

    Returns:
        The process-wide ``neo4j.Driver`` instance.

    Raises:
        Whatever ``neo4j.GraphDatabase.driver`` or ``verify_connectivity``
        raise when the driver cannot be created or the server is unreachable.
    """
    global _driver

    # Fast path: no lock needed once the driver has been published.
    if _driver is not None:
        return _driver

    with _lock:
        # Re-check under the lock; another thread may have initialized it.
        if _driver is None:
            config = settings.DATABASES["neo4j"]
            candidate = neo4j.GraphDatabase.driver(
                get_uri(),
                auth=(config["USER"], config["PASSWORD"]),
                keep_alive=True,
                max_connection_lifetime=7200,
                connection_acquisition_timeout=120,
                max_connection_pool_size=50,
            )
            try:
                candidate.verify_connectivity()
            except Exception:
                # Do not leak (or cache) a driver that never proved it can connect.
                candidate.close()
                raise

            _driver = candidate

            # Register cleanup handler (runs once: we are inside the
            # ``_driver is None`` branch and the driver is now published).
            atexit.register(close_driver)

    return _driver
def get_driver() -> neo4j.Driver:
    """Return the shared Neo4j driver, initializing it via ``init_driver`` if needed."""
    driver = init_driver()
    return driver
def close_driver() -> None:
    """Close the shared Neo4j driver, if any, and reset the module-level reference.

    Registered with ``atexit`` by ``init_driver``. The reference is cleared
    even when ``close()`` raises; the exception still propagates.
    """
    global _driver

    with _lock:
        if _driver is None:
            # Nothing was ever initialized (or it was already closed).
            return
        try:
            _driver.close()
        finally:
            _driver = None
@contextmanager
def get_session(database: str | None = None) -> Iterator[RetryableSession]:
    """Yield a ``RetryableSession`` bound to ``database`` and always close it.

    Args:
        database: Name of the Neo4j database to open sessions against. It is
            passed as-is to ``driver.session(database=...)``; ``None`` is the
            default.

    Yields:
        A ``RetryableSession`` built with ``SERVICE_UNAVAILABLE_MAX_RETRIES``
        retries, whose underlying sessions come from the shared driver.

    Raises:
        GraphDatabaseQueryException: When a ``neo4j.exceptions.Neo4jError`` is
            raised while creating the wrapper or inside the ``with`` body. The
            original error is chained as ``__cause__``.
    """
    session_wrapper: RetryableSession | None = None

    try:
        session_wrapper = RetryableSession(
            # The factory is a lambda so the driver is looked up each time a
            # session is requested rather than once up front.
            session_factory=lambda: get_driver().session(database=database),
            max_retries=SERVICE_UNAVAILABLE_MAX_RETRIES,
        )
        yield session_wrapper

    except neo4j.exceptions.Neo4jError as exc:
        # Chain explicitly so the driver-level traceback is kept as the cause.
        raise GraphDatabaseQueryException(message=exc.message, code=exc.code) from exc

    finally:
        if session_wrapper is not None:
            session_wrapper.close()
def create_database(database: str) -> None:
    """Create the Neo4j database named ``database`` unless it already exists.

    The name is sent as the ``$database`` query parameter rather than being
    interpolated into the Cypher text.
    """
    with get_session() as session:
        session.run(
            "CREATE DATABASE $database IF NOT EXISTS",
            {"database": database},
        )
def drop_database(database: str) -> None:
    """Drop the Neo4j database named ``database`` (and its data) if it exists."""
    # NOTE(review): unlike create_database, the name is interpolated into the
    # Cypher text (backtick-quoted) instead of being bound as a parameter, so
    # callers must only pass trusted names.
    with get_session() as session:
        session.run(f"DROP DATABASE `{database}` IF EXISTS DESTROY DATA")
def drop_subgraph(database: str, root_node_label: str, root_node_id: str) -> int:
    """Delete the subgraph reachable from a root node and report how many nodes went.

    Args:
        database: Database the session is opened against.
        root_node_label: Label of the root node. It is substituted textually
            into the Cypher query, so it must be a trusted value.
        root_node_id: Value matched against the root node's ``id`` property,
            bound as the ``$root_node_id`` parameter.

    Returns:
        The ``deleted_nodes_count`` value returned by the query, or ``0`` when
        reading the result raises ``ResultConsumedError``.
    """
    cypher_template = """
MATCH (a:__ROOT_NODE_LABEL__ {id: $root_node_id})
CALL apoc.path.subgraphNodes(a, {})
YIELD node
DETACH DELETE node
RETURN COUNT(node) AS deleted_nodes_count
"""
    cypher = cypher_template.replace("__ROOT_NODE_LABEL__", root_node_label)
    bindings = {"root_node_id": root_node_id}

    with get_session(database) as session:
        outcome = session.run(cypher, bindings)

        try:
            record = outcome.single()
            return record["deleted_nodes_count"]
        except neo4j.exceptions.ResultConsumedError:
            # As there are no nodes to delete, the result is empty
            return 0
def clear_cache(database: str) -> None:
    """Ask Neo4j to clear its query caches for ``database`` (best effort).

    A ``GraphDatabaseQueryException`` is logged as a warning and swallowed,
    so a failed cache clear never interrupts the caller.
    """
    try:
        with get_session(database) as session:
            session.run("CALL db.clearQueryCaches()")
    except GraphDatabaseQueryException as error:
        logging.warning(
            f"Failed to clear query cache for database `{database}`: {error}"
        )
# Neo4j functions related to Prowler + Cartography
# Naming pattern for the per-scan Neo4j database.
DATABASE_NAME_TEMPLATE = "db-{attack_paths_scan_id}"


def get_database_name(attack_paths_scan_id: UUID) -> str:
    """Return the Neo4j database name for an Attack Paths scan.

    Args:
        attack_paths_scan_id: Identifier of the Attack Paths scan.

    Returns:
        ``DATABASE_NAME_TEMPLATE`` filled with the lower-cased string form of
        the identifier, e.g. ``db-12345678-1234-5678-1234-567812345678``.
    """
    normalized_id = f"{attack_paths_scan_id}".lower()
    return DATABASE_NAME_TEMPLATE.format(attack_paths_scan_id=normalized_id)
# Exceptions
class GraphDatabaseQueryException(Exception):
def __init__(self, message: str, code: str | None = None) -> None:
super().__init__(message)
self.message = message
self.code = code
def __str__(self) -> str:
if self.code:
return f"{self.code}: {self.message}"
return self.message
@@ -1,514 +0,0 @@
from dataclasses import dataclass, field
# Dataclasses for handling the API's Attack Paths query definitions and their parameters
@dataclass
class AttackPathsQueryParameterDefinition:
"""
Metadata describing a parameter that must be provided to an Attack Paths query.
"""
name: str
label: str
data_type: str = "string"
cast: type = str
description: str | None = None
placeholder: str | None = None
@dataclass
class AttackPathsQueryDefinition:
    """
    Declarative description of a single Attack Paths query.

    The dataclass is not frozen, so instances are technically mutable; the
    definitions are meant to be treated as read-only.

    Attributes:
        id: Unique identifier of the query.
        name: Short human-readable title.
        description: Longer explanation of what the query detects.
        provider: Provider the query applies to (e.g. ``"aws"``).
        cypher: Cypher text of the query.
        parameters: Definitions of the parameters the query expects; each
            instance gets its own empty list by default.
    """

    id: str
    name: str
    description: str
    provider: str
    cypher: str
    parameters: list[AttackPathsQueryParameterDefinition] = field(default_factory=list)
# Accessor functions for API's Attack Paths query definitions
def get_queries_for_provider(provider: str) -> list[AttackPathsQueryDefinition]:
    """Return the query definitions registered for ``provider``.

    Returns:
        The list stored in ``_QUERY_DEFINITIONS`` for that provider (the
        registry's own list object, not a copy), or a new empty list when the
        provider has no entry.
    """
    try:
        return _QUERY_DEFINITIONS[provider]
    except KeyError:
        return []
def get_query_by_id(query_id: str) -> AttackPathsQueryDefinition | None:
    """Look up a query definition by its identifier.

    Returns:
        The matching entry of ``_QUERIES_BY_ID``, or ``None`` when no query
        has that id.
    """
    try:
        return _QUERIES_BY_ID[query_id]
    except KeyError:
        return None
# API's Attack Paths query definitions
_QUERY_DEFINITIONS: dict[str, list[AttackPathsQueryDefinition]] = {
"aws": [
# Custom query for detecting internet-exposed EC2 instances with sensitive S3 access
AttackPathsQueryDefinition(
id="aws-internet-exposed-ec2-sensitive-s3-access",
name="Identify internet-exposed EC2 instances with sensitive S3 access",
description="Detect EC2 instances with SSH exposed to the internet that can assume higher-privileged roles to read tagged sensitive S3 buckets despite bucket-level public access blocks.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path_s3 = (aws:AWSAccount {id: $provider_uid})--(s3:S3Bucket)--(t:AWSTag)
WHERE toLower(t.key) = toLower($tag_key) AND toLower(t.value) = toLower($tag_value)
MATCH path_ec2 = (aws)--(ec2:EC2Instance)--(sg:EC2SecurityGroup)--(ipi:IpPermissionInbound)
WHERE ec2.exposed_internet = true
AND ipi.toport = 22
MATCH path_role = (r:AWSRole)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE ANY(x IN stmt.resource WHERE x CONTAINS s3.name)
AND ANY(x IN stmt.action WHERE toLower(x) =~ 's3:(listbucket|getobject).*')
MATCH path_assume_role = (ec2)-[p:STS_ASSUMEROLE_ALLOW*1..9]-(r:AWSRole)
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, ec2)
YIELD rel AS can_access
UNWIND nodes(path_s3) + nodes(path_ec2) + nodes(path_role) + nodes(path_assume_role) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_s3, path_ec2, path_role, path_assume_role, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="tag_key",
label="Tag key",
description="Tag key to filter the S3 bucket, e.g. DataClassification.",
placeholder="DataClassification",
),
AttackPathsQueryParameterDefinition(
name="tag_value",
label="Tag value",
description="Tag value to filter the S3 bucket, e.g. Sensitive.",
placeholder="Sensitive",
),
],
),
# Regular Cartography Attack Paths queries
AttackPathsQueryDefinition(
id="aws-rds-instances",
name="Identify provisioned RDS instances",
description="List the selected AWS account alongside the RDS instances it owns.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(rds:RDSInstance)
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-rds-unencrypted-storage",
name="Identify RDS instances without storage encryption",
description="Find RDS instances with storage encryption disabled within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(rds:RDSInstance)
WHERE rds.storage_encrypted = false
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-s3-anonymous-access-buckets",
name="Identify S3 buckets with anonymous access",
description="Find S3 buckets that allow anonymous access within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(s3:S3Bucket)
WHERE s3.anonymous_access = true
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-all-actions",
name="Identify IAM statements that allow all actions",
description="Find IAM policy statements that allow all actions via '*' within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = '*')
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-delete-policy",
name="Identify IAM statements that allow iam:DeletePolicy",
description="Find IAM policy statements that allow the iam:DeletePolicy action within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = 'Allow'
AND any(x IN stmt.action WHERE x = "iam:DeletePolicy")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-iam-statements-allow-create-actions",
name="Identify IAM statements that allow create actions",
description="Find IAM policy statements that allow actions containing 'create' within the selected account.",
provider="aws",
cypher="""
MATCH path = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)--(pol:AWSPolicy)--(stmt:AWSPolicyStatement)
WHERE stmt.effect = "Allow"
AND any(x IN stmt.action WHERE toLower(x) CONTAINS "create")
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-ec2-instances-internet-exposed",
name="Identify internet-exposed EC2 instances",
description="Find EC2 instances flagged as exposed to the internet within the selected account.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(ec2:EC2Instance)
WHERE ec2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, ec2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-security-groups-open-internet-facing",
name="Identify internet-facing resources with open security groups",
description="Find internet-facing resources associated with security groups that allow inbound access from '0.0.0.0/0'.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path_open = (aws:AWSAccount {id: $provider_uid})-[r0]-(open)
MATCH path_sg = (open)-[r1:MEMBER_OF_EC2_SECURITY_GROUP]-(sg:EC2SecurityGroup)
MATCH path_ip = (sg)-[r2:MEMBER_OF_EC2_SECURITY_GROUP]-(ipi:IpPermissionInbound)
MATCH path_ipi = (ipi)-[r3]-(ir:IpRange)
WHERE ir.range = "0.0.0.0/0"
OPTIONAL MATCH path_dns = (dns:AWSDNSRecord)-[:DNS_POINTS_TO]->(lb)
WHERE open.scheme = 'internet-facing'
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, open)
YIELD rel AS can_access
UNWIND nodes(path_open) + nodes(path_sg) + nodes(path_ip) + nodes(path_ipi) + nodes(path_dns) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_open, path_sg, path_ip, path_ipi, path_dns, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-classic-elb-internet-exposed",
name="Identify internet-exposed Classic Load Balancers",
description="Find Classic Load Balancers exposed to the internet along with their listeners.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(elb:LoadBalancer)--(listener:ELBListener)
WHERE elb.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, elb)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-elbv2-internet-exposed",
name="Identify internet-exposed ELBv2 load balancers",
description="Find ELBv2 load balancers exposed to the internet along with their listeners.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
MATCH path = (aws:AWSAccount {id: $provider_uid})--(elbv2:LoadBalancerV2)--(listener:ELBV2Listener)
WHERE elbv2.exposed_internet = true
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, elbv2)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[],
),
AttackPathsQueryDefinition(
id="aws-public-ip-resource-lookup",
name="Identify resources by public IP address",
description="Given a public IP address, find the related AWS resource and its adjacent node within the selected account.",
provider="aws",
cypher="""
CALL apoc.create.vNode(['Internet'], {id: 'Internet', name: 'Internet'})
YIELD node AS internet
CALL () {
MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:EC2PrivateIp)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:EC2Instance)-[q]-(y)
WHERE x.publicipaddress = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:NetworkInterface)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
UNION MATCH path = (aws:AWSAccount {id: $provider_uid})-[r]-(x:ElasticIPAddress)-[q]-(y)
WHERE x.public_ip = $ip
RETURN path, x
}
WITH path, x, internet
CALL apoc.create.vRelationship(internet, 'CAN_ACCESS', {}, x)
YIELD rel AS can_access
UNWIND nodes(path) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path, collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr, internet, can_access
""",
parameters=[
AttackPathsQueryParameterDefinition(
name="ip",
label="IP address",
description="Public IP address, e.g. 192.0.2.0.",
placeholder="192.0.2.0",
),
],
),
# Privilege Escalation Queries (based on pathfinding.cloud research): https://github.com/DataDog/pathfinding.cloud
AttackPathsQueryDefinition(
id="aws-iam-privesc-passrole-ec2",
name="Privilege Escalation: iam:PassRole + ec2:RunInstances",
description="Detect principals who can launch EC2 instances with privileged IAM roles attached. This allows gaining the permissions of the passed role by accessing the EC2 instance metadata service. This is a new-passrole escalation path (pathfinding.cloud: ec2-001).",
provider="aws",
cypher="""
// Create a single shared virtual EC2 instance node
CALL apoc.create.vNode(['EC2Instance'], {
id: 'potential-ec2-passrole',
name: 'New EC2 Instance',
description: 'Attacker-controlled EC2 with privileged role'
})
YIELD node AS ec2_node
// Create a single shared virtual escalation outcome node (styled like a finding)
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-passrole-ec2',
check_title: 'Privilege Escalation',
name: 'Effective Administrator',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH ec2_node, escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Find statements granting iam:PassRole
MATCH path_passrole = (principal)--(passrole_policy:AWSPolicy)--(stmt_passrole:AWSPolicyStatement)
WHERE stmt_passrole.effect = 'Allow'
AND any(action IN stmt_passrole.action WHERE
toLower(action) = 'iam:passrole'
OR toLower(action) = 'iam:*'
OR action = '*'
)
// Find statements granting ec2:RunInstances
MATCH path_ec2 = (principal)--(ec2_policy:AWSPolicy)--(stmt_ec2:AWSPolicyStatement)
WHERE stmt_ec2.effect = 'Allow'
AND any(action IN stmt_ec2.action WHERE
toLower(action) = 'ec2:runinstances'
OR toLower(action) = 'ec2:*'
OR action = '*'
)
// Find roles that trust EC2 service (can be passed to EC2)
MATCH path_target = (aws)--(target_role:AWSRole)
WHERE target_role.arn CONTAINS $provider_uid
// Check if principal can pass this role
AND any(resource IN stmt_passrole.resource WHERE
resource = '*'
OR target_role.arn CONTAINS resource
OR resource CONTAINS target_role.name
)
// Check if target role has elevated permissions (optional, for severity assessment)
OPTIONAL MATCH (target_role)--(role_policy:AWSPolicy)--(role_stmt:AWSPolicyStatement)
WHERE role_stmt.effect = 'Allow'
AND (
any(action IN role_stmt.action WHERE action = '*')
OR any(action IN role_stmt.action WHERE toLower(action) = 'iam:*')
)
CALL apoc.create.vRelationship(principal, 'CAN_LAUNCH', {
via: 'ec2:RunInstances + iam:PassRole'
}, ec2_node)
YIELD rel AS launch_rel
CALL apoc.create.vRelationship(ec2_node, 'ASSUMES_ROLE', {}, target_role)
YIELD rel AS assumes_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/ec2-001'
}, escalation_outcome)
YIELD rel AS grants_rel
UNWIND nodes(path_principal) + nodes(path_passrole) + nodes(path_ec2) + nodes(path_target) as n
OPTIONAL MATCH (n)-[pfr]-(pf:ProwlerFinding)
WHERE pf.status = 'FAIL'
RETURN path_principal, path_passrole, path_ec2, path_target,
ec2_node, escalation_outcome, launch_rel, assumes_rel, grants_rel,
collect(DISTINCT pf) as dpf, collect(DISTINCT pfr) as dpfr
""",
parameters=[],
),
AttackPathsQueryDefinition(
    id="aws-glue-privesc-passrole-dev-endpoint",
    name="Privilege Escalation: Glue Dev Endpoint with PassRole",
    description="Detect principals that can escalate privileges by passing a role to a Glue development endpoint. The attacker creates a dev endpoint with an arbitrary role attached, then accesses those credentials through the endpoint.",
    provider="aws",
    # Query outline:
    #   1. Create one shared virtual "escalation outcome" node.
    #   2. Resolve each principal of the scanned account and, optionally, the
    #      role it can act as through up to two STS_ASSUMEROLE_ALLOW hops.
    #   3. Require Allow statements for iam:PassRole and glue:CreateDevEndpoint
    #      (including the `iam:*`, `glue:*` and `*` wildcards).
    #   4. Find roles of the same account whose policies allow `*` or `iam:*`.
    #   5. Emit virtual nodes/relationships describing the escalation path.
    # `aws` has to be listed in every WITH: a variable that is not carried
    # through a WITH goes out of scope, and re-using its name in a later MATCH
    # would bind a brand-new, unconstrained node instead of the scanned account.
    cypher="""
CALL apoc.create.vNode(['PrivilegeEscalation'], {
id: 'effective-administrator-glue',
check_title: 'Privilege Escalation',
name: 'Effective Administrator (Glue)',
status: 'FAIL',
severity: 'critical'
})
YIELD node AS escalation_outcome
WITH escalation_outcome
// Find principals in the account
MATCH path_principal = (aws:AWSAccount {id: $provider_uid})--(principal:AWSPrincipal)
// Principal can assume roles (up to 2 hops)
OPTIONAL MATCH path_assume = (principal)-[:STS_ASSUMEROLE_ALLOW*0..2]->(acting_as:AWSRole)
WITH escalation_outcome, aws, principal, path_principal, path_assume,
CASE WHEN path_assume IS NULL THEN principal ELSE acting_as END AS effective_principal
// Find iam:PassRole permission
MATCH path_passrole = (effective_principal)--(passrole_policy:AWSPolicy)--(passrole_stmt:AWSPolicyStatement)
WHERE passrole_stmt.effect = 'Allow'
AND any(action IN passrole_stmt.action WHERE toLower(action) = 'iam:passrole' OR toLower(action) = 'iam:*' OR action = '*')
// Find Glue CreateDevEndpoint permission
MATCH (effective_principal)--(glue_policy:AWSPolicy)--(glue_stmt:AWSPolicyStatement)
WHERE glue_stmt.effect = 'Allow'
AND any(action IN glue_stmt.action WHERE toLower(action) = 'glue:createdevendpoint' OR action = '*' OR toLower(action) = 'glue:*')
// Find target role with elevated permissions
MATCH (aws)--(target_role:AWSRole)--(target_policy:AWSPolicy)--(target_stmt:AWSPolicyStatement)
WHERE target_stmt.effect = 'Allow'
AND (
any(action IN target_stmt.action WHERE action = '*')
OR any(action IN target_stmt.action WHERE toLower(action) = 'iam:*')
)
// Deduplicate before creating virtual nodes
WITH DISTINCT escalation_outcome, aws, principal, effective_principal, target_role
// Create virtual Glue endpoint node (one per unique principal->target pair)
CALL apoc.create.vNode(['GlueDevEndpoint'], {
name: 'New Dev Endpoint',
description: 'Glue endpoint with target role attached',
id: effective_principal.arn + '->' + target_role.arn
})
YIELD node AS glue_endpoint
CALL apoc.create.vRelationship(effective_principal, 'CREATES_ENDPOINT', {
permissions: ['iam:PassRole', 'glue:CreateDevEndpoint'],
technique: 'new-passrole'
}, glue_endpoint)
YIELD rel AS create_rel
CALL apoc.create.vRelationship(glue_endpoint, 'RUNS_AS', {}, target_role)
YIELD rel AS runs_rel
CALL apoc.create.vRelationship(target_role, 'GRANTS_ACCESS', {
reference: 'https://pathfinding.cloud/paths/glue-001'
}, escalation_outcome)
YIELD rel AS grants_rel
// Re-match paths for visualization
MATCH path_principal = (aws)--(principal)
MATCH path_target = (aws)--(target_role)
RETURN path_principal, path_target,
glue_endpoint, escalation_outcome, create_rel, runs_rel, grants_rel
""",
    parameters=[],
),
],
}
# Flat lookup table (query id -> definition) built from every list of
# definitions held in `_QUERY_DEFINITIONS`. If two definitions share an id,
# the one iterated last silently replaces the earlier one.
_QUERIES_BY_ID: dict[str, AttackPathsQueryDefinition] = {
    definition.id: definition
    for definitions in _QUERY_DEFINITIONS.values()
    for definition in definitions
}
@@ -1,92 +0,0 @@
import logging
from collections.abc import Callable
from typing import Any
import neo4j
import neo4j.exceptions
logger = logging.getLogger(__name__)
class RetryableSession:
    """
    Wrapper around `neo4j.Session` that retries transient connection failures.

    `run`, `write_transaction`, `read_transaction`, `execute_write` and
    `execute_read` are retried when they raise `BrokenPipeError`,
    `ConnectionResetError` or `neo4j.exceptions.ServiceUnavailable`. Before each
    retry the current session is closed (best effort) and a new one is obtained
    from `session_factory`. Any other attribute is forwarded to the wrapped
    session without retry.

    Args:
        session_factory: Zero-argument callable returning a fresh session.
        max_retries: Number of retries after the first attempt; negative values
            are treated as 0.
    """

    def __init__(
        self,
        session_factory: "Callable[[], neo4j.Session]",
        max_retries: int,
    ) -> None:
        self._session_factory = session_factory
        self._max_retries = max(0, max_retries)
        self._session = self._session_factory()

    def close(self) -> None:
        """Close the wrapped session, if any. Safe to call more than once."""
        if self._session is not None:
            self._session.close()
            self._session = None

    def __enter__(self) -> "RetryableSession":
        return self

    def __exit__(
        self, _: Any, __: Any, ___: Any
    ) -> None:  # Unused args: exc_type, exc, exc_tb
        self.close()

    def run(self, *args: Any, **kwargs: Any) -> Any:
        return self._call_with_retry("run", *args, **kwargs)

    def write_transaction(self, *args: Any, **kwargs: Any) -> Any:
        return self._call_with_retry("write_transaction", *args, **kwargs)

    def read_transaction(self, *args: Any, **kwargs: Any) -> Any:
        return self._call_with_retry("read_transaction", *args, **kwargs)

    def execute_write(self, *args: Any, **kwargs: Any) -> Any:
        return self._call_with_retry("execute_write", *args, **kwargs)

    def execute_read(self, *args: Any, **kwargs: Any) -> Any:
        return self._call_with_retry("execute_read", *args, **kwargs)

    def __getattr__(self, item: str) -> Any:
        # `__getattr__` only runs when normal lookup fails. If `_session` itself
        # is missing (instance built through `__new__`, e.g. by `copy`/`pickle`),
        # reading `self._session` below would re-enter this method forever.
        if item == "_session":
            raise AttributeError(item)
        return getattr(self._session, item)

    def _call_with_retry(self, method_name: str, *args: Any, **kwargs: Any) -> Any:
        """
        Call `method_name` on the wrapped session, retrying on connection errors.

        Makes at most `max_retries + 1` attempts. The last failure is re-raised
        unchanged once the retry budget is exhausted.
        """
        attempt = 0

        while True:
            try:
                method = getattr(self._session, method_name)
                return method(*args, **kwargs)

            except (
                BrokenPipeError,
                ConnectionResetError,
                neo4j.exceptions.ServiceUnavailable,
            ) as exc:  # pragma: no cover - depends on infra
                attempt += 1

                if attempt > self._max_retries:
                    raise

                logger.warning(
                    f"Neo4j session {method_name} failed with {type(exc).__name__} ({attempt}/{self._max_retries} attempts). Retrying..."
                )
                self._refresh_session()

    def _refresh_session(self) -> None:
        """Discard the current session and open a new one from the factory."""
        # Drop the reference first: if the factory raises below, the wrapper must
        # not keep pointing at a session that has already been closed.
        stale_session, self._session = self._session, None

        if stale_session is not None:
            try:
                stale_session.close()
            except Exception:
                # Best-effort close; failures just mean we open a new session below
                logger.debug(
                    "Ignoring error while closing stale Neo4j session",
                    exc_info=True,
                )

        self._session = self._session_factory()
@@ -1,143 +0,0 @@
import logging
from typing import Any
from rest_framework.exceptions import APIException, ValidationError
from api.attack_paths import database as graph_database, AttackPathsQueryDefinition
from api.models import AttackPathsScan
from config.custom_logging import BackendLogger
logger = logging.getLogger(BackendLogger.API)
def normalize_run_payload(raw_data):
    """
    Flatten a JSON:API-style request body into the `{id, parameters}` shape.

    When `raw_data` is a dict whose `data` member is a dict, the result holds
    `id` (taken from `data.attributes.id`, falling back to `data.id`) and, only
    when present and not `None`, `parameters` from `data.attributes`.

    Anything that does not match that envelope - a non-dict body, a non-dict
    `data` member, or a non-dict `attributes` member - is returned untouched so
    the serializer can validate and reject it.
    """
    if not isinstance(raw_data, dict):  # Let the serializer handle this
        return raw_data

    data_section = raw_data.get("data")
    if not isinstance(data_section, dict):
        return raw_data

    # A missing or empty `attributes` member is treated as "no attributes"
    attributes = data_section.get("attributes") or {}
    if not isinstance(attributes, dict):
        # Malformed envelope: calling `.get` on it would raise AttributeError,
        # so hand the body back as-is and let the serializer report the error
        return raw_data

    payload = {"id": attributes.get("id", data_section.get("id"))}

    # Leave out `None` parameters to allow defaults downstream
    parameters = attributes.get("parameters")
    if parameters is not None:
        payload["parameters"] = parameters

    return payload
def prepare_query_parameters(
    definition: "AttackPathsQueryDefinition",
    provided_parameters: dict[str, Any],
    provider_uid: str,
) -> dict[str, Any]:
    """
    Validate and cast user-supplied parameters for an Attack Paths query.

    Args:
        definition: Query definition; each entry of `definition.parameters`
            must expose `name` and `cast`.
        provided_parameters: Raw parameters from the request; a falsy value is
            treated as "no parameters".
        provider_uid: Provider identifier, always injected (as `str`) under the
            `provider_uid` key.

    Returns:
        A new dict with `provider_uid` plus every declared parameter after
        casting.

    Raises:
        ValidationError: On unknown parameters, missing parameters, or when a
            cast raises `ValueError`/`TypeError`.
    """
    # Work on a normalized copy so a falsy input never has to be indexed below
    parameters = dict(provided_parameters or {})
    expected_names = {parameter.name for parameter in definition.parameters}
    provided_names = set(parameters)

    unexpected = provided_names - expected_names
    if unexpected:
        raise ValidationError(
            {"parameters": f"Unknown parameter(s): {', '.join(sorted(unexpected))}"}
        )

    missing = expected_names - provided_names
    if missing:
        raise ValidationError(
            {
                "parameters": f"Missing required parameter(s): {', '.join(sorted(missing))}"
            }
        )

    clean_parameters = {
        "provider_uid": str(provider_uid),
    }

    for definition_parameter in definition.parameters:
        raw_value = parameters[definition_parameter.name]

        try:
            casted_value = definition_parameter.cast(raw_value)
        except (ValueError, TypeError) as exc:
            # Chain the original error so the cast failure stays in tracebacks
            raise ValidationError(
                {
                    "parameters": (
                        f"Invalid value for parameter `{definition_parameter.name}`: {str(exc)}"
                    )
                }
            ) from exc

        clean_parameters[definition_parameter.name] = casted_value

    return clean_parameters
def execute_attack_paths_query(
    attack_paths_scan: AttackPathsScan,
    definition: AttackPathsQueryDefinition,
    parameters: dict[str, Any],
) -> dict[str, Any]:
    """
    Run a query against the scan's graph database and serialize the result.

    Args:
        attack_paths_scan: Scan whose `graph_database` names the database to query.
        definition: Query definition providing the Cypher text and the id used
            in error logs.
        parameters: Cypher parameters passed to `session.run`.

    Returns:
        A dict with `nodes` and `relationships` lists built by `_serialize_graph`.

    Raises:
        APIException: When the graph layer raises `GraphDatabaseQueryException`.
            The original error is logged and chained as the cause.
    """
    try:
        with graph_database.get_session(attack_paths_scan.graph_database) as session:
            result = session.run(definition.cypher, parameters)
            # Serialize while the session is still open
            return _serialize_graph(result.graph())

    except graph_database.GraphDatabaseQueryException as exc:
        logger.error(f"Query failed for Attack Paths query `{definition.id}`: {exc}")
        raise APIException(
            "Attack Paths query execution failed due to a database error"
        ) from exc
def _serialize_graph(graph):
    """
    Turn a graph object into plain dicts.

    Every node becomes `{id, labels, properties}` and every relationship
    becomes `{id, label, source, target, properties}`; property values go
    through `_serialize_properties`. Iteration order of `graph.nodes` and
    `graph.relationships` is preserved.
    """
    serialized_nodes = [
        {
            "id": graph_node.element_id,
            "labels": list(graph_node.labels),
            "properties": _serialize_properties(graph_node._properties),
        }
        for graph_node in graph.nodes
    ]

    serialized_relationships = [
        {
            "id": edge.element_id,
            "label": edge.type,
            "source": edge.start_node.element_id,
            "target": edge.end_node.element_id,
            "properties": _serialize_properties(edge._properties),
        }
        for edge in graph.relationships
    ]

    return {"nodes": serialized_nodes, "relationships": serialized_relationships}
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:
"""Convert Neo4j property values into JSON-serializable primitives."""
def _serialize_value(value: Any) -> Any:
# Neo4j temporal and spatial values expose `to_native` returning Python primitives
if hasattr(value, "to_native") and callable(value.to_native):
return _serialize_value(value.to_native())
if isinstance(value, (list, tuple)):
return [_serialize_value(item) for item in value]
if isinstance(value, dict):
return {key: _serialize_value(val) for key, val in value.items()}
return value
return {key: _serialize_value(val) for key, val in properties.items()}
-52
View File
@@ -29,7 +29,6 @@ from api.models import (
Finding,
Integration,
Invitation,
AttackPathsScan,
LighthouseProviderConfiguration,
LighthouseProviderModels,
Membership,
@@ -46,7 +45,6 @@ from api.models import (
Role,
Scan,
ScanCategorySummary,
ScanGroupSummary,
ScanSummary,
SeverityChoices,
StateChoices,
@@ -216,9 +214,6 @@ class CommonFindingFilters(FilterSet):
category = CharFilter(method="filter_category")
category__in = CharInFilter(field_name="categories", lookup_expr="overlap")
resource_groups = CharFilter(field_name="resource_groups", lookup_expr="exact")
resource_groups__in = CharInFilter(field_name="resource_groups", lookup_expr="in")
# Temporarily disabled until we implement tag filtering in the UI
# resource_tag_key = CharFilter(field_name="resources__tags__key")
# resource_tag_key__in = CharInFilter(
@@ -397,23 +392,6 @@ class ScanFilter(ProviderRelationshipFilterSet):
}
class AttackPathsScanFilter(ProviderRelationshipFilterSet):
    """Query-string filters for `AttackPathsScan` list endpoints."""

    # Timestamp filters use the `date` lookup, i.e. they match on the calendar
    # date of the stored datetime.
    inserted_at = DateFilter(field_name="inserted_at", lookup_expr="date")
    completed_at = DateFilter(field_name="completed_at", lookup_expr="date")
    started_at = DateFilter(field_name="started_at", lookup_expr="date")
    # `state` accepts a single choice; `state__in` accepts several of them.
    state = ChoiceFilter(choices=StateChoices.choices)
    state__in = ChoiceInFilter(
        field_name="state", choices=StateChoices.choices, lookup_expr="in"
    )

    class Meta:
        model = AttackPathsScan
        # Auto-generated filters: exact match and `in` for both relations
        fields = {
            "provider": ["exact", "in"],
            "scan": ["exact", "in"],
        }
class TaskFilter(FilterSet):
name = CharFilter(field_name="task_runner_task__task_name", lookup_expr="exact")
name__icontains = CharFilter(
@@ -461,8 +439,6 @@ class ResourceFilter(ProviderRelationshipFilterSet):
updated_at = DateFilter(field_name="updated_at", lookup_expr="date")
scan = UUIDFilter(field_name="provider__scan", lookup_expr="exact")
scan__in = UUIDInFilter(field_name="provider__scan", lookup_expr="in")
groups = CharFilter(method="filter_groups")
groups__in = CharInFilter(field_name="groups", lookup_expr="overlap")
class Meta:
model = Resource
@@ -477,9 +453,6 @@ class ResourceFilter(ProviderRelationshipFilterSet):
"updated_at": ["gte", "lte"],
}
def filter_groups(self, queryset, name, value):
    """Keep only the rows whose `groups` array contains `value`."""
    # `name` is part of the filter-method signature but is not needed here
    lookup = {"groups__contains": [value]}
    return queryset.filter(**lookup)
def filter_queryset(self, queryset):
if not (self.data.get("scan") or self.data.get("scan__in")) and not (
self.data.get("updated_at")
@@ -544,8 +517,6 @@ class LatestResourceFilter(ProviderRelationshipFilterSet):
tag_value = CharFilter(method="filter_tag_value")
tag = CharFilter(method="filter_tag")
tags = CharFilter(method="filter_tag")
groups = CharFilter(method="filter_groups")
groups__in = CharInFilter(field_name="groups", lookup_expr="overlap")
class Meta:
model = Resource
@@ -558,9 +529,6 @@ class LatestResourceFilter(ProviderRelationshipFilterSet):
"type": ["exact", "icontains", "in"],
}
def filter_groups(self, queryset, name, value):
    """Keep only the rows whose `groups` array contains `value`."""
    # `name` is part of the filter-method signature but is not needed here
    lookup = {"groups__contains": [value]}
    return queryset.filter(**lookup)
def filter_tag_key(self, queryset, name, value):
return queryset.filter(Q(tags__key=value) | Q(tags__key__icontains=value))
@@ -1186,26 +1154,6 @@ class CategoryOverviewFilter(BaseScanProviderFilter):
class Meta(BaseScanProviderFilter.Meta):
model = ScanCategorySummary
fields = {}
class ResourceGroupOverviewFilter(FilterSet):
    """Query-string filters for `ScanGroupSummary` rows."""

    # Provider filters traverse summary -> scan -> provider
    provider_id = UUIDFilter(field_name="scan__provider__id", lookup_expr="exact")
    provider_id__in = UUIDInFilter(field_name="scan__provider__id", lookup_expr="in")
    provider_type = ChoiceFilter(
        field_name="scan__provider__provider", choices=Provider.ProviderChoices.choices
    )
    provider_type__in = ChoiceInFilter(
        field_name="scan__provider__provider",
        choices=Provider.ProviderChoices.choices,
        lookup_expr="in",
    )
    # Exact match / membership on the summary's own `resource_group` column
    resource_group = CharFilter(field_name="resource_group", lookup_expr="exact")
    resource_group__in = CharInFilter(field_name="resource_group", lookup_expr="in")

    class Meta:
        model = ScanGroupSummary
        # No auto-generated filters; only the ones declared above are exposed
        fields = {}
class ComplianceWatchlistFilter(BaseProviderFilter):
@@ -1,41 +0,0 @@
[
{
"model": "api.attackpathsscan",
"pk": "a7f0f6de-6f8e-4b3a-8cbe-3f6dd9012345",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"provider": "b85601a8-4b45-4194-8135-03fb980ef428",
"scan": "01920573-aa9c-73c9-bcda-f2e35c9b19d2",
"state": "completed",
"progress": 100,
"update_tag": 1693586667,
"graph_database": "db-a7f0f6de-6f8e-4b3a-8cbe-3f6dd9012345",
"is_graph_database_deleted": false,
"task": null,
"inserted_at": "2024-09-01T17:24:37Z",
"updated_at": "2024-09-01T17:44:37Z",
"started_at": "2024-09-01T17:34:37Z",
"completed_at": "2024-09-01T17:44:37Z",
"duration": 269,
"ingestion_exceptions": {}
}
},
{
"model": "api.attackpathsscan",
"pk": "4a2fb2af-8a60-4d7d-9cae-4ca65e098765",
"fields": {
"tenant": "12646005-9067-4d2a-a098-8bb378604362",
"provider": "15fce1fa-ecaa-433f-a9dc-62553f3a2555",
"scan": "01929f3b-ed2e-7623-ad63-7c37cd37828f",
"state": "executing",
"progress": 48,
"update_tag": 1697625000,
"graph_database": "db-4a2fb2af-8a60-4d7d-9cae-4ca65e098765",
"is_graph_database_deleted": false,
"task": null,
"inserted_at": "2024-10-18T10:55:57Z",
"updated_at": "2024-10-18T10:56:15Z",
"started_at": "2024-10-18T10:56:05Z"
}
}
]
@@ -1,126 +0,0 @@
import uuid
import django.db.models.deletion
from django.db import migrations, models
import api.db_utils
import api.rls
class Migration(migrations.Migration):
    """
    Add `Finding.resource_groups` and create the `ScanGroupSummary` model.

    The new table (`scan_resource_group_summaries`) gets a (tenant, scan) index,
    a uniqueness constraint per (tenant, scan, resource_group, severity) and a
    row-level-security constraint on `tenant_id`.
    """

    dependencies = [
        ("api", "0067_tenant_compliance_summary"),
    ]

    operations = [
        # Nullable text column on findings
        migrations.AddField(
            model_name="finding",
            name="resource_groups",
            field=models.TextField(
                blank=True,
                help_text="Resource group from check metadata for efficient filtering",
                null=True,
            ),
        ),
        migrations.CreateModel(
            name="ScanGroupSummary",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "tenant",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="api.tenant",
                    ),
                ),
                (
                    "inserted_at",
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    "scan",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="resource_group_summaries",
                        related_query_name="resource_group_summary",
                        to="api.scan",
                    ),
                ),
                (
                    "resource_group",
                    models.CharField(max_length=50),
                ),
                (
                    "severity",
                    api.db_utils.SeverityEnumField(
                        choices=[
                            ("critical", "Critical"),
                            ("high", "High"),
                            ("medium", "Medium"),
                            ("low", "Low"),
                            ("informational", "Informational"),
                        ],
                    ),
                ),
                # Counters; per the help texts each one is a subset of the previous
                (
                    "total_findings",
                    models.IntegerField(
                        default=0, help_text="Non-muted findings (PASS + FAIL)"
                    ),
                ),
                (
                    "failed_findings",
                    models.IntegerField(
                        default=0,
                        help_text="Non-muted FAIL findings (subset of total_findings)",
                    ),
                ),
                (
                    "new_failed_findings",
                    models.IntegerField(
                        default=0,
                        help_text="Non-muted FAIL with delta='new' (subset of failed_findings)",
                    ),
                ),
                (
                    "resources_count",
                    models.IntegerField(
                        default=0, help_text="Count of distinct resource_uid values"
                    ),
                ),
            ],
            options={
                "db_table": "scan_resource_group_summaries",
                "abstract": False,
            },
        ),
        migrations.AddIndex(
            model_name="scangroupsummary",
            index=models.Index(
                fields=["tenant_id", "scan"], name="srgs_tenant_scan_idx"
            ),
        ),
        # One row per (resource_group, severity) within a tenant's scan
        migrations.AddConstraint(
            model_name="scangroupsummary",
            constraint=models.UniqueConstraint(
                fields=("tenant_id", "scan_id", "resource_group", "severity"),
                name="unique_resource_group_severity_per_scan",
            ),
        ),
        migrations.AddConstraint(
            model_name="scangroupsummary",
            constraint=api.rls.RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_scangroupsummary",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ),
    ]
@@ -1,21 +0,0 @@
from django.contrib.postgres.fields import ArrayField
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable `groups` array column to the `resource` model."""

    dependencies = [
        ("api", "0068_finding_resource_group_scangroupsummary"),
    ]

    operations = [
        migrations.AddField(
            model_name="resource",
            name="groups",
            # Array of strings, each limited to 100 characters
            field=ArrayField(
                models.CharField(max_length=100),
                blank=True,
                help_text="Groups for categorization (e.g., compute, storage, IAM)",
                null=True,
            ),
        ),
    ]
@@ -1,154 +0,0 @@
# Generated by Django 5.1.13 on 2025-11-06 16:20
import django.db.models.deletion
from django.db import migrations, models
from uuid6 import uuid7

import api.db_utils
import api.rls
class Migration(migrations.Migration):
    """
    Create the `AttackPathsScan` model (table `attack_paths_scans`).

    Besides the columns, this adds five indexes (two of them partial, covering
    only rows whose graph database has not been deleted) and a
    row-level-security constraint on `tenant_id`.
    """

    dependencies = [
        ("api", "0069_resource_resource_group"),
    ]

    operations = [
        migrations.CreateModel(
            name="AttackPathsScan",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid7,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("inserted_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "state",
                    api.db_utils.StateEnumField(
                        choices=[
                            ("available", "Available"),
                            ("scheduled", "Scheduled"),
                            ("executing", "Executing"),
                            ("completed", "Completed"),
                            ("failed", "Failed"),
                            ("cancelled", "Cancelled"),
                        ],
                        default="available",
                    ),
                ),
                ("progress", models.IntegerField(default=0)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("completed_at", models.DateTimeField(blank=True, null=True)),
                (
                    "duration",
                    models.IntegerField(
                        blank=True, help_text="Duration in seconds", null=True
                    ),
                ),
                (
                    "update_tag",
                    models.BigIntegerField(
                        blank=True,
                        help_text="Cartography update tag (epoch)",
                        null=True,
                    ),
                ),
                (
                    "graph_database",
                    models.CharField(blank=True, max_length=63, null=True),
                ),
                (
                    "is_graph_database_deleted",
                    models.BooleanField(default=False),
                ),
                (
                    "ingestion_exceptions",
                    models.JSONField(blank=True, default=dict, null=True),
                ),
                # Deleting the provider deletes its attack paths scans
                (
                    "provider",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="attack_paths_scans",
                        related_query_name="attack_paths_scan",
                        to="api.provider",
                    ),
                ),
                # Optional links: deleting the scan or task only nulls the column
                (
                    "scan",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="attack_paths_scans",
                        related_query_name="attack_paths_scan",
                        to="api.scan",
                    ),
                ),
                (
                    "task",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="attack_paths_scans",
                        related_query_name="attack_paths_scan",
                        to="api.task",
                    ),
                ),
                (
                    "tenant",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="api.tenant"
                    ),
                ),
            ],
            options={
                "db_table": "attack_paths_scans",
                "abstract": False,
                "indexes": [
                    models.Index(
                        fields=["tenant_id", "provider_id", "-inserted_at"],
                        name="aps_prov_ins_desc_idx",
                    ),
                    models.Index(
                        fields=["tenant_id", "state", "-inserted_at"],
                        name="aps_state_ins_desc_idx",
                    ),
                    models.Index(
                        fields=["tenant_id", "scan_id"],
                        name="aps_scan_lookup_idx",
                    ),
                    # Partial index: only rows whose graph database still exists
                    models.Index(
                        fields=["tenant_id", "provider_id"],
                        name="aps_active_graph_idx",
                        include=["graph_database", "id"],
                        condition=models.Q(("is_graph_database_deleted", False)),
                    ),
                    # Partial index: completed scans whose graph database still exists
                    models.Index(
                        fields=["tenant_id", "provider_id", "-completed_at"],
                        name="aps_completed_graph_idx",
                        include=["graph_database", "id"],
                        condition=models.Q(
                            ("state", "completed"),
                            ("is_graph_database_deleted", False),
                        ),
                    ),
                ],
            },
        ),
        migrations.AddConstraint(
            model_name="attackpathsscan",
            constraint=api.rls.RowLevelSecurityConstraint(
                "tenant_id",
                name="rls_on_attackpathsscan",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ),
    ]
-167
View File
@@ -626,101 +626,6 @@ class Scan(RowLevelSecurityProtectedModel):
resource_name = "scans"
class AttackPathsScan(RowLevelSecurityProtectedModel):
    """
    Attack Paths scan run for a provider.

    Tracks state, progress and timing of the run, its optional links to a
    `Scan` and a `Task`, and which graph database holds the result.
    """

    # `objects` is the default manager; `all_objects` is a plain, unfiltered one
    objects = ActiveProviderManager()
    all_objects = models.Manager()

    id = models.UUIDField(primary_key=True, default=uuid7, editable=False)
    inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
    updated_at = models.DateTimeField(auto_now=True, editable=False)

    state = StateEnumField(choices=StateChoices.choices, default=StateChoices.AVAILABLE)
    progress = models.IntegerField(default=0)

    # Timing
    started_at = models.DateTimeField(null=True, blank=True)
    completed_at = models.DateTimeField(null=True, blank=True)
    duration = models.IntegerField(
        null=True, blank=True, help_text="Duration in seconds"
    )

    # Relationship to the provider and optional prowler Scan and celery Task
    provider = models.ForeignKey(
        "Provider",
        on_delete=models.CASCADE,
        related_name="attack_paths_scans",
        related_query_name="attack_paths_scan",
    )
    scan = models.ForeignKey(
        "Scan",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="attack_paths_scans",
        related_query_name="attack_paths_scan",
    )
    task = models.ForeignKey(
        "Task",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="attack_paths_scans",
        related_query_name="attack_paths_scan",
    )

    # Cartography specific metadata
    update_tag = models.BigIntegerField(
        null=True, blank=True, help_text="Cartography update tag (epoch)"
    )
    graph_database = models.CharField(max_length=63, null=True, blank=True)
    is_graph_database_deleted = models.BooleanField(default=False)
    ingestion_exceptions = models.JSONField(default=dict, null=True, blank=True)

    class Meta(RowLevelSecurityProtectedModel.Meta):
        db_table = "attack_paths_scans"

        constraints = [
            RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_%(class)s",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ]

        indexes = [
            models.Index(
                fields=["tenant_id", "provider_id", "-inserted_at"],
                name="aps_prov_ins_desc_idx",
            ),
            models.Index(
                fields=["tenant_id", "state", "-inserted_at"],
                name="aps_state_ins_desc_idx",
            ),
            models.Index(
                fields=["tenant_id", "scan_id"],
                name="aps_scan_lookup_idx",
            ),
            # Partial index: only rows whose graph database still exists
            models.Index(
                fields=["tenant_id", "provider_id"],
                name="aps_active_graph_idx",
                include=["graph_database", "id"],
                condition=Q(is_graph_database_deleted=False),
            ),
            # Partial index: completed scans whose graph database still exists
            models.Index(
                fields=["tenant_id", "provider_id", "-completed_at"],
                name="aps_completed_graph_idx",
                include=["graph_database", "id"],
                condition=Q(
                    state=StateChoices.COMPLETED,
                    is_graph_database_deleted=False,
                ),
            ),
        ]

    class JSONAPIMeta:
        resource_name = "attack-paths-scans"
class ResourceTag(RowLevelSecurityProtectedModel):
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
inserted_at = models.DateTimeField(auto_now_add=True, editable=False)
@@ -799,12 +704,6 @@ class Resource(RowLevelSecurityProtectedModel):
metadata = models.TextField(blank=True, null=True)
details = models.TextField(blank=True, null=True)
partition = models.TextField(blank=True, null=True)
groups = ArrayField(
models.CharField(max_length=100),
blank=True,
null=True,
help_text="Groups for categorization (e.g., compute, storage, IAM)",
)
failed_findings_count = models.IntegerField(default=0)
@@ -991,11 +890,6 @@ class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
null=True,
help_text="Categories from check metadata for efficient filtering",
)
resource_groups = models.TextField(
blank=True,
null=True,
help_text="Resource group from check metadata for efficient filtering",
)
# Relationships
scan = models.ForeignKey(to=Scan, related_name="findings", on_delete=models.CASCADE)
@@ -2138,67 +2032,6 @@ class ScanCategorySummary(RowLevelSecurityProtectedModel):
resource_name = "scan-category-summaries"
class ScanGroupSummary(RowLevelSecurityProtectedModel):
    """
    Pre-aggregated resource group metrics per scan by severity.

    Stores one row per (resource_group, severity) combination per scan for efficient
    overview queries. Resource groups come from check_metadata.Group.

    Count relationships (each is a subset of the previous):
    - total_findings >= failed_findings >= new_failed_findings
    """

    id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
    inserted_at = models.DateTimeField(auto_now_add=True, editable=False)

    # Summaries are removed together with their scan
    scan = models.ForeignKey(
        Scan,
        on_delete=models.CASCADE,
        related_name="resource_group_summaries",
        related_query_name="resource_group_summary",
    )

    resource_group = models.CharField(max_length=50)
    severity = SeverityEnumField(choices=SeverityChoices)

    total_findings = models.IntegerField(
        default=0, help_text="Non-muted findings (PASS + FAIL)"
    )
    failed_findings = models.IntegerField(
        default=0, help_text="Non-muted FAIL findings (subset of total_findings)"
    )
    new_failed_findings = models.IntegerField(
        default=0,
        help_text="Non-muted FAIL with delta='new' (subset of failed_findings)",
    )
    resources_count = models.IntegerField(
        default=0, help_text="Count of distinct resource_uid values"
    )

    class Meta(RowLevelSecurityProtectedModel.Meta):
        db_table = "scan_resource_group_summaries"

        indexes = [
            models.Index(fields=["tenant_id", "scan"], name="srgs_tenant_scan_idx"),
        ]

        constraints = [
            # Enforces the one-row-per-(resource_group, severity) rule per scan
            models.UniqueConstraint(
                fields=("tenant_id", "scan_id", "resource_group", "severity"),
                name="unique_resource_group_severity_per_scan",
            ),
            RowLevelSecurityConstraint(
                field="tenant_id",
                name="rls_on_%(class)s",
                statements=["SELECT", "INSERT", "UPDATE", "DELETE"],
            ),
        ]

    class JSONAPIMeta:
        resource_name = "scan-resource-group-summaries"
class LighthouseConfiguration(RowLevelSecurityProtectedModel):
"""
Stores configuration and API keys for LLM services.
File diff suppressed because it is too large Load Diff
+1 -83
View File
@@ -1,13 +1,10 @@
import os
import sys
import types
from pathlib import Path
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock
import pytest
from django.conf import settings
import api
import api.apps as api_apps_module
from api.apps import (
ApiConfig,
@@ -153,82 +150,3 @@ def test_ensure_crypto_keys_skips_when_env_vars(monkeypatch, tmp_path):
# Assert: orchestrator did not trigger generation when env present
assert called["ensure"] is False
@pytest.fixture(autouse=True)
def stub_api_modules():
    """
    Register empty stand-ins for modules imported during ApiConfig.ready().

    Only modules missing from `sys.modules` are stubbed, and only those stubs
    are removed again once the test has finished.
    """
    placeholders = [
        module_name
        for module_name in ("api.schema_extensions", "api.signals")
        if module_name not in sys.modules
    ]
    for module_name in placeholders:
        sys.modules[module_name] = types.ModuleType(module_name)

    yield

    for module_name in placeholders:
        sys.modules.pop(module_name, None)
def _set_argv(monkeypatch, argv):
    """Replace `sys.argv` with `argv` through the given monkeypatch fixture."""
    monkeypatch.setattr(sys, "argv", argv, raising=False)
def _set_testing(monkeypatch, value):
    """Set `settings.TESTING` to `value` through the given monkeypatch fixture."""
    # raising=False: do not fail if the setting is not defined yet
    monkeypatch.setattr(settings, "TESTING", value, raising=False)
def _make_app():
    """Build a fresh `ApiConfig` bound to the `api` package."""
    return ApiConfig("api", api)
def test_ready_initializes_driver_for_api_process(monkeypatch):
    """With a gunicorn argv and TESTING off, ready() initializes the driver once."""
    app_config = _make_app()
    _set_argv(monkeypatch, ["gunicorn"])
    _set_testing(monkeypatch, False)

    with (
        patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None),
        patch("api.attack_paths.database.init_driver") as driver_init,
    ):
        app_config.ready()

    driver_init.assert_called_once()
def test_ready_skips_driver_for_celery(monkeypatch):
    """With a celery argv, ready() must not initialize the driver."""
    app_config = _make_app()
    _set_argv(monkeypatch, ["celery", "-A", "api"])
    _set_testing(monkeypatch, False)

    with (
        patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None),
        patch("api.attack_paths.database.init_driver") as driver_init,
    ):
        app_config.ready()

    driver_init.assert_not_called()
def test_ready_skips_driver_for_manage_py_skip_command(monkeypatch):
    """With `manage.py migrate` as argv, ready() must not initialize the driver."""
    app_config = _make_app()
    _set_argv(monkeypatch, ["manage.py", "migrate"])
    _set_testing(monkeypatch, False)

    with (
        patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None),
        patch("api.attack_paths.database.init_driver") as driver_init,
    ):
        app_config.ready()

    driver_init.assert_not_called()
def test_ready_skips_driver_when_testing(monkeypatch):
    """With TESTING on, ready() must not initialize the driver even under gunicorn."""
    app_config = _make_app()
    _set_argv(monkeypatch, ["gunicorn"])
    _set_testing(monkeypatch, True)

    with (
        patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None),
        patch("api.attack_paths.database.init_driver") as driver_init,
    ):
        app_config.ready()

    driver_init.assert_not_called()
@@ -1,172 +0,0 @@
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from rest_framework.exceptions import APIException, ValidationError
from api.attack_paths import database as graph_database
from api.attack_paths import views_helpers
def test_normalize_run_payload_extracts_attributes_section():
    """An enveloped body is flattened to the id/parameters found under `attributes`."""
    attributes = {
        "id": "aws-rds",
        "parameters": {"ip": "192.0.2.0"},
    }
    envelope = {"data": {"id": "ignored", "attributes": attributes}}

    normalized = views_helpers.normalize_run_payload(envelope)

    assert normalized == {"id": "aws-rds", "parameters": {"ip": "192.0.2.0"}}
def test_normalize_run_payload_passthrough_for_non_dict():
    """A non-dict body is returned as the very same object."""
    raw_body = "not-a-dict"

    returned = views_helpers.normalize_run_payload(raw_body)

    assert returned is raw_body
def test_prepare_query_parameters_includes_provider_and_casts(
    attack_paths_query_definition_factory,
):
    """The provider uid is injected and declared parameters are cast."""
    int_definition = attack_paths_query_definition_factory(cast_type=int)

    prepared = views_helpers.prepare_query_parameters(
        int_definition,
        {"limit": "5"},
        provider_uid="123456789012",
    )

    assert prepared["provider_uid"] == "123456789012"
    assert prepared["limit"] == 5
@pytest.mark.parametrize(
    "provided,expected_message",
    [
        ({}, "Missing required parameter"),
        ({"limit": 10, "extra": True}, "Unknown parameter"),
    ],
)
def test_prepare_query_parameters_validates_names(
    attack_paths_query_definition_factory, provided, expected_message
):
    """Missing and unknown parameter names are both rejected with a ValidationError."""
    default_definition = attack_paths_query_definition_factory()

    with pytest.raises(ValidationError) as raised:
        views_helpers.prepare_query_parameters(
            default_definition, provided, provider_uid="1"
        )

    assert expected_message in str(raised.value)
def test_prepare_query_parameters_validates_cast(
    attack_paths_query_definition_factory,
):
    """A value that cannot be cast is reported as a ValidationError."""
    int_definition = attack_paths_query_definition_factory(cast_type=int)
    uncastable = {"limit": "not-an-int"}

    with pytest.raises(ValidationError) as raised:
        views_helpers.prepare_query_parameters(
            int_definition,
            uncastable,
            provider_uid="1",
        )

    assert "Invalid value" in str(raised.value)
def test_execute_attack_paths_query_serializes_graph(
    attack_paths_query_definition_factory, attack_paths_graph_stub_classes
):
    """The query runs on the scan's database and its graph comes back as plain dicts."""
    stubs = attack_paths_graph_stub_classes
    query_definition = attack_paths_query_definition_factory(
        id="aws-rds",
        name="RDS",
        description="",
        cypher="MATCH (n) RETURN n",
        parameters=[],
    )
    query_parameters = {"provider_uid": "123"}
    scan = SimpleNamespace(graph_database="tenant-db")

    # Graph under test: one account node that OWNS an RDS instance node
    account_node = stubs.Node(
        element_id="node-1",
        labels=["AWSAccount"],
        properties={
            "name": "account",
            "complex": {
                "items": [
                    stubs.NativeValue("value"),
                    {"nested": 1},
                ]
            },
        },
    )
    rds_node = stubs.Node("node-2", ["RDSInstance"], {})
    ownership = stubs.Relationship(
        element_id="rel-1",
        rel_type="OWNS",
        start_node=account_node,
        end_node=rds_node,
        properties={"weight": 1},
    )

    # session.run(...).graph() hands back the stub graph
    fake_result = MagicMock()
    fake_result.graph.return_value = SimpleNamespace(
        nodes=[account_node], relationships=[ownership]
    )
    fake_session = MagicMock()
    fake_session.run.return_value = fake_result

    session_manager = MagicMock()
    session_manager.__enter__.return_value = fake_session
    session_manager.__exit__.return_value = False

    with patch(
        "api.attack_paths.views_helpers.graph_database.get_session",
        return_value=session_manager,
    ) as get_session_mock:
        serialized = views_helpers.execute_attack_paths_query(
            scan, query_definition, query_parameters
        )

    get_session_mock.assert_called_once_with("tenant-db")
    fake_session.run.assert_called_once_with(query_definition.cypher, query_parameters)

    first_node = serialized["nodes"][0]
    assert first_node["id"] == "node-1"
    assert first_node["properties"]["complex"]["items"][0] == "value"
    assert serialized["relationships"][0]["label"] == "OWNS"
def test_execute_attack_paths_query_wraps_graph_errors(
    attack_paths_query_definition_factory,
):
    """A graph-database failure on session entry becomes ``APIException`` and is logged once."""

    class FailingSessionContext:
        """Context manager whose ``__enter__`` always raises a graph query error."""

        def __enter__(self):
            raise graph_database.GraphDatabaseQueryException("boom")

        def __exit__(self, exc_type, exc, tb):
            return False

    query_definition = attack_paths_query_definition_factory(
        id="aws-rds",
        name="RDS",
        description="",
        cypher="MATCH (n) RETURN n",
        parameters=[],
    )
    scan_stub = SimpleNamespace(graph_database="tenant-db")
    query_parameters = {"provider_uid": "123"}

    with (
        patch(
            "api.attack_paths.views_helpers.graph_database.get_session",
            return_value=FailingSessionContext(),
        ),
        patch("api.attack_paths.views_helpers.logger") as logger_mock,
    ):
        with pytest.raises(APIException):
            views_helpers.execute_attack_paths_query(
                scan_stub, query_definition, query_parameters
            )

    # The mock keeps its call history after the patch is undone.
    logger_mock.error.assert_called_once()
@@ -1,303 +0,0 @@
"""
Tests for Neo4j database lazy initialization.
The Neo4j driver connects on first use by default. API processes may
eagerly initialize the driver during app startup, while Celery workers
remain lazy. These tests validate the database module behavior itself.
"""
import threading
from unittest.mock import MagicMock, patch
import pytest
class TestLazyInitialization:
    """Verify that the Neo4j driver singleton is only built when first requested."""

    @staticmethod
    def _neo4j_databases():
        """Return a fresh fake ``DATABASES`` setting with a ``neo4j`` entry."""
        return {
            "neo4j": {
                "HOST": "localhost",
                "PORT": 7687,
                "USER": "neo4j",
                "PASSWORD": "password",
            }
        }

    @pytest.fixture(autouse=True)
    def reset_module_state(self):
        """Blank the module-level ``_driver`` for the test and restore it afterwards."""
        import api.attack_paths.database as db_module

        saved_driver = db_module._driver
        db_module._driver = None
        yield
        db_module._driver = saved_driver

    def test_driver_not_initialized_at_import(self):
        """With no explicit initialization the cached driver stays ``None``."""
        import api.attack_paths.database as db_module

        assert db_module._driver is None

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_init_driver_creates_connection_on_first_call(
        self, mock_driver_factory, mock_settings
    ):
        """``init_driver()`` builds, verifies and caches the driver when called."""
        import api.attack_paths.database as db_module

        fake_driver = MagicMock()
        mock_driver_factory.return_value = fake_driver
        mock_settings.DATABASES = self._neo4j_databases()

        assert db_module._driver is None

        returned_driver = db_module.init_driver()

        mock_driver_factory.assert_called_once()
        fake_driver.verify_connectivity.assert_called_once()
        assert returned_driver is fake_driver
        assert db_module._driver is fake_driver

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_init_driver_returns_cached_driver_on_subsequent_calls(
        self, mock_driver_factory, mock_settings
    ):
        """Repeated ``init_driver()`` calls reuse the first driver without reconnecting."""
        import api.attack_paths.database as db_module

        fake_driver = MagicMock()
        mock_driver_factory.return_value = fake_driver
        mock_settings.DATABASES = self._neo4j_databases()

        drivers = [db_module.init_driver() for _ in range(3)]

        # Only one connection attempt
        assert mock_driver_factory.call_count == 1
        assert fake_driver.verify_connectivity.call_count == 1

        # All calls return same instance
        assert drivers[0] is drivers[1] is drivers[2]

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_get_driver_delegates_to_init_driver(
        self, mock_driver_factory, mock_settings
    ):
        """``get_driver()`` on a cold module creates the driver exactly once."""
        import api.attack_paths.database as db_module

        fake_driver = MagicMock()
        mock_driver_factory.return_value = fake_driver
        mock_settings.DATABASES = self._neo4j_databases()

        obtained_driver = db_module.get_driver()

        assert obtained_driver is fake_driver
        mock_driver_factory.assert_called_once()
class TestAtexitRegistration:
    """Verify how ``init_driver()`` registers the ``close_driver`` atexit hook."""

    @staticmethod
    def _neo4j_databases():
        """Return a fresh fake ``DATABASES`` setting with a ``neo4j`` entry."""
        return {
            "neo4j": {
                "HOST": "localhost",
                "PORT": 7687,
                "USER": "neo4j",
                "PASSWORD": "password",
            }
        }

    @pytest.fixture(autouse=True)
    def reset_module_state(self):
        """Blank the module-level ``_driver`` for the test and restore it afterwards."""
        import api.attack_paths.database as db_module

        saved_driver = db_module._driver
        db_module._driver = None
        yield
        db_module._driver = saved_driver

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.atexit.register")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_atexit_registered_on_first_init(
        self, mock_driver_factory, mock_atexit_register, mock_settings
    ):
        """The first ``init_driver()`` registers ``close_driver`` with ``atexit``."""
        import api.attack_paths.database as db_module

        mock_driver_factory.return_value = MagicMock()
        mock_settings.DATABASES = self._neo4j_databases()

        db_module.init_driver()

        mock_atexit_register.assert_called_once_with(db_module.close_driver)

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.atexit.register")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_atexit_registered_only_once(
        self, mock_driver_factory, mock_atexit_register, mock_settings
    ):
        """Several ``init_driver()`` calls still register the atexit hook a single time.

        Presumably later calls return the already-cached driver and never
        reach the registration code; the test only pins the call count.
        """
        import api.attack_paths.database as db_module

        mock_driver_factory.return_value = MagicMock()
        mock_settings.DATABASES = self._neo4j_databases()

        for _ in range(3):
            db_module.init_driver()

        # Only registered once across the three initializations
        assert mock_atexit_register.call_count == 1
class TestCloseDriver:
    """Verify the cleanup performed by ``close_driver()``."""

    @pytest.fixture(autouse=True)
    def reset_module_state(self):
        """Blank the module-level ``_driver`` for the test and restore it afterwards."""
        import api.attack_paths.database as db_module

        saved_driver = db_module._driver
        db_module._driver = None
        yield
        db_module._driver = saved_driver

    def test_close_driver_closes_and_clears_driver(self):
        """``close_driver()`` calls ``close()`` on the cached driver and resets it to ``None``."""
        import api.attack_paths.database as db_module

        fake_driver = MagicMock()
        db_module._driver = fake_driver

        db_module.close_driver()

        fake_driver.close.assert_called_once()
        assert db_module._driver is None

    def test_close_driver_handles_none_driver(self):
        """``close_driver()`` is a harmless no-op when no driver is cached."""
        import api.attack_paths.database as db_module

        db_module._driver = None

        # Should not raise
        db_module.close_driver()

        assert db_module._driver is None

    def test_close_driver_clears_driver_even_on_close_error(self):
        """If ``close()`` raises, the error propagates but the cached driver is still cleared."""
        import api.attack_paths.database as db_module

        failing_driver = MagicMock()
        failing_driver.close.side_effect = Exception("Connection error")
        db_module._driver = failing_driver

        with pytest.raises(Exception, match="Connection error"):
            db_module.close_driver()

        # Driver should still be cleared
        assert db_module._driver is None
class TestThreadSafety:
    """Test thread-safe initialization."""

    @pytest.fixture(autouse=True)
    def reset_module_state(self):
        """Reset module-level singleton state before each test."""
        import api.attack_paths.database as db_module

        original_driver = db_module._driver
        db_module._driver = None
        yield
        db_module._driver = original_driver

    @patch("api.attack_paths.database.settings")
    @patch("api.attack_paths.database.neo4j.GraphDatabase.driver")
    def test_concurrent_init_creates_single_driver(
        self, mock_driver_factory, mock_settings
    ):
        """Multiple threads calling init_driver() should create only one driver.

        All worker threads block on a barrier and are released together, so
        the calls to ``init_driver()`` genuinely overlap instead of running
        one after another.
        """
        import api.attack_paths.database as db_module

        mock_driver = MagicMock()
        mock_driver_factory.return_value = mock_driver
        mock_settings.DATABASES = {
            "neo4j": {
                "HOST": "localhost",
                "PORT": 7687,
                "USER": "neo4j",
                "PASSWORD": "password",
            }
        }

        thread_count = 10
        # Without the barrier each thread tends to finish before the next one
        # is even started, so the initialization is never actually contended.
        start_barrier = threading.Barrier(thread_count)
        results = []
        errors = []

        def call_init():
            try:
                # The timeout turns a stuck barrier into a recorded error
                # (BrokenBarrierError) rather than a hung test run.
                start_barrier.wait(timeout=5)
                results.append(db_module.init_driver())
            except Exception as e:
                errors.append(e)

        threads = [threading.Thread(target=call_init) for _ in range(thread_count)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Threads raised errors: {errors}"

        # Only one driver created
        assert mock_driver_factory.call_count == 1

        # All threads got the same driver instance
        assert all(r is mock_driver for r in results)
        assert len(results) == thread_count
+10 -716
View File
@@ -32,10 +32,6 @@ from django_celery_results.models import TaskResult
from rest_framework import status
from rest_framework.response import Response
from api.attack_paths import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
)
from api.compliance import get_compliance_frameworks
from api.db_router import MainRouter
from api.models import (
@@ -3606,423 +3602,6 @@ class TestTaskViewSet:
assert response.status_code == status.HTTP_400_BAD_REQUEST
@pytest.mark.django_db
class TestAttackPathsScanViewSet:
@staticmethod
def _run_payload(query_id="aws-rds", parameters=None):
return {
"data": {
"type": "attack-paths-query-run-requests",
"attributes": {
"id": query_id,
"parameters": parameters or {},
},
}
}
def test_attack_paths_scans_list_returns_latest_entry_per_provider(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """The list endpoint returns one scan per provider: the one created last."""
    first_provider = providers_fixture[0]
    second_provider = providers_fixture[1]

    # Creation order matters: the second scan of first_provider is the one
    # expected in the response, the first one must be hidden.
    superseded_scan = create_attack_paths_scan(
        first_provider,
        scan=scans_fixture[0],
        state=StateChoices.AVAILABLE,
        progress=10,
    )
    newest_scan = create_attack_paths_scan(
        first_provider,
        scan=scans_fixture[0],
        state=StateChoices.COMPLETED,
        progress=95,
    )
    second_provider_scan = create_attack_paths_scan(
        second_provider,
        scan=scans_fixture[2],
        state=StateChoices.FAILED,
        progress=50,
    )

    list_url = reverse("attack-paths-scans-list")
    response = authenticated_client.get(list_url)

    assert response.status_code == status.HTTP_200_OK
    entries = response.json()["data"]
    returned_ids = {entry["id"] for entry in entries}
    assert returned_ids == {str(newest_scan.id), str(second_provider_scan.id)}
    assert str(superseded_scan.id) not in returned_ids

    # The entry for first_provider also exposes denormalized provider fields.
    first_provider_entry = next(
        entry
        for entry in entries
        if entry["relationships"]["provider"]["data"]["id"]
        == str(first_provider.id)
    )
    entry_attributes = first_provider_entry["attributes"]
    assert entry_attributes["provider_alias"] == first_provider.alias
    assert entry_attributes["provider_type"] == first_provider.provider
    assert entry_attributes["provider_uid"] == first_provider.uid
def test_attack_paths_scans_list_respects_provider_group_visibility(
    self,
    authenticated_client_no_permissions_rbac,
    providers_fixture,
    create_attack_paths_scan,
):
    """A restricted user only sees scans of providers in their role's provider groups."""
    restricted_client = authenticated_client_no_permissions_rbac
    restricted_user = restricted_client.user
    tenant = Membership.objects.filter(user=restricted_user).first().tenant

    visible_provider = providers_fixture[0]
    hidden_provider = providers_fixture[1]
    visible_scan = create_attack_paths_scan(visible_provider)
    create_attack_paths_scan(hidden_provider)

    # Link the user's first role to a group holding only visible_provider.
    group = ProviderGroup.objects.create(
        name="limited-group",
        tenant_id=tenant.id,
    )
    ProviderGroupMembership.objects.create(
        tenant_id=tenant.id,
        provider_group=group,
        provider=visible_provider,
    )
    RoleProviderGroupRelationship.objects.create(
        tenant_id=tenant.id,
        role=restricted_user.roles.first(),
        provider_group=group,
    )

    response = restricted_client.get(reverse("attack-paths-scans-list"))

    assert response.status_code == status.HTTP_200_OK
    entries = response.json()["data"]
    assert len(entries) == 1
    assert entries[0]["id"] == str(visible_scan.id)
def test_attack_paths_scan_retrieve(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """The detail endpoint returns the scan with its provider relationship and state."""
    owner_provider = providers_fixture[0]
    stored_scan = create_attack_paths_scan(
        owner_provider,
        scan=scans_fixture[0],
        state=StateChoices.COMPLETED,
        progress=80,
    )
    detail_url = reverse(
        "attack-paths-scans-detail", kwargs={"pk": stored_scan.id}
    )

    response = authenticated_client.get(detail_url)

    assert response.status_code == status.HTTP_200_OK
    resource = response.json()["data"]
    assert resource["id"] == str(stored_scan.id)
    assert resource["relationships"]["provider"]["data"]["id"] == str(
        owner_provider.id
    )
    assert resource["attributes"]["state"] == StateChoices.COMPLETED
def test_attack_paths_scan_retrieve_not_found_for_foreign_tenant(
    self, authenticated_client, create_attack_paths_scan
):
    """A scan whose provider belongs to a newly created, separate tenant yields 404."""
    foreign_tenant = Tenant.objects.create(name="Foreign AttackPaths Tenant")
    provider_of_foreign_tenant = Provider.objects.create(
        provider="aws",
        uid="333333333333",
        alias="foreign",
        tenant_id=foreign_tenant.id,
    )
    scan_of_foreign_tenant = create_attack_paths_scan(provider_of_foreign_tenant)
    detail_url = reverse(
        "attack-paths-scans-detail", kwargs={"pk": scan_of_foreign_tenant.id}
    )

    response = authenticated_client.get(detail_url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
def test_attack_paths_queries_returns_catalog(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """The queries action serializes the catalog returned for the scan's provider type."""
    scanned_provider = providers_fixture[0]
    stored_scan = create_attack_paths_scan(
        scanned_provider,
        scan=scans_fixture[0],
    )

    ip_parameter = AttackPathsQueryParameterDefinition(name="ip", label="IP address")
    catalog = [
        AttackPathsQueryDefinition(
            id="aws-rds",
            name="RDS inventory",
            description="List account RDS assets",
            provider=scanned_provider.provider,
            cypher="MATCH (n) RETURN n",
            parameters=[ip_parameter],
        )
    ]

    # The catalog lookup is patched where the view module references it.
    with patch(
        "api.v1.views.get_queries_for_provider", return_value=catalog
    ) as get_queries_mock:
        queries_url = reverse(
            "attack-paths-scans-queries", kwargs={"pk": stored_scan.id}
        )
        response = authenticated_client.get(queries_url)

    assert response.status_code == status.HTTP_200_OK
    get_queries_mock.assert_called_once_with(scanned_provider.provider)

    listed_queries = response.json()["data"]
    assert len(listed_queries) == 1
    only_query = listed_queries[0]
    assert only_query["id"] == "aws-rds"
    assert only_query["attributes"]["name"] == "RDS inventory"
    assert only_query["attributes"]["parameters"][0]["name"] == "ip"
def test_attack_paths_queries_returns_404_when_catalog_missing(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """An empty query catalog for the provider produces a 404 with "No queries found"."""
    scanned_provider = providers_fixture[0]
    stored_scan = create_attack_paths_scan(scanned_provider, scan=scans_fixture[0])

    with patch("api.v1.views.get_queries_for_provider", return_value=[]):
        queries_url = reverse(
            "attack-paths-scans-queries", kwargs={"pk": stored_scan.id}
        )
        response = authenticated_client.get(queries_url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
    assert "No queries found" in str(response.json())
def test_run_attack_paths_query_returns_graph(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """Running a known query returns the helper's graph payload and clears the graph cache.

    Query lookup, parameter preparation, execution and cache clearing are all
    patched on the view module, so the test pins how the view wires them
    together rather than their real behaviour.
    """
    scanned_provider = providers_fixture[0]
    stored_scan = create_attack_paths_scan(
        scanned_provider,
        scan=scans_fixture[0],
        graph_database="tenant-db",
    )
    rds_query = AttackPathsQueryDefinition(
        id="aws-rds",
        name="RDS inventory",
        description="List account RDS assets",
        provider=scanned_provider.provider,
        cypher="MATCH (n) RETURN n",
        parameters=[],
    )
    prepared = {"provider_uid": scanned_provider.uid}

    expected_nodes = [
        {
            "id": "node-1",
            "labels": ["AWSAccount"],
            "properties": {"name": "root"},
        }
    ]
    expected_relationships = [
        {
            "id": "rel-1",
            "label": "OWNS",
            "source": "node-1",
            "target": "node-2",
            "properties": {},
        }
    ]
    helper_result = {
        "nodes": expected_nodes,
        "relationships": expected_relationships,
    }

    with (
        patch(
            "api.v1.views.get_query_by_id", return_value=rds_query
        ) as get_query_mock,
        patch(
            "api.v1.views.attack_paths_views_helpers.prepare_query_parameters",
            return_value=prepared,
        ) as prepare_mock,
        patch(
            "api.v1.views.attack_paths_views_helpers.execute_attack_paths_query",
            return_value=helper_result,
        ) as execute_mock,
        patch("api.v1.views.graph_database.clear_cache") as clear_cache_mock,
    ):
        run_url = reverse(
            "attack-paths-scans-queries-run",
            kwargs={"pk": stored_scan.id},
        )
        response = authenticated_client.post(
            run_url,
            data=self._run_payload("aws-rds"),
            content_type=API_JSON_CONTENT_TYPE,
        )

    assert response.status_code == status.HTTP_200_OK

    # Each collaborator is invoked exactly once with the expected arguments.
    get_query_mock.assert_called_once_with("aws-rds")
    prepare_mock.assert_called_once_with(
        rds_query,
        {},
        stored_scan.provider.uid,
    )
    execute_mock.assert_called_once_with(
        stored_scan,
        rds_query,
        prepared,
    )
    clear_cache_mock.assert_called_once_with(stored_scan.graph_database)

    returned_attributes = response.json()["data"]["attributes"]
    assert returned_attributes["nodes"] == helper_result["nodes"]
    assert returned_attributes["relationships"] == helper_result["relationships"]
def test_run_attack_paths_query_requires_completed_scan(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """Running a query against a scan still in ``EXECUTING`` state is a 400."""
    unfinished_scan = create_attack_paths_scan(
        providers_fixture[0],
        scan=scans_fixture[0],
        state=StateChoices.EXECUTING,
    )
    run_url = reverse(
        "attack-paths-scans-queries-run", kwargs={"pk": unfinished_scan.id}
    )

    response = authenticated_client.post(
        run_url,
        data=self._run_payload(),
        content_type=API_JSON_CONTENT_TYPE,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    first_error = response.json()["errors"][0]
    assert "must be completed" in first_error["detail"]
def test_run_attack_paths_query_requires_graph_database(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """A scan stored with ``graph_database=None`` makes the run endpoint answer 500."""
    scan_without_graph = create_attack_paths_scan(
        providers_fixture[0],
        scan=scans_fixture[0],
        graph_database=None,
    )
    run_url = reverse(
        "attack-paths-scans-queries-run", kwargs={"pk": scan_without_graph.id}
    )

    response = authenticated_client.post(
        run_url,
        data=self._run_payload(),
        content_type=API_JSON_CONTENT_TYPE,
    )

    assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
    assert "does not reference a graph database" in str(response.json())
def test_run_attack_paths_query_unknown_query(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """When the query lookup returns ``None`` the run endpoint answers 400."""
    stored_scan = create_attack_paths_scan(
        providers_fixture[0],
        scan=scans_fixture[0],
    )

    with patch("api.v1.views.get_query_by_id", return_value=None):
        run_url = reverse(
            "attack-paths-scans-queries-run",
            kwargs={"pk": stored_scan.id},
        )
        response = authenticated_client.post(
            run_url,
            data=self._run_payload("unknown-query"),
            content_type=API_JSON_CONTENT_TYPE,
        )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    first_error = response.json()["errors"][0]
    assert "Unknown Attack Paths query" in first_error["detail"]
def test_run_attack_paths_query_returns_404_when_no_nodes_found(
    self,
    authenticated_client,
    providers_fixture,
    scans_fixture,
    create_attack_paths_scan,
):
    """A query whose execution yields no nodes and no relationships answers 404.

    The response body may be either a ``data`` document with empty lists or an
    ``errors`` document; both shapes are accepted.
    """
    scanned_provider = providers_fixture[0]
    stored_scan = create_attack_paths_scan(
        scanned_provider,
        scan=scans_fixture[0],
    )
    empty_query = AttackPathsQueryDefinition(
        id="aws-empty",
        name="empty",
        description="",
        provider=scanned_provider.provider,
        cypher="MATCH (n) RETURN n",
    )
    empty_graph = {"nodes": [], "relationships": []}

    with (
        patch("api.v1.views.get_query_by_id", return_value=empty_query),
        patch(
            "api.v1.views.attack_paths_views_helpers.prepare_query_parameters",
            return_value={"provider_uid": scanned_provider.uid},
        ),
        patch(
            "api.v1.views.attack_paths_views_helpers.execute_attack_paths_query",
            return_value=empty_graph,
        ),
        patch("api.v1.views.graph_database.clear_cache"),
    ):
        run_url = reverse(
            "attack-paths-scans-queries-run",
            kwargs={"pk": stored_scan.id},
        )
        response = authenticated_client.post(
            run_url,
            data=self._run_payload("aws-empty"),
            content_type=API_JSON_CONTENT_TYPE,
        )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    body = response.json()
    if "data" not in body:
        assert "errors" in body
    else:
        returned_attributes = body["data"].get("attributes", {})
        assert returned_attributes.get("nodes") == []
        assert returned_attributes.get("relationships") == []
@pytest.mark.django_db
class TestResourceViewSet:
def test_resources_list_none(self, authenticated_client):
@@ -4046,7 +3625,6 @@ class TestResourceViewSet:
assert "metadata" in response.json()["data"][0]["attributes"]
assert "details" in response.json()["data"][0]["attributes"]
assert "partition" in response.json()["data"][0]["attributes"]
assert "groups" in response.json()["data"][0]["attributes"]
@pytest.mark.parametrize(
"include_values, expected_resources",
@@ -4121,10 +3699,6 @@ class TestResourceViewSet:
# full text search on resource tags
("search", "multi word", 1),
("search", "key2", 2),
# groups filter (ArrayField)
("groups", "compute", 2),
("groups", "storage", 1),
("groups.in", "compute,storage", 3),
]
),
)
@@ -4271,14 +3845,12 @@ class TestResourceViewSet:
expected_services = {"ec2", "s3"}
expected_regions = {"us-east-1", "eu-west-1"}
expected_resource_types = {"prowler-test"}
expected_groups = {"compute", "storage"}
assert data["data"]["type"] == "resources-metadata"
assert data["data"]["id"] is None
assert set(data["data"]["attributes"]["services"]) == expected_services
assert set(data["data"]["attributes"]["regions"]) == expected_regions
assert set(data["data"]["attributes"]["types"]) == expected_resource_types
assert set(data["data"]["attributes"]["groups"]) == expected_groups
def test_resources_metadata_resource_filter_retrieve(
self, authenticated_client, resources_fixture, backfill_scan_metadata_fixture
@@ -4314,7 +3886,6 @@ class TestResourceViewSet:
assert data["data"]["attributes"]["services"] == []
assert data["data"]["attributes"]["regions"] == []
assert data["data"]["attributes"]["types"] == []
assert data["data"]["attributes"]["groups"] == []
def test_resources_metadata_invalid_date(self, authenticated_client):
response = authenticated_client.get(
@@ -4354,7 +3925,6 @@ class TestResourceViewSet:
assert attributes["services"] == [latest_scan_resource.service]
assert attributes["regions"] == [latest_scan_resource.region]
assert attributes["types"] == [latest_scan_resource.type]
assert "groups" in attributes
@pytest.mark.django_db
@@ -4847,17 +4417,6 @@ class TestFindingViewSet:
attributes = response.json()["data"]["attributes"]
assert set(attributes["categories"]) == {"gen-ai", "iam"}
def test_findings_metadata_latest_groups(
    self, authenticated_client, latest_scan_finding_with_categories
):
    """The latest-findings metadata exposes a ``groups`` attribute containing ``ai_ml``."""
    metadata_url = reverse("finding-metadata_latest")

    response = authenticated_client.get(metadata_url)

    assert response.status_code == status.HTTP_200_OK
    metadata = response.json()["data"]["attributes"]
    assert "groups" in metadata
    assert "ai_ml" in metadata["groups"]
def test_findings_filter_by_category(
self, authenticated_client, findings_with_categories
):
@@ -4904,49 +4463,6 @@ class TestFindingViewSet:
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["data"]) == 0
def test_findings_filter_by_resource_groups(
    self, authenticated_client, findings_with_group
):
    """Filtering findings by ``resource_groups`` with value ``storage`` returns the one finding carrying that group."""
    grouped_finding = findings_with_group
    query = {
        "filter[resource_groups]": "storage",
        "filter[inserted_at]": grouped_finding.inserted_at.strftime("%Y-%m-%d"),
    }

    response = authenticated_client.get(reverse("finding-list"), query)

    assert response.status_code == status.HTTP_200_OK
    assert len(response.json()["data"]) == 1
    only_finding = response.json()["data"][0]
    assert only_finding["attributes"]["resource_groups"] == "storage"
def test_findings_filter_by_resource_groups_in(
    self, authenticated_client, findings_with_multiple_groups
):
    """The ``resource_groups__in`` filter with two groups returns both fixture findings."""
    reference_finding, _ = findings_with_multiple_groups
    query = {
        "filter[resource_groups__in]": "storage,security",
        "filter[inserted_at]": reference_finding.inserted_at.strftime("%Y-%m-%d"),
    }

    response = authenticated_client.get(reverse("finding-list"), query)

    assert response.status_code == status.HTTP_200_OK
    assert len(response.json()["data"]) == 2
def test_findings_filter_by_resource_groups_no_match(
    self, authenticated_client, findings_with_group
):
    """Filtering by a resource group no finding carries yields an empty list."""
    grouped_finding = findings_with_group
    query = {
        "filter[resource_groups]": "nonexistent",
        "filter[inserted_at]": grouped_finding.inserted_at.strftime("%Y-%m-%d"),
    }

    response = authenticated_client.get(reverse("finding-list"), query)

    assert response.status_code == status.HTTP_200_OK
    assert len(response.json()["data"]) == 0
@pytest.mark.django_db
class TestJWTFields:
@@ -8493,228 +8009,6 @@ class TestOverviewViewSet:
assert data[0]["attributes"]["failed_findings"] == 13
assert data[0]["attributes"]["new_failed_findings"] == 5
def test_overview_groups_no_data(self, authenticated_client):
    """With no summaries stored, the resource-groups overview returns an empty list."""
    overview_url = reverse("overview-resource-groups")

    response = authenticated_client.get(overview_url)

    assert response.status_code == status.HTTP_200_OK
    assert response.json()["data"] == []
def test_overview_groups_aggregates_by_group_with_severity(
    self,
    authenticated_client,
    tenants_fixture,
    providers_fixture,
    create_scan_resource_group_summary,
):
    """Finding counters are summed across severities of a group; ``resources_count`` is not."""
    tenant = tenants_fixture[0]
    completed_scan = Scan.objects.create(
        name="resource-groups-scan",
        provider=providers_fixture[0],
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )

    # (group, severity, total, failed, new_failed, resources_count)
    # resources_count is group-level: both "storage" rows carry the same 8.
    summary_rows = (
        ("storage", "high", 20, 10, 5, 8),
        ("storage", "medium", 15, 7, 3, 8),
        ("security", "critical", 10, 8, 2, 4),
    )
    for group, severity, total, failed, new_failed, resources in summary_rows:
        create_scan_resource_group_summary(
            tenant,
            completed_scan,
            group,
            severity,
            total_findings=total,
            failed_findings=failed,
            new_failed_findings=new_failed,
            resources_count=resources,
        )

    response = authenticated_client.get(reverse("overview-resource-groups"))

    assert response.status_code == status.HTTP_200_OK
    entries = response.json()["data"]
    assert len(entries) == 2

    storage_attrs = next(e for e in entries if e["id"] == "storage")["attributes"]
    security_attrs = next(e for e in entries if e["id"] == "security")["attributes"]

    assert storage_attrs["total_findings"] == 35
    assert storage_attrs["failed_findings"] == 17
    assert storage_attrs["new_failed_findings"] == 8
    # Group-level, not sum
    assert storage_attrs["resources_count"] == 8

    assert security_attrs["total_findings"] == 10
    assert security_attrs["failed_findings"] == 8
    assert security_attrs["resources_count"] == 4
@pytest.mark.parametrize(
    "filter_key,filter_value_fn,expected_total,expected_failed",
    [
        ("filter[provider_id]", lambda p1, p2: str(p1.id), 10, 5),
        ("filter[provider_id__in]", lambda p1, p2: f"{p1.id},{p2.id}", 25, 12),
        ("filter[provider_type]", lambda p1, p2: "aws", 10, 5),
        ("filter[provider_type__in]", lambda p1, p2: "aws,gcp", 25, 12),
    ],
)
def test_overview_groups_provider_filters(
    self,
    authenticated_client,
    tenants_fixture,
    providers_fixture,
    create_scan_resource_group_summary,
    filter_key,
    filter_value_fn,
    expected_total,
    expected_failed,
):
    """Provider id/type filters restrict which scans feed the ``storage`` totals.

    Each case builds the filter value from the two providers used below and
    states the totals expected once that filter is applied.
    """
    tenant = tenants_fixture[0]
    aws_provider = providers_fixture[0]  # AWS
    gcp_provider = providers_fixture[2]  # GCP

    aws_scan = Scan.objects.create(
        name="aws-rg-scan",
        provider=aws_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )
    gcp_scan = Scan.objects.create(
        name="gcp-rg-scan",
        provider=gcp_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )

    # One "storage"/"high" summary per scan, with distinct counters.
    create_scan_resource_group_summary(
        tenant, aws_scan, "storage", "high", total_findings=10, failed_findings=5
    )
    create_scan_resource_group_summary(
        tenant, gcp_scan, "storage", "high", total_findings=15, failed_findings=7
    )

    query = {filter_key: filter_value_fn(aws_provider, gcp_provider)}
    response = authenticated_client.get(reverse("overview-resource-groups"), query)

    assert response.status_code == status.HTTP_200_OK
    entries = response.json()["data"]
    assert len(entries) == 1
    storage_attrs = entries[0]["attributes"]
    assert storage_attrs["total_findings"] == expected_total
    assert storage_attrs["failed_findings"] == expected_failed
def test_overview_groups_group_filter(
    self,
    authenticated_client,
    tenants_fixture,
    providers_fixture,
    create_scan_resource_group_summary,
):
    """``filter[resource_group__in]`` keeps only the listed groups in the overview."""
    tenant = tenants_fixture[0]
    completed_scan = Scan.objects.create(
        name="rg-filter-scan",
        provider=providers_fixture[0],
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )

    # (group, severity, total_findings, failed_findings)
    summary_rows = (
        ("storage", "high", 10, 5),
        ("compute", "medium", 20, 8),
        ("security", "low", 15, 3),
    )
    for group, severity, total, failed in summary_rows:
        create_scan_resource_group_summary(
            tenant,
            completed_scan,
            group,
            severity,
            total_findings=total,
            failed_findings=failed,
        )

    response = authenticated_client.get(
        reverse("overview-resource-groups"),
        {"filter[resource_group__in]": "storage,compute"},
    )

    assert response.status_code == status.HTTP_200_OK
    returned_groups = {entry["id"] for entry in response.json()["data"]}
    assert returned_groups == {"storage", "compute"}
def test_overview_groups_aggregates_multiple_providers(
    self,
    authenticated_client,
    tenants_fixture,
    providers_fixture,
    create_scan_resource_group_summary,
):
    """Summaries of the same group from two providers are merged into one entry.

    Here every counter, including ``resources_count``, is expected to be the
    sum of the two providers' rows.
    """
    tenant = tenants_fixture[0]
    first_provider, second_provider, *_ = providers_fixture

    first_scan = Scan.objects.create(
        name="multi-provider-rg-scan-1",
        provider=first_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )
    second_scan = Scan.objects.create(
        name="multi-provider-rg-scan-2",
        provider=second_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant=tenant,
    )

    # (scan, total, failed, new_failed, resources_count) for "storage"/"high"
    summary_rows = (
        (first_scan, 10, 5, 2, 4),
        (second_scan, 15, 8, 3, 6),
    )
    for scan, total, failed, new_failed, resources in summary_rows:
        create_scan_resource_group_summary(
            tenant,
            scan,
            "storage",
            "high",
            total_findings=total,
            failed_findings=failed,
            new_failed_findings=new_failed,
            resources_count=resources,
        )

    response = authenticated_client.get(reverse("overview-resource-groups"))

    assert response.status_code == status.HTTP_200_OK
    entries = response.json()["data"]
    assert len(entries) == 1

    merged = entries[0]
    assert merged["id"] == "storage"
    assert merged["attributes"]["total_findings"] == 25
    assert merged["attributes"]["failed_findings"] == 13
    assert merged["attributes"]["new_failed_findings"] == 5
    assert merged["attributes"]["resources_count"] == 10
def test_compliance_watchlist_no_filters_uses_tenant_summary(
self, authenticated_client, tenant_compliance_summary_fixture
):
@@ -10151,7 +9445,7 @@ class TestLighthouseConfigViewSet:
"type": "lighthouse-configurations",
"attributes": {
"name": "OpenAI",
"api_key": "sk-fake-test-key-for-unit-testing-only",
"api_key": "sk-test1234567890T3BlbkFJtest1234567890",
"model": "gpt-4o",
"temperature": 0.7,
"max_tokens": 4000,
@@ -11613,7 +10907,7 @@ class TestLighthouseTenantConfigViewSet:
provider_config = LighthouseProviderConfiguration.objects.create(
tenant_id=tenants_fixture[0].id,
provider_type="openai",
credentials=b'{"api_key": "sk-fake-test-key-for-unit-testing-only"}',
credentials=b'{"api_key": "sk-test1234567890T3BlbkFJtest1234567890"}',
is_active=True,
)
@@ -11749,7 +11043,7 @@ class TestLighthouseProviderConfigViewSet:
"type": "lighthouse-providers",
"attributes": {
"provider_type": "testprovider",
"credentials": {"api_key": "sk-fake-test-key-1234"},
"credentials": {"api_key": "sk-testT3BlbkFJkey"},
},
}
}
@@ -11781,7 +11075,7 @@ class TestLighthouseProviderConfigViewSet:
"credentials",
[
{}, # empty credentials
{"token": "sk-fake-test-key-1234"}, # wrong key name
{"token": "sk-testT3BlbkFJkey"}, # wrong key name
{"api_key": "ks-invalid-format"}, # wrong format
],
)
@@ -11805,7 +11099,7 @@ class TestLighthouseProviderConfigViewSet:
def test_openai_valid_credentials_success(self, authenticated_client):
"""OpenAI provider with valid sk-xxx format should succeed"""
valid_key = "sk-fake-abc-test-key-xyz"
valid_key = "sk-abc123T3BlbkFJxyz456"
payload = {
"data": {
"type": "lighthouse-providers",
@@ -11830,7 +11124,7 @@ class TestLighthouseProviderConfigViewSet:
def test_openai_provider_duplicate_per_tenant(self, authenticated_client):
"""If an OpenAI provider exists for tenant, creating again should error"""
valid_key = "sk-fake-dup-test-key-456"
valid_key = "sk-dup123T3BlbkFJdup456"
payload = {
"data": {
"type": "lighthouse-providers",
@@ -11859,7 +11153,7 @@ class TestLighthouseProviderConfigViewSet:
def test_openai_patch_base_url_and_is_active(self, authenticated_client):
"""After creating, should be able to patch base_url and is_active"""
valid_key = "sk-fake-patch-test-key-456"
valid_key = "sk-patch123T3BlbkFJpatch456"
create_payload = {
"data": {
"type": "lighthouse-providers",
@@ -11899,7 +11193,7 @@ class TestLighthouseProviderConfigViewSet:
def test_openai_patch_invalid_credentials(self, authenticated_client):
"""PATCH with invalid credentials.api_key should error (400)"""
valid_key = "sk-fake-ok-test-key-456"
valid_key = "sk-ok123T3BlbkFJok456"
create_payload = {
"data": {
"type": "lighthouse-providers",
@@ -11935,7 +11229,7 @@ class TestLighthouseProviderConfigViewSet:
assert patch_resp.status_code == status.HTTP_400_BAD_REQUEST
def test_openai_get_masking_and_fields_filter(self, authenticated_client):
valid_key = "sk-fake-get-test-key-456"
valid_key = "sk-get123T3BlbkFJget456"
create_payload = {
"data": {
"type": "lighthouse-providers",
@@ -11981,7 +11275,7 @@ class TestLighthouseProviderConfigViewSet:
provider = LighthouseProviderConfiguration.objects.create(
tenant_id=tenant.id,
provider_type="openai",
credentials=b'{"api_key":"sk-fake-test-key-123"}',
credentials=b'{"api_key":"sk-test123T3BlbkFJ"}',
is_active=True,
)
-10
View File
@@ -393,21 +393,11 @@ def get_findings_metadata_no_aggregations(tenant_id: str, filtered_queryset):
categories_set.update(categories_list)
categories = sorted(categories_set)
# Aggregate groups from findings
groups = list(
filtered_queryset.exclude(resource_groups__isnull=True)
.exclude(resource_groups__exact="")
.values_list("resource_groups", flat=True)
.distinct()
.order_by("resource_groups")
)
result = {
"services": services,
"regions": regions,
"resource_types": resource_types,
"categories": categories,
"groups": groups,
}
serializer = FindingMetadataSerializer(data=result)
-127
View File
@@ -21,7 +21,6 @@ from rest_framework_simplejwt.tokens import RefreshToken
from api.db_router import MainRouter
from api.exceptions import ConflictException
from api.models import (
AttackPathsScan,
Finding,
Integration,
IntegrationProviderRelationship,
@@ -1133,109 +1132,6 @@ class ScanComplianceReportSerializer(BaseSerializerV1):
fields = ["id", "name"]
class AttackPathsScanSerializer(RLSSerializer):
    # Serializes AttackPathsScan rows. On top of the model columns listed in
    # Meta.fields it exposes three values looked up on the related provider
    # (alias, provider type and uid) through SerializerMethodField getters.
    state = StateEnumSerializerField(read_only=True)
    provider_alias = serializers.SerializerMethodField(read_only=True)
    provider_type = serializers.SerializerMethodField(read_only=True)
    provider_uid = serializers.SerializerMethodField(read_only=True)

    class Meta:
        model = AttackPathsScan
        fields = [
            "id",
            "state",
            "progress",
            "provider",
            "provider_alias",
            "provider_type",
            "provider_uid",
            "scan",
            "task",
            "inserted_at",
            "started_at",
            "completed_at",
            "duration",
        ]

    # Serializers used when a client asks for these relations to be included.
    included_serializers = {
        "provider": "api.v1.serializers.ProviderIncludeSerializer",
        "scan": "api.v1.serializers.ScanIncludeSerializer",
        "task": "api.v1.serializers.TaskSerializer",
    }

    def get_provider_alias(self, obj):
        # None when the object has no (truthy) provider attribute.
        linked_provider = getattr(obj, "provider", None)
        if not linked_provider:
            return None
        return linked_provider.alias

    def get_provider_type(self, obj):
        # The provider model stores its type in the `provider` attribute.
        linked_provider = getattr(obj, "provider", None)
        if not linked_provider:
            return None
        return linked_provider.provider

    def get_provider_uid(self, obj):
        # None when the object has no (truthy) provider attribute.
        linked_provider = getattr(obj, "provider", None)
        if not linked_provider:
            return None
        return linked_provider.uid
class AttackPathsQueryParameterSerializer(BaseSerializerV1):
    # One parameter entry of an Attack Paths query definition; nested with
    # many=True inside AttackPathsQuerySerializer.parameters.
    name = serializers.CharField()
    label = serializers.CharField()
    # Defaults to "string" when the definition does not provide a data type.
    data_type = serializers.CharField(default="string")
    # Both optional and nullable.
    description = serializers.CharField(required=False, allow_null=True)
    placeholder = serializers.CharField(required=False, allow_null=True)

    class JSONAPIMeta:
        resource_name = "attack-paths-query-parameters"
class AttackPathsQuerySerializer(BaseSerializerV1):
    # Catalog entry for an Attack Paths query: identifying text fields plus
    # the list of parameters the query accepts.
    id = serializers.CharField()
    name = serializers.CharField()
    description = serializers.CharField()
    provider = serializers.CharField()
    parameters = AttackPathsQueryParameterSerializer(many=True)

    class JSONAPIMeta:
        resource_name = "attack-paths-queries"
class AttackPathsQueryRunRequestSerializer(BaseSerializerV1):
    # Request body for running a query: the query id and an optional (possibly
    # empty) mapping of parameter names to arbitrary JSON values.
    id = serializers.CharField()
    parameters = serializers.DictField(
        child=serializers.JSONField(),
        required=False,
        allow_empty=True,
    )

    class JSONAPIMeta:
        resource_name = "attack-paths-query-run-requests"
class AttackPathsNodeSerializer(BaseSerializerV1):
    # A node of a query result graph: id, list of string labels and a
    # properties mapping holding arbitrary JSON values.
    id = serializers.CharField()
    labels = serializers.ListField(
        child=serializers.CharField(),
    )
    properties = serializers.DictField(
        child=serializers.JSONField(),
    )

    class JSONAPIMeta:
        resource_name = "attack-paths-query-result-nodes"
class AttackPathsRelationshipSerializer(BaseSerializerV1):
    # An edge of a query result graph: its own id and label, the source and
    # target identifiers, and a properties mapping of arbitrary JSON values.
    id = serializers.CharField()
    label = serializers.CharField()
    source = serializers.CharField()
    target = serializers.CharField()
    properties = serializers.DictField(
        child=serializers.JSONField(),
    )

    class JSONAPIMeta:
        resource_name = "attack-paths-query-result-relationships"
class AttackPathsQueryResultSerializer(BaseSerializerV1):
    # Result of running a query: the graph expressed as two lists, one of
    # nodes and one of relationships.
    nodes = AttackPathsNodeSerializer(many=True)
    relationships = AttackPathsRelationshipSerializer(many=True)

    class JSONAPIMeta:
        resource_name = "attack-paths-query-results"
class ResourceTagSerializer(RLSSerializer):
"""
Serializer for the ResourceTag model
@@ -1279,7 +1175,6 @@ class ResourceSerializer(RLSSerializer):
"metadata",
"details",
"partition",
"groups",
]
extra_kwargs = {
"id": {"read_only": True},
@@ -1288,7 +1183,6 @@ class ResourceSerializer(RLSSerializer):
"metadata": {"read_only": True},
"details": {"read_only": True},
"partition": {"read_only": True},
"groups": {"read_only": True},
}
included_serializers = {
@@ -1382,7 +1276,6 @@ class ResourceMetadataSerializer(BaseSerializerV1):
services = serializers.ListField(child=serializers.CharField(), allow_empty=True)
regions = serializers.ListField(child=serializers.CharField(), allow_empty=True)
types = serializers.ListField(child=serializers.CharField(), allow_empty=True)
groups = serializers.ListField(child=serializers.CharField(), allow_empty=True)
# Temporarily disabled until we implement tag filtering in the UI
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
@@ -1409,7 +1302,6 @@ class FindingSerializer(RLSSerializer):
"check_id",
"check_metadata",
"categories",
"resource_groups",
"raw_result",
"inserted_at",
"updated_at",
@@ -1466,9 +1358,6 @@ class FindingMetadataSerializer(BaseSerializerV1):
child=serializers.CharField(), allow_empty=True
)
categories = serializers.ListField(child=serializers.CharField(), allow_empty=True)
groups = serializers.ListField(
child=serializers.CharField(), allow_empty=True, required=False, default=list
)
# Temporarily disabled until we implement tag filtering in the UI
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
@@ -2414,22 +2303,6 @@ class CategoryOverviewSerializer(BaseSerializerV1):
resource_name = "category-overviews"
class ResourceGroupOverviewSerializer(BaseSerializerV1):
    """Serializer for resource group overview aggregations."""

    # The resource id is read from the "resource_group" key of each row.
    id = serializers.CharField(source="resource_group")

    # Integer counters carried by each aggregated row.
    total_findings = serializers.IntegerField()
    failed_findings = serializers.IntegerField()
    new_failed_findings = serializers.IntegerField()
    resources_count = serializers.IntegerField()

    # Free-form JSON; the help text documents the expected keys.
    severity = serializers.JSONField(
        help_text="Severity breakdown: {informational, low, medium, high, critical}"
    )

    class JSONAPIMeta:
        resource_name = "resource-group-overviews"
class ComplianceWatchlistOverviewSerializer(BaseSerializerV1):
"""Serializer for compliance watchlist overview with FAIL-dominant aggregation."""
-4
View File
@@ -4,7 +4,6 @@ from drf_spectacular.views import SpectacularRedocView
from rest_framework_nested import routers
from api.v1.views import (
AttackPathsScanViewSet,
ComplianceOverviewViewSet,
CustomSAMLLoginView,
CustomTokenObtainView,
@@ -54,9 +53,6 @@ router.register(r"tenants", TenantViewSet, basename="tenant")
router.register(r"providers", ProviderViewSet, basename="provider")
router.register(r"provider-groups", ProviderGroupViewSet, basename="providergroup")
router.register(r"scans", ScanViewSet, basename="scan")
router.register(
r"attack-paths-scans", AttackPathsScanViewSet, basename="attack-paths-scans"
)
router.register(r"tasks", TaskViewSet, basename="task")
router.register(r"resources", ResourceViewSet, basename="resource")
router.register(r"findings", FindingViewSet, basename="finding")
+19 -379
View File
@@ -3,7 +3,6 @@ import glob
import json
import logging
import os
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timedelta, timezone
@@ -11,7 +10,6 @@ from decimal import ROUND_HALF_UP, Decimal, InvalidOperation
from urllib.parse import urljoin
import sentry_sdk
from allauth.socialaccount.models import SocialAccount, SocialApp
from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter
from allauth.socialaccount.providers.google.views import GoogleOAuth2Adapter
@@ -43,9 +41,8 @@ from django.db.models import (
Sum,
Value,
When,
Window,
)
from django.db.models.functions import Coalesce, RowNumber
from django.db.models.functions import Coalesce
from django.http import HttpResponse, QueryDict
from django.shortcuts import redirect
from django.urls import reverse
@@ -75,13 +72,23 @@ from rest_framework.generics import GenericAPIView, get_object_or_404
from rest_framework.permissions import SAFE_METHODS
from rest_framework_json_api.views import RelationshipView, Response
from rest_framework_simplejwt.exceptions import InvalidToken, TokenError
from api.attack_paths import (
database as graph_database,
get_queries_for_provider,
get_query_by_id,
views_helpers as attack_paths_views_helpers,
from tasks.beat import schedule_provider_scan
from tasks.jobs.export import get_s3_client
from tasks.tasks import (
backfill_compliance_summaries_task,
backfill_scan_resource_summaries_task,
check_integration_connection_task,
check_lighthouse_connection_task,
check_lighthouse_provider_connection_task,
check_provider_connection_task,
delete_provider_task,
delete_tenant_task,
jira_integration_task,
mute_historical_findings_task,
perform_scan_task,
refresh_lighthouse_provider_models_task,
)
from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset
from api.compliance import (
PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE,
@@ -103,7 +110,6 @@ from api.filters import (
InvitationFilter,
LatestFindingFilter,
LatestResourceFilter,
AttackPathsScanFilter,
LighthouseProviderConfigFilter,
LighthouseProviderModelsFilter,
MembershipFilter,
@@ -113,7 +119,6 @@ from api.filters import (
ProviderGroupFilter,
ProviderSecretFilter,
ResourceFilter,
ResourceGroupOverviewFilter,
RoleFilter,
ScanFilter,
ScanSummaryFilter,
@@ -132,7 +137,6 @@ from api.models import (
Finding,
Integration,
Invitation,
AttackPathsScan,
LighthouseConfiguration,
LighthouseProviderConfiguration,
LighthouseProviderModels,
@@ -156,7 +160,6 @@ from api.models import (
SAMLToken,
Scan,
ScanCategorySummary,
ScanGroupSummary,
ScanSummary,
SeverityChoices,
StateChoices,
@@ -178,10 +181,6 @@ from api.utils import (
from api.uuid_utils import datetime_to_uuid7, uuid7_start
from api.v1.mixins import DisablePaginationMixin, PaginateByPkMixin, TaskManagementMixin
from api.v1.serializers import (
AttackPathsQueryRunRequestSerializer,
AttackPathsQuerySerializer,
AttackPathsQueryResultSerializer,
AttackPathsScanSerializer,
AttackSurfaceOverviewSerializer,
CategoryOverviewSerializer,
ComplianceOverviewAttributesSerializer,
@@ -234,7 +233,6 @@ from api.v1.serializers import (
ProviderSecretUpdateSerializer,
ProviderSerializer,
ProviderUpdateSerializer,
ResourceGroupOverviewSerializer,
ResourceMetadataSerializer,
ResourceSerializer,
RoleCreateSerializer,
@@ -264,23 +262,6 @@ from api.v1.serializers import (
UserSerializer,
UserUpdateSerializer,
)
from tasks.beat import schedule_provider_scan
from tasks.jobs.attack_paths import db_utils as attack_paths_db_utils
from tasks.jobs.export import get_s3_client
from tasks.tasks import (
backfill_compliance_summaries_task,
backfill_scan_resource_summaries_task,
check_integration_connection_task,
check_lighthouse_connection_task,
check_lighthouse_provider_connection_task,
check_provider_connection_task,
delete_provider_task,
delete_tenant_task,
jira_integration_task,
mute_historical_findings_task,
perform_scan_task,
refresh_lighthouse_provider_models_task,
)
logger = logging.getLogger(BackendLogger.API)
@@ -382,7 +363,7 @@ class SchemaView(SpectacularAPIView):
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.18.2"
spectacular_settings.VERSION = "1.18.0"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -424,10 +405,6 @@ class SchemaView(SpectacularAPIView):
"name": "Scan",
"description": "Endpoints for triggering manual scans and viewing scan results.",
},
{
"name": "Attack Paths",
"description": "Endpoints for Attack Paths scan status and executing Attack Paths queries.",
},
{
"name": "Schedule",
"description": "Endpoints for managing scan schedules, allowing configuration of automated "
@@ -2178,12 +2155,6 @@ class ScanViewSet(BaseRLSViewSet):
},
)
attack_paths_db_utils.create_attack_paths_scan(
tenant_id=self.request.tenant_id,
scan_id=str(scan.id),
provider_id=str(scan.provider_id),
)
prowler_task = Task.objects.get(id=task.id)
scan.task_id = task.id
scan.save(update_fields=["task_id"])
@@ -2264,188 +2235,6 @@ class TaskViewSet(BaseRLSViewSet):
)
@extend_schema_view(
    list=extend_schema(
        tags=["Attack Paths"],
        summary="List Attack Paths scans",
        description="Retrieve Attack Paths scans for the tenant with support for filtering, ordering, and pagination.",
    ),
    retrieve=extend_schema(
        tags=["Attack Paths"],
        summary="Retrieve Attack Paths scan details",
        description="Fetch full details for a specific Attack Paths scan.",
    ),
    attack_paths_queries=extend_schema(
        tags=["Attack Paths"],
        summary="List attack paths queries",
        description="Retrieve the catalog of Attack Paths queries available for this Attack Paths scan.",
        responses={
            200: OpenApiResponse(AttackPathsQuerySerializer(many=True)),
            404: OpenApiResponse(
                description="No queries found for the selected provider"
            ),
        },
    ),
    run_attack_paths_query=extend_schema(
        tags=["Attack Paths"],
        summary="Execute an Attack Paths query",
        description="Execute the selected Attack Paths query against the Attack Paths graph and return the resulting subgraph.",
        request=AttackPathsQueryRunRequestSerializer,
        responses={
            200: OpenApiResponse(AttackPathsQueryResultSerializer),
            400: OpenApiResponse(
                description="Bad request (e.g., Unknown Attack Paths query for the selected provider)"
            ),
            404: OpenApiResponse(
                description="No attack paths found for the given query and parameters"
            ),
            500: OpenApiResponse(
                description="Attack Paths query execution failed due to a database error"
            ),
        },
    ),
)
class AttackPathsScanViewSet(BaseRLSViewSet):
    # Endpoints for Attack Paths scans: list/retrieve plus two detail actions
    # (query catalog and query execution). Only GET and POST are routed.
    queryset = AttackPathsScan.objects.all()
    serializer_class = AttackPathsScanSerializer
    http_method_names = ["get", "post"]
    filterset_class = AttackPathsScanFilter
    ordering = ["-inserted_at"]
    ordering_fields = ["inserted_at", "started_at"]
    # RBAC required permissions
    required_permissions = [Permissions.MANAGE_SCANS]

    def set_required_permissions(self):
        # Safe (read-only) HTTP methods need no permission; anything else
        # requires MANAGE_SCANS.
        is_read_only = self.request.method in SAFE_METHODS
        self.required_permissions = (
            [] if is_read_only else [Permissions.MANAGE_SCANS]
        )

    def get_serializer_class(self):
        # The run action validates its request body with a dedicated serializer.
        if self.action != "run_attack_paths_query":
            return super().get_serializer_class()
        return AttackPathsQueryRunRequestSerializer

    def get_queryset(self):
        # Tenant-scoped scans, narrowed to the role's providers unless the
        # role has unlimited visibility.
        role = get_role(self.request.user)
        scans = AttackPathsScan.objects.filter(tenant_id=self.request.tenant_id)
        if not role.unlimited_visibility:
            scans = scans.filter(provider__in=get_providers(role))
        return scans.select_related("provider", "scan", "task")

    def list(self, request, *args, **kwargs):
        # Keep a single row per provider: the one with the most recent
        # inserted_at (row number 1 inside each provider partition).
        scans = self.filter_queryset(self.get_queryset())
        newest_first_rank = Window(
            expression=RowNumber(),
            partition_by=[F("provider_id")],
            order_by=[F("inserted_at").desc()],
        )
        latest_per_provider = scans.annotate(
            latest_scan_rank=newest_first_rank
        ).filter(latest_scan_rank=1)

        page = self.paginate_queryset(latest_per_provider)
        if page is None:
            unpaginated = self.get_serializer(latest_per_provider, many=True)
            return Response(unpaginated.data)

        paginated = self.get_serializer(page, many=True)
        return self.get_paginated_response(paginated.data)

    @extend_schema(exclude=True)
    def create(self, request, *args, **kwargs):
        raise MethodNotAllowed(method="POST")

    @extend_schema(exclude=True)
    def destroy(self, request, *args, **kwargs):
        raise MethodNotAllowed(method="DELETE")

    @action(
        detail=True,
        methods=["get"],
        url_path="queries",
        url_name="queries",
    )
    def attack_paths_queries(self, request, pk=None):
        # Query catalog for the provider type of this scan; 404 when empty.
        attack_paths_scan = self.get_object()
        available_queries = get_queries_for_provider(
            attack_paths_scan.provider.provider
        )

        if available_queries:
            catalog = AttackPathsQuerySerializer(available_queries, many=True)
            return Response(catalog.data, status=status.HTTP_200_OK)

        return Response(
            {"detail": "No queries found for the selected provider"},
            status=status.HTTP_404_NOT_FOUND,
        )

    @action(
        detail=True,
        methods=["post"],
        url_path="queries/run",
        url_name="queries-run",
    )
    def run_attack_paths_query(self, request, pk=None):
        attack_paths_scan = self.get_object()

        # Queries can only run against a completed scan.
        if attack_paths_scan.state != StateChoices.COMPLETED:
            raise ValidationError(
                {
                    "detail": "The Attack Paths scan must be completed before running Attack Paths queries"
                }
            )

        # A scan without a graph database reference is reported as a 500.
        if not attack_paths_scan.graph_database:
            logger.error(
                f"The Attack Paths Scan {attack_paths_scan.id} does not reference a graph database"
            )
            return Response(
                {"detail": "The Attack Paths scan does not reference a graph database"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        # Normalize and validate the request body.
        run_request = AttackPathsQueryRunRequestSerializer(
            data=attack_paths_views_helpers.normalize_run_payload(request.data)
        )
        run_request.is_valid(raise_exception=True)
        requested = run_request.validated_data

        # The query must exist and target the same provider type as the scan.
        query_definition = get_query_by_id(requested["id"])
        if (
            query_definition is None
            or query_definition.provider != attack_paths_scan.provider.provider
        ):
            raise ValidationError(
                {"id": "Unknown Attack Paths query for the selected provider"}
            )

        query_parameters = attack_paths_views_helpers.prepare_query_parameters(
            query_definition,
            requested.get("parameters", {}),
            attack_paths_scan.provider.uid,
        )

        result_graph = attack_paths_views_helpers.execute_attack_paths_query(
            attack_paths_scan, query_definition, query_parameters
        )
        graph_database.clear_cache(attack_paths_scan.graph_database)

        # A result without nodes is still serialized, but answered with 404.
        status_code = (
            status.HTTP_200_OK
            if result_graph.get("nodes")
            else status.HTTP_404_NOT_FOUND
        )
        return Response(
            AttackPathsQueryResultSerializer(result_graph).data,
            status=status_code,
        )
@extend_schema_view(
list=extend_schema(
tags=["Resource"],
@@ -2738,20 +2527,10 @@ class ResourceViewSet(PaginateByPkMixin, BaseRLSViewSet):
.order_by("resource_type")
)
# Get groups from Resource model (flatten ArrayField)
all_groups = Resource.objects.filter(
tenant_id=tenant_id,
groups__isnull=False,
).values_list("groups", flat=True)
groups = sorted(
set(g for groups_list in all_groups if groups_list for g in groups_list)
)
result = {
"services": services,
"regions": regions,
"types": resource_types,
"groups": groups,
}
serializer = self.get_serializer(data=result)
@@ -2808,20 +2587,10 @@ class ResourceViewSet(PaginateByPkMixin, BaseRLSViewSet):
.order_by("resource_type")
)
# Get groups from Resource model for resources in latest scans (flatten ArrayField)
all_groups = Resource.objects.filter(
tenant_id=tenant_id,
groups__isnull=False,
).values_list("groups", flat=True)
groups = sorted(
set(g for groups_list in all_groups if groups_list for g in groups_list)
)
result = {
"services": services,
"regions": regions,
"types": resource_types,
"groups": groups,
}
serializer = self.get_serializer(data=result)
@@ -3250,23 +3019,11 @@ class FindingViewSet(PaginateByPkMixin, BaseRLSViewSet):
categories_set.update(categories_list)
categories = sorted(categories_set)
# Get groups from ScanGroupSummary for latest scans
groups = list(
ScanGroupSummary.objects.filter(
tenant_id=tenant_id,
scan_id__in=latest_scans_queryset.values_list("id", flat=True),
)
.values_list("resource_group", flat=True)
.distinct()
.order_by("resource_group")
)
result = {
"services": services,
"regions": regions,
"resource_types": resource_types,
"categories": categories,
"groups": groups,
}
serializer = self.get_serializer(data=result)
@@ -4340,30 +4097,6 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin):
filters=True,
responses={200: CategoryOverviewSerializer(many=True)},
),
resource_groups=extend_schema(
summary="Get resource group overview",
description=(
"Retrieve aggregated resource group metrics from latest completed scans per provider. "
"Returns one row per resource group with total, failed, and new failed findings counts, "
"plus a severity breakdown showing failed findings per severity level, "
"and a count of distinct resources evaluated per group."
),
tags=["Overview"],
filters=True,
responses={200: ResourceGroupOverviewSerializer(many=True)},
),
compliance_watchlist=extend_schema(
summary="Get compliance watchlist overview",
description=(
"Retrieve compliance metrics with FAIL-dominant aggregation. "
"Without filters: uses pre-aggregated TenantComplianceSummary. "
"With provider filters: queries ProviderComplianceScore with FAIL-dominant logic "
"where any FAIL in a requirement marks it as failed."
),
tags=["Overview"],
filters=True,
responses={200: ComplianceWatchlistOverviewSerializer(many=True)},
),
)
@method_decorator(CACHE_DECORATOR, name="list")
class OverviewViewSet(BaseRLSViewSet):
@@ -4413,8 +4146,6 @@ class OverviewViewSet(BaseRLSViewSet):
return AttackSurfaceOverviewSerializer
elif self.action == "categories":
return CategoryOverviewSerializer
elif self.action == "resource_groups":
return ResourceGroupOverviewSerializer
elif self.action == "compliance_watchlist":
return ComplianceWatchlistOverviewSerializer
return super().get_serializer_class()
@@ -4430,8 +4161,6 @@ class OverviewViewSet(BaseRLSViewSet):
return DailySeveritySummaryFilter
elif self.action == "categories":
return CategoryOverviewFilter
elif self.action == "resource_groups":
return ResourceGroupOverviewFilter
elif self.action == "attack_surface":
return AttackSurfaceOverviewFilter
elif self.action == "compliance_watchlist":
@@ -5276,95 +5005,6 @@ class OverviewViewSet(BaseRLSViewSet):
status=status.HTTP_200_OK,
)
@action(
    detail=False,
    methods=["get"],
    url_name="resource-groups",
    url_path="resource-groups",
)
def resource_groups(self, request):
    # Aggregates ScanGroupSummary rows of the latest scans into one entry per
    # resource group: finding totals, failed findings per severity and a
    # resources count.
    tenant_id = request.tenant_id
    provider_filters = self._extract_provider_filters_from_params()
    latest_scan_ids = self._latest_scan_ids_for_allowed_providers(
        tenant_id, provider_filters
    )

    # Provider filter keys are excluded from the filterset because they were
    # already used to select the latest scans.
    summaries = self._apply_filterset(
        ScanGroupSummary.objects.filter(
            tenant_id=tenant_id, scan_id__in=latest_scan_ids
        ),
        ResourceGroupOverviewFilter,
        exclude_keys={
            "provider_id",
            "provider_id__in",
            "provider_type",
            "provider_type__in",
        },
    )

    per_severity_rows = (
        summaries.values("resource_group", "severity")
        .annotate(
            total=Coalesce(Sum("total_findings"), 0),
            failed=Coalesce(Sum("failed_findings"), 0),
            new_failed=Coalesce(Sum("new_failed_findings"), 0),
        )
        .order_by("resource_group", "severity")
    )

    # Get resource_group-level resources_count:
    # 1. Max per (scan, resource_group) to deduplicate within-scan severity rows
    # 2. Sum across scans for cross-provider aggregation
    per_scan_rows = summaries.values("scan_id", "resource_group").annotate(
        resources=Coalesce(Max("resources_count"), 0)
    )
    resources_by_group = defaultdict(int)
    for scan_row in per_scan_rows:
        resources_by_group[scan_row["resource_group"]] += scan_row["resources"]

    def _empty_entry():
        # Fresh accumulator for a resource group seen for the first time.
        return {
            "total_findings": 0,
            "failed_findings": 0,
            "new_failed_findings": 0,
            "resources_count": 0,
            "severity": {
                "informational": 0,
                "low": 0,
                "medium": 0,
                "high": 0,
                "critical": 0,
            },
        }

    overview = defaultdict(_empty_entry)
    for sev_row in per_severity_rows:
        entry = overview[sev_row["resource_group"]]
        entry["total_findings"] += sev_row["total"]
        entry["failed_findings"] += sev_row["failed"]
        entry["new_failed_findings"] += sev_row["new_failed"]
        # Only the known severity keys are reported in the breakdown.
        severity_name = sev_row["severity"]
        if severity_name in entry["severity"]:
            entry["severity"][severity_name] = sev_row["failed"]

    # Set resources_count from resource_group-level aggregation
    for group_name, entry in overview.items():
        entry["resources_count"] = resources_by_group.get(group_name, 0)

    response_data = [
        {"resource_group": group_name, **overview[group_name]}
        for group_name in sorted(overview)
    ]

    serialized = self.get_serializer(response_data, many=True)
    return Response(serialized.data, status=status.HTTP_200_OK)
@action(
detail=False,
methods=["get"],
@@ -6120,7 +5760,7 @@ class TenantApiKeyViewSet(BaseRLSViewSet):
@extend_schema(exclude=True)
def destroy(self, request, *args, **kwargs):
raise MethodNotAllowed(method="DELETE")
raise MethodNotAllowed(method="DESTROY")
@action(detail=True, methods=["delete"])
def revoke(self, request, *args, **kwargs):
-1
View File
@@ -1,7 +1,6 @@
import warnings
from celery import Celery, Task
from config.env import env
# Suppress specific warnings from django-rest-auth: https://github.com/iMerica/dj-rest-auth/issues/684
-6
View File
@@ -44,12 +44,6 @@ DATABASES = {
"HOST": env("POSTGRES_REPLICA_HOST", default=default_db_host),
"PORT": env("POSTGRES_REPLICA_PORT", default=default_db_port),
},
"neo4j": {
"HOST": env.str("NEO4J_HOST", "neo4j"),
"PORT": env.str("NEO4J_PORT", "7687"),
"USER": env.str("NEO4J_USER", "neo4j"),
"PASSWORD": env.str("NEO4J_PASSWORD", "neo4j_password"),
},
}
DATABASES["default"] = DATABASES["prowler_user"]
@@ -45,12 +45,6 @@ DATABASES = {
"HOST": env("POSTGRES_REPLICA_HOST", default=default_db_host),
"PORT": env("POSTGRES_REPLICA_PORT", default=default_db_port),
},
"neo4j": {
"HOST": env.str("NEO4J_HOST"),
"PORT": env.str("NEO4J_PORT"),
"USER": env.str("NEO4J_USER"),
"PASSWORD": env.str("NEO4J_PASSWORD"),
},
}
DATABASES["default"] = DATABASES["prowler_user"]
+11 -219
View File
@@ -1,11 +1,8 @@
import logging
from types import SimpleNamespace
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
import pytest
from allauth.socialaccount.models import SocialLogin
from django.conf import settings
from django.db import connection as django_connection
@@ -14,14 +11,13 @@ from django.urls import reverse
from django_celery_results.models import TaskResult
from rest_framework import status
from rest_framework.test import APIClient
from api.attack_paths import (
AttackPathsQueryDefinition,
AttackPathsQueryParameterDefinition,
from tasks.jobs.backfill import (
backfill_resource_scan_summaries,
backfill_scan_category_summaries,
)
from api.db_utils import rls_transaction
from api.models import (
AttackPathsScan,
AttackSurfaceOverview,
ComplianceOverview,
ComplianceRequirementOverview,
@@ -45,7 +41,6 @@ from api.models import (
SAMLDomainIndex,
Scan,
ScanCategorySummary,
ScanGroupSummary,
ScanSummary,
StateChoices,
StatusChoices,
@@ -59,11 +54,6 @@ from api.rls import Tenant
from api.v1.serializers import TokenSerializer
from prowler.lib.check.models import Severity
from prowler.lib.outputs.finding import Status
from tasks.jobs.backfill import (
backfill_resource_scan_summaries,
backfill_scan_category_summaries,
backfill_scan_resource_group_summaries,
)
TODAY = str(datetime.today().date())
API_JSON_CONTENT_TYPE = "application/vnd.api+json"
@@ -176,20 +166,22 @@ def create_test_user_rbac_no_roles(django_db_setup, django_db_blocker, tenants_f
@pytest.fixture(scope="function")
def create_test_user_rbac_limited(django_db_setup, django_db_blocker, tenants_fixture):
def create_test_user_rbac_limited(django_db_setup, django_db_blocker):
with django_db_blocker.unblock():
user = User.objects.create_user(
name="testing_limited",
email="rbac_limited@rbac.com",
password=TEST_PASSWORD,
)
tenant = tenants_fixture[0]
tenant = Tenant.objects.create(
name="Tenant Test",
)
Membership.objects.create(
user=user,
tenant=tenant,
role=Membership.RoleChoices.OWNER,
)
role = Role.objects.create(
Role.objects.create(
name="limited",
tenant_id=tenant.id,
manage_users=False,
@@ -202,7 +194,7 @@ def create_test_user_rbac_limited(django_db_setup, django_db_blocker, tenants_fi
)
UserRoleRelationship.objects.create(
user=user,
role=role,
role=Role.objects.get(name="limited"),
tenant_id=tenant.id,
)
return user
@@ -747,7 +739,6 @@ def resources_fixture(providers_fixture):
region="us-east-1",
service="ec2",
type="prowler-test",
groups=["compute"],
)
resource1.upsert_or_delete_tags(tags)
@@ -760,7 +751,6 @@ def resources_fixture(providers_fixture):
region="eu-west-1",
service="s3",
type="prowler-test",
groups=["storage"],
)
resource2.upsert_or_delete_tags(tags)
@@ -772,7 +762,6 @@ def resources_fixture(providers_fixture):
region="us-east-1",
service="ec2",
type="test",
groups=["compute"],
)
tags = [
@@ -1245,7 +1234,7 @@ def lighthouse_config_fixture(authenticated_client, tenants_fixture):
return LighthouseConfiguration.objects.create(
tenant_id=tenants_fixture[0].id,
name="OpenAI",
api_key_decoded="sk-fake-test-key-for-unit-testing-only",
api_key_decoded="sk-test1234567890T3BlbkFJtest1234567890",
model="gpt-4o",
temperature=0,
max_tokens=4000,
@@ -1394,13 +1383,11 @@ def latest_scan_finding_with_categories(
check_id="genai_iam_check",
check_metadata={"CheckId": "genai_iam_check"},
categories=["gen-ai", "iam"],
resource_groups="ai_ml",
first_seen_at="2024-01-02T00:00:00Z",
)
finding.add_resources([resource])
backfill_resource_scan_summaries(tenant_id, str(scan.id))
backfill_scan_category_summaries(tenant_id, str(scan.id))
backfill_scan_resource_group_summaries(tenant_id, str(scan.id))
return finding
@@ -1603,104 +1590,6 @@ def mute_rules_fixture(tenants_fixture, create_test_user, findings_fixture):
return mute_rule1, mute_rule2
@pytest.fixture
def create_attack_paths_scan():
    """Return a factory that creates AttackPathsScan rows for a provider."""

    def _create(
        provider,
        *,
        scan=None,
        state=StateChoices.COMPLETED,
        progress=0,
        graph_database="tenant-db",
        **extra_fields,
    ):
        if scan:
            scan_instance = scan
        else:
            # "scan_name" / "scan_state" are consumed only on this path, i.e.
            # when a supporting Scan has to be created for the caller.
            supporting_name = extra_fields.pop(
                "scan_name", "Attack Paths Supporting Scan"
            )
            supporting_state = extra_fields.pop("scan_state", StateChoices.COMPLETED)
            scan_instance = Scan.objects.create(
                name=supporting_name,
                provider=provider,
                trigger=Scan.TriggerChoices.MANUAL,
                state=supporting_state,
                tenant_id=provider.tenant_id,
            )

        # Remaining extra fields are merged last, so they override the defaults.
        payload = {
            "tenant_id": provider.tenant_id,
            "provider": provider,
            "scan": scan_instance,
            "state": state,
            "progress": progress,
            "graph_database": graph_database,
            **extra_fields,
        }
        return AttackPathsScan.objects.create(**payload)

    return _create
@pytest.fixture
def attack_paths_query_definition_factory():
    """Return a factory that builds AttackPathsQueryDefinition objects for tests."""

    def _create(**overrides):
        # "cast_type" only feeds the default "limit" parameter; it is not a
        # field of the definition itself.
        cast_type = overrides.pop("cast_type", str)
        default_parameters = [
            AttackPathsQueryParameterDefinition(
                name="limit",
                label="Limit",
                cast=cast_type,
            )
        ]
        parameters = overrides.pop("parameters", default_parameters)

        # Any remaining override replaces the matching default below.
        return AttackPathsQueryDefinition(
            **{
                "id": "aws-test",
                "name": "Attack Paths Test Query",
                "description": "Synthetic Attack Paths definition for tests.",
                "provider": "aws",
                "cypher": "RETURN 1",
                "parameters": parameters,
                **overrides,
            }
        )

    return _create
@pytest.fixture
def attack_paths_graph_stub_classes():
    """Expose minimal stand-ins for graph values, nodes and relationships.

    The classes are returned on a namespace as ``NativeValue``, ``Node`` and
    ``Relationship``.
    """

    class AttackPathsNativeValue:
        # Wraps a value and hands it back from to_native().
        def __init__(self, value):
            self._value = value

        def to_native(self):
            return self._value

    class AttackPathsNode:
        # Plain attribute holder; properties are kept on `_properties`.
        def __init__(self, element_id, labels, properties):
            self.element_id = element_id
            self.labels = labels
            self._properties = properties

    class AttackPathsRelationship:
        # Plain attribute holder; the relationship type is stored as `type`.
        def __init__(self, element_id, rel_type, start_node, end_node, properties):
            self.element_id = element_id
            self.type = rel_type
            self.start_node = start_node
            self.end_node = end_node
            self._properties = properties

    stubs = SimpleNamespace(
        NativeValue=AttackPathsNativeValue,
        Node=AttackPathsNode,
        Relationship=AttackPathsRelationship,
    )
    return stubs
@pytest.fixture
def create_attack_surface_overview():
def _create(tenant, scan, attack_surface_type, total=10, failed=5, muted_failed=2):
@@ -1740,103 +1629,6 @@ def create_scan_category_summary():
return _create
@pytest.fixture(scope="function")
def findings_with_group(scans_fixture, resources_fixture):
    """Create one failing critical finding in the ``storage`` resource group.

    The finding belongs to the first scan, is linked to the first resource,
    and the scan's resource summaries are backfilled before it is returned.
    """
    first_scan = scans_fixture[0]
    first_resource = resources_fixture[0]

    finding_fields = {
        "tenant_id": first_scan.tenant_id,
        "uid": "finding_with_group_1",
        "scan": first_scan,
        "delta": None,
        "status": Status.FAIL,
        "status_extended": "test status",
        "impact": Severity.critical,
        "impact_extended": "test impact",
        "severity": Severity.critical,
        "raw_result": {"status": Status.FAIL},
        "check_id": "storage_check",
        "check_metadata": {"CheckId": "storage_check"},
        "resource_groups": "storage",
        "first_seen_at": "2024-01-02T00:00:00Z",
    }
    created = Finding.objects.create(**finding_fields)
    created.add_resources([first_resource])

    backfill_resource_scan_summaries(str(first_scan.tenant_id), str(first_scan.id))
    return created
@pytest.fixture(scope="function")
def findings_with_multiple_groups(scans_fixture, resources_fixture):
    """Create two failing findings in different resource groups.

    The first is a critical ``storage`` finding on the first resource, the
    second a high ``security`` finding on the second resource. Resource scan
    summaries are backfilled once both exist.

    Returns:
        tuple: ``(storage_finding, security_finding)``.
    """
    first_scan = scans_fixture[0]
    storage_resource, security_resource = resources_fixture[:2]

    def _failing_finding(
        uid, status_extended, impact_extended, level, check_id, group, resource
    ):
        # Create the finding and link its resource before the next one is built.
        created = Finding.objects.create(
            tenant_id=first_scan.tenant_id,
            uid=uid,
            scan=first_scan,
            delta=None,
            status=Status.FAIL,
            status_extended=status_extended,
            impact=level,
            impact_extended=impact_extended,
            severity=level,
            raw_result={"status": Status.FAIL},
            check_id=check_id,
            check_metadata={"CheckId": check_id},
            resource_groups=group,
            first_seen_at="2024-01-02T00:00:00Z",
        )
        created.add_resources([resource])
        return created

    storage_finding = _failing_finding(
        "finding_multi_grp_1",
        "test status",
        "test impact",
        Severity.critical,
        "storage_check",
        "storage",
        storage_resource,
    )
    security_finding = _failing_finding(
        "finding_multi_grp_2",
        "test status 2",
        "test impact 2",
        Severity.high,
        "security_check",
        "security",
        security_resource,
    )

    backfill_resource_scan_summaries(str(first_scan.tenant_id), str(first_scan.id))
    return storage_finding, security_finding
@pytest.fixture
def create_scan_resource_group_summary():
    """Return a factory that inserts ``ScanGroupSummary`` rows.

    ``tenant``, ``scan``, ``resource_group`` and ``severity`` are required;
    the counters default to 10 total, 5 failed, 2 new failed and 3 resources.
    """

    def _create(
        tenant,
        scan,
        resource_group,
        severity,
        total_findings=10,
        failed_findings=5,
        new_failed_findings=2,
        resources_count=3,
    ):
        row_fields = {
            "tenant": tenant,
            "scan": scan,
            "resource_group": resource_group,
            "severity": severity,
            "total_findings": total_findings,
            "failed_findings": failed_findings,
            "new_failed_findings": new_failed_findings,
            "resources_count": resources_count,
        }
        return ScanGroupSummary.objects.create(**row_fields)

    return _create
def get_authorization_header(access_token: str) -> dict:
    """Build the HTTP ``Authorization`` header for a bearer token."""
    bearer_value = "Bearer {}".format(access_token)
    return {"Authorization": bearer_value}
-7
View File
@@ -7,7 +7,6 @@ from tasks.tasks import perform_scheduled_scan_task
from api.db_utils import rls_transaction
from api.exceptions import ConflictException
from api.models import Provider, Scan, StateChoices
from tasks.jobs.attack_paths import db_utils as attack_paths_db_utils
def schedule_provider_scan(provider_instance: Provider):
@@ -40,12 +39,6 @@ def schedule_provider_scan(provider_instance: Provider):
scheduled_at=datetime.now(timezone.utc),
)
attack_paths_db_utils.create_attack_paths_scan(
tenant_id=tenant_id,
scan_id=str(scheduled_scan.id),
provider_id=provider_id,
)
# Schedule the task
periodic_task_instance = PeriodicTask.objects.create(
interval=schedule,
@@ -1,7 +0,0 @@
from tasks.jobs.attack_paths.db_utils import can_provider_run_attack_paths_scan
from tasks.jobs.attack_paths.scan import run as attack_paths_scan
__all__ = [
"attack_paths_scan",
"can_provider_run_attack_paths_scan",
]
@@ -1,253 +0,0 @@
# Portions of this file are based on code from the Cartography project
# (https://github.com/cartography-cncf/cartography), which is licensed under the Apache 2.0 License.
from typing import Any
import aioboto3
import boto3
import neo4j
from cartography.config import Config as CartographyConfig
from cartography.intel import aws as cartography_aws
from celery.utils.log import get_task_logger
from api.models import (
AttackPathsScan as ProwlerAPIAttackPathsScan,
Provider as ProwlerAPIProvider,
)
from prowler.providers.common.provider import Provider as ProwlerSDKProvider
from tasks.jobs.attack_paths import db_utils, utils
logger = get_task_logger(__name__)
def start_aws_ingestion(
    neo4j_session: neo4j.Session,
    cartography_config: CartographyConfig,
    prowler_api_provider: ProwlerAPIProvider,
    prowler_sdk_provider: ProwlerSDKProvider,
    attack_paths_scan: ProwlerAPIAttackPathsScan,
) -> dict[str, dict[str, str]]:
    """
    Ingest one AWS account into the graph through Cartography's AWS module.

    Code based on Cartography version 0.122.0, specifically on `cartography.intel.aws.__init__.py`.

    Scan progress bookkeeping:
        - The caller (`tasks.jobs.attack_paths.scan.run`) has set it to 2.
        - This function reports 3 through 94 as the steps below complete.
        - When control returns to the caller, it will be set to 95.

    Returns:
        The mapping produced by `sync_aws_account`: sync function name ->
        stringified exception, for every sync function that failed.
    """
    account_id = prowler_api_provider.uid
    update_tag = cartography_config.update_tag

    def report_progress(value: int) -> None:
        db_utils.update_attack_paths_scan_progress(attack_paths_scan, value)

    # Parameters shared by every Cartography job
    job_parameters = {
        "UPDATE_TAG": update_tag,
        "permission_relationships_file": cartography_config.permission_relationships_file,
        "aws_guardduty_severity_threshold": cartography_config.aws_guardduty_severity_threshold,
        "aws_cloudtrail_management_events_lookback_hours": cartography_config.aws_cloudtrail_management_events_lookback_hours,
        "experimental_aws_inspector_batch": cartography_config.experimental_aws_inspector_batch,
    }

    session = get_boto3_session(prowler_api_provider, prowler_sdk_provider)
    regions: list[str] = list(prowler_sdk_provider._enabled_regions)
    requested_syncs = list(cartography_aws.RESOURCE_FUNCTIONS)

    sync_args = cartography_aws._build_aws_sync_kwargs(
        neo4j_session,
        session,
        regions,
        account_id,
        update_tag,
        job_parameters,
    )

    # Organizations first
    logger.info(f"Syncing organizations for AWS account {account_id}")
    cartography_aws.organizations.sync(
        neo4j_session,
        {prowler_api_provider.alias: account_id},
        update_tag,
        job_parameters,
    )
    report_progress(3)

    # Extra field, removed again before the cleanup job below
    job_parameters["AWS_ID"] = account_id

    cartography_aws._autodiscover_accounts(
        neo4j_session,
        session,
        account_id,
        update_tag,
        job_parameters,
    )
    report_progress(4)

    failed_syncs = sync_aws_account(
        prowler_api_provider, requested_syncs, sync_args, attack_paths_scan
    )

    # The two syncs `sync_aws_account` skips run here, once the graph is populated
    if "permission_relationships" in requested_syncs:
        logger.info(
            f"Syncing function permission_relationships for AWS account {account_id}"
        )
        cartography_aws.RESOURCE_FUNCTIONS["permission_relationships"](**sync_args)
        report_progress(88)

    if "resourcegroupstaggingapi" in requested_syncs:
        logger.info(
            f"Syncing function resourcegroupstaggingapi for AWS account {account_id}"
        )
        cartography_aws.RESOURCE_FUNCTIONS["resourcegroupstaggingapi"](**sync_args)
        report_progress(89)

    logger.info(
        f"Syncing ec2_iaminstanceprofile scoped analysis for AWS account {account_id}"
    )
    cartography_aws.run_scoped_analysis_job(
        "aws_ec2_iaminstanceprofile.json",
        neo4j_session,
        job_parameters,
    )
    report_progress(90)

    logger.info(f"Syncing lambda_ecr analysis for AWS account {account_id}")
    cartography_aws.run_analysis_job(
        "aws_lambda_ecr.json",
        neo4j_session,
        job_parameters,
    )
    report_progress(91)

    logger.info(f"Syncing metadata for AWS account {account_id}")
    cartography_aws.merge_module_sync_metadata(
        neo4j_session,
        group_type="AWSAccount",
        group_id=account_id,
        synced_type="AWSAccount",
        update_tag=update_tag,
        stat_handler=cartography_aws.stat_handler,
    )
    report_progress(92)

    # Dropping the extra field added above
    del job_parameters["AWS_ID"]

    logger.info(f"Syncing cleanup_job for AWS account {account_id}")
    cartography_aws.run_cleanup_job(
        "aws_post_ingestion_principals_cleanup.json",
        neo4j_session,
        job_parameters,
    )
    report_progress(93)

    logger.info(f"Syncing analysis for AWS account {account_id}")
    cartography_aws._perform_aws_analysis(
        requested_syncs, neo4j_session, job_parameters
    )
    report_progress(94)

    return failed_syncs
def get_boto3_session(
    prowler_api_provider: ProwlerAPIProvider, prowler_sdk_provider: ProwlerSDKProvider
) -> boto3.Session:
    """
    Return the SDK provider's current boto3 session after validating it.

    The session must resolve to an AWS account, and that account must be the
    one registered as the provider's `uid`. If the session has no region, the
    SDK provider's global region is set on it.

    Raises:
        Exception: no AWS account could be resolved from the session, or the
            resolved account differs from the provider's `uid`.
    """
    session = prowler_sdk_provider.session.current_session

    discovered_accounts = cartography_aws.organizations.get_aws_account_default(
        session
    )
    if not discovered_accounts:
        raise Exception(
            "No valid AWS credentials could be found. No AWS accounts can be synced."
        )

    # Only the first account found for the session is compared
    session_account_id = next(iter(discovered_accounts.values()))
    if prowler_api_provider.uid != session_account_id:
        raise Exception(
            f"Provider {prowler_api_provider.uid} doesn't match AWS account {session_account_id}."
        )

    if session.region_name is None:
        fallback_region = prowler_sdk_provider.get_global_region()
        session._session.set_config_variable("region", fallback_region)

    return session
def get_aioboto3_session(boto3_session: boto3.Session) -> aioboto3.Session:
    """Wrap the botocore session behind `boto3_session` in an `aioboto3.Session`."""
    underlying_session = boto3_session._session
    return aioboto3.Session(botocore_session=underlying_session)
def sync_aws_account(
    prowler_api_provider: ProwlerAPIProvider,
    requested_syncs: list[str],
    sync_args: dict[str, Any],
    attack_paths_scan: ProwlerAPIAttackPathsScan,
) -> dict[str, str]:
    """
    Run every requested Cartography AWS sync function except the deferred ones.

    `permission_relationships` and `resourcegroupstaggingapi` are skipped here
    because they rely on data already being in the graph.

    Progress is reported just before each executed sync function, spread
    evenly from 4 (exclusive) to 87 (inclusive). Deferred functions neither
    log a "Syncing function" line nor consume a progress step, so progress
    never passes 87 here.

    A failing sync function does not abort the run: its stringified exception
    is stored and the loop continues.

    Args:
        prowler_api_provider: Provider whose `uid` is used in log messages.
        requested_syncs: Names of Cartography AWS sync functions to run.
        sync_args: Keyword arguments forwarded to every sync function.
        attack_paths_scan: Scan row whose progress is updated.

    Returns:
        Mapping of sync function name -> stringified exception for each failure.

    Raises:
        ValueError: a requested name is not in `cartography_aws.RESOURCE_FUNCTIONS`.
    """
    deferred_syncs = ("permission_relationships", "resourcegroupstaggingapi")

    start_progress = 4  # `cartography_aws._autodiscover_accounts`
    max_progress = (
        87  # `cartography_aws.RESOURCE_FUNCTIONS["permission_relationships"]` - 1
    )
    progress_span = max_progress - start_progress

    # Count only the functions that actually run, so the last one lands on
    # `max_progress` and the division below can never be by zero.
    runnable_count = sum(1 for name in requested_syncs if name not in deferred_syncs)

    failed_syncs: dict[str, str] = {}
    started_count = 0

    for func_name in requested_syncs:
        if func_name not in cartography_aws.RESOURCE_FUNCTIONS:
            raise ValueError(
                f'AWS sync function "{func_name}" was specified but does not exist. Did you misspell it?'
            )

        # Skip permission relationships and tags for now because they rely on data already being in the graph
        if func_name in deferred_syncs:
            continue

        logger.info(
            f"Syncing function {func_name} for AWS account {prowler_api_provider.uid}"
        )

        # Integer arithmetic keeps progress monotonic and free of float drift
        started_count += 1
        db_utils.update_attack_paths_scan_progress(
            attack_paths_scan,
            start_progress + (progress_span * started_count) // runnable_count,
        )

        try:
            # `ecr:image_layers` uses `aioboto3_session` instead of `boto3_session`
            if func_name == "ecr:image_layers":
                cartography_aws.RESOURCE_FUNCTIONS[func_name](
                    neo4j_session=sync_args.get("neo4j_session"),
                    aioboto3_session=get_aioboto3_session(
                        sync_args.get("boto3_session")
                    ),
                    regions=sync_args.get("regions"),
                    current_aws_account_id=sync_args.get("current_aws_account_id"),
                    update_tag=sync_args.get("update_tag"),
                    common_job_parameters=sync_args.get("common_job_parameters"),
                )
            else:
                cartography_aws.RESOURCE_FUNCTIONS[func_name](**sync_args)

        except Exception as e:
            # Deliberate best-effort: record the failure and keep syncing
            failed_syncs[func_name] = utils.stringify_exception(
                e, f"Exception for AWS sync function: {func_name}"
            )
            logger.warning(
                f"Caught exception syncing function {func_name} from AWS account {prowler_api_provider.uid}. We "
                "are continuing on to the next AWS sync function.",
            )

    return failed_syncs
@@ -1,168 +0,0 @@
from datetime import datetime, timezone
from typing import Any
from django.db.models import Q
from cartography.config import Config as CartographyConfig
from api.db_utils import rls_transaction
from api.models import (
AttackPathsScan as ProwlerAPIAttackPathsScan,
Provider as ProwlerAPIProvider,
StateChoices,
)
from tasks.jobs.attack_paths.providers import is_provider_available
def can_provider_run_attack_paths_scan(tenant_id: str, provider_id: int) -> bool:
    """Tell whether the given provider's type is available for Attack Paths scans.

    The provider row is loaded inside the tenant's RLS transaction;
    `ProwlerAPIProvider.DoesNotExist` propagates if it is missing.
    """
    with rls_transaction(tenant_id):
        provider_row = ProwlerAPIProvider.objects.get(id=provider_id)

    provider_type = provider_row.provider
    return is_provider_available(provider_type)
def create_attack_paths_scan(
    tenant_id: str,
    scan_id: str,
    provider_id: int,
) -> ProwlerAPIAttackPathsScan | None:
    """
    Create the Attack Paths scan row tied to a regular scan.

    Args:
        tenant_id: Tenant owning the scan; also used for the RLS transaction.
        scan_id: Identifier of the regular scan this Attack Paths scan belongs to.
        provider_id: Identifier of the provider being scanned.

    Returns:
        The new row in `SCHEDULED` state, or `None` when the provider cannot
        run Attack Paths scans.
    """
    if not can_provider_run_attack_paths_scan(tenant_id, provider_id):
        return None

    with rls_transaction(tenant_id):
        # `objects.create()` already persists the row, so no extra `save()`
        # (and its second database write) is needed.
        attack_paths_scan = ProwlerAPIAttackPathsScan.objects.create(
            tenant_id=tenant_id,
            provider_id=provider_id,
            scan_id=scan_id,
            state=StateChoices.SCHEDULED,
            started_at=datetime.now(tz=timezone.utc),
        )

    return attack_paths_scan
def retrieve_attack_paths_scan(
    tenant_id: str,
    scan_id: str,
) -> ProwlerAPIAttackPathsScan | None:
    """Fetch the Attack Paths scan linked to `scan_id`, or `None` if there is none."""
    try:
        with rls_transaction(tenant_id):
            return ProwlerAPIAttackPathsScan.objects.get(scan_id=scan_id)
    except ProwlerAPIAttackPathsScan.DoesNotExist:
        return None
def starting_attack_paths_scan(
    attack_paths_scan: ProwlerAPIAttackPathsScan,
    task_id: str,
    cartography_config: CartographyConfig,
) -> None:
    """Mark the scan as `EXECUTING` and record the task and graph details.

    Stores the task id, the current UTC time as `started_at`, and the update
    tag and Neo4j database name from `cartography_config`. Only those fields
    are written.
    """
    with rls_transaction(attack_paths_scan.tenant_id):
        changes = {
            "task_id": task_id,
            "state": StateChoices.EXECUTING,
            "started_at": datetime.now(tz=timezone.utc),
            "update_tag": cartography_config.update_tag,
            "graph_database": cartography_config.neo4j_database,
        }
        for field_name, new_value in changes.items():
            setattr(attack_paths_scan, field_name, new_value)

        attack_paths_scan.save(update_fields=list(changes))
def finish_attack_paths_scan(
    attack_paths_scan: ProwlerAPIAttackPathsScan,
    state: StateChoices,
    ingestion_exceptions: dict[str, Any],
) -> None:
    """Close the scan with its final `state`.

    Sets progress to 100 and stores the completion time, the whole seconds
    elapsed since `started_at`, and the collected ingestion exceptions. Only
    those fields are written.
    """
    with rls_transaction(attack_paths_scan.tenant_id):
        finished_at = datetime.now(tz=timezone.utc)
        elapsed = finished_at - attack_paths_scan.started_at

        changes = {
            "state": state,
            "progress": 100,
            "completed_at": finished_at,
            "duration": int(elapsed.total_seconds()),
            "ingestion_exceptions": ingestion_exceptions,
        }
        for field_name, new_value in changes.items():
            setattr(attack_paths_scan, field_name, new_value)

        attack_paths_scan.save(update_fields=list(changes))
def update_attack_paths_scan_progress(
    attack_paths_scan: ProwlerAPIAttackPathsScan,
    progress: int,
) -> None:
    """Persist a new `progress` value for the scan, writing only that field."""
    tenant = attack_paths_scan.tenant_id
    with rls_transaction(tenant):
        setattr(attack_paths_scan, "progress", progress)
        attack_paths_scan.save(update_fields=["progress"])
def get_old_attack_paths_scans(
    tenant_id: str,
    provider_id: str,
    attack_paths_scan_id: str,
) -> list[ProwlerAPIAttackPathsScan]:
    """
    List the old Attack Paths scans of a provider.

    An `old_attack_paths_scan` is any `completed` Attack Paths scan for the same provider,
    with its graph database not deleted, excluding the current Attack Paths scan.
    """
    with rls_transaction(tenant_id):
        previous_scans = ProwlerAPIAttackPathsScan.objects.filter(
            provider_id=provider_id,
            state=StateChoices.COMPLETED,
            is_graph_database_deleted=False,
        ).exclude(id=attack_paths_scan_id)

        # Querysets are lazy: materialise while the RLS context is active
        return list(previous_scans)
def update_old_attack_paths_scan(
    old_attack_paths_scan: ProwlerAPIAttackPathsScan,
) -> None:
    """Flag the scan's graph database as deleted, writing only that field."""
    tenant = old_attack_paths_scan.tenant_id
    with rls_transaction(tenant):
        setattr(old_attack_paths_scan, "is_graph_database_deleted", True)
        old_attack_paths_scan.save(update_fields=["is_graph_database_deleted"])
def get_provider_graph_database_names(tenant_id: str, provider_id: str) -> list[str]:
    """
    Return existing graph database names for a tenant/provider.

    Only scans with a non-empty, non-null `graph_database` that is not flagged
    as deleted are considered; names are de-duplicated.

    Note: For accessing the `AttackPathsScan` we need to use `all_objects` manager because the provider is soft-deleted.
    """
    with rls_transaction(tenant_id):
        live_databases = ProwlerAPIAttackPathsScan.all_objects.filter(
            ~Q(graph_database=""),
            graph_database__isnull=False,
            provider_id=provider_id,
            is_graph_database_deleted=False,
        )
        distinct_names = live_databases.values_list(
            "graph_database", flat=True
        ).distinct()

        # Querysets are lazy: materialise while the RLS context is active
        return list(distinct_names)
@@ -1,23 +0,0 @@
# Provider types that can run Attack Paths scans.
AVAILABLE_PROVIDERS: list[str] = ["aws"]

# Provider type -> label of the graph node that roots the provider's account.
ROOT_NODE_LABELS: dict[str, str] = {"aws": "AWSAccount"}

# Provider type -> node property used to match resources by their UID.
NODE_UID_FIELDS: dict[str, str] = {"aws": "arn"}


def is_provider_available(provider_type: str) -> bool:
    """Return whether `provider_type` is listed in `AVAILABLE_PROVIDERS`."""
    if provider_type in AVAILABLE_PROVIDERS:
        return True
    return False


def get_root_node_label(provider_type: str) -> str:
    """Return the root node label for the provider, or a placeholder if unknown."""
    try:
        return ROOT_NODE_LABELS[provider_type]
    except KeyError:
        return "UnknownProviderAccount"


def get_node_uid_field(provider_type: str) -> str:
    """Return the node UID property for the provider, or a placeholder if unknown."""
    try:
        return NODE_UID_FIELDS[provider_type]
    except KeyError:
        return "UnknownProviderUID"
@@ -1,290 +0,0 @@
from collections import defaultdict
from typing import Generator
import neo4j
from cartography.client.core.tx import run_write_query
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from config.env import env
from tasks.jobs.attack_paths.providers import get_node_uid_field, get_root_node_label
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding, Provider, ResourceFindingMapping
from prowler.config import config as ProwlerConfig
logger = get_task_logger(__name__)
BATCH_SIZE = env.int("ATTACK_PATHS_FINDINGS_BATCH_SIZE", 1000)
# Cypher statements that `create_indexes` runs, one by one, to index
# `ProwlerFinding` nodes. Every statement carries `IF NOT EXISTS`.
INDEX_STATEMENTS = [
"CREATE INDEX prowler_finding_id IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.id);",
"CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.provider_uid);",
"CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.lastupdated);",
# NOTE(review): the index below is named `prowler_finding_check_id` but is
# created on `n.status`; there is no index on `n.check_id`. Confirm whether
# the name or the indexed property is the intended one.
"CREATE INDEX prowler_finding_check_id IF NOT EXISTS FOR (n:ProwlerFinding) ON (n.status);",
]
# Cypher template used by `load_findings` to upsert one batch of findings.
#
# `__ROOT_NODE_LABEL__` and `__NODE_UID_FIELD__` are textual placeholders that
# `load_findings` replaces before running the query. Query parameters are
# `$provider_uid`, `$findings_data`, `$last_updated` and `$prowler_version`.
#
# For each entry of `$findings_data`, the resource is looked up among the nodes
# directly connected from the account node: first by the provider UID field,
# then (only if that found nothing) by `id`. Entries with no matching resource
# are dropped by `WHERE resource IS NOT NULL`.
#
# On a re-match of an existing `ProwlerFinding`, only `status`,
# `status_extended` and `lastupdated` are refreshed.
INSERT_STATEMENT_TEMPLATE = """
MATCH (account:__ROOT_NODE_LABEL__ {id: $provider_uid})
UNWIND $findings_data AS finding_data
OPTIONAL MATCH (account)-->(resource_by_uid)
WHERE resource_by_uid.__NODE_UID_FIELD__ = finding_data.resource_uid
WITH account, finding_data, resource_by_uid
OPTIONAL MATCH (account)-->(resource_by_id)
WHERE resource_by_uid IS NULL
AND resource_by_id.id = finding_data.resource_uid
WITH account, finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource
WHERE resource IS NOT NULL
MERGE (finding:ProwlerFinding {id: finding_data.id})
ON CREATE SET
finding.id = finding_data.id,
finding.uid = finding_data.uid,
finding.inserted_at = finding_data.inserted_at,
finding.updated_at = finding_data.updated_at,
finding.first_seen_at = finding_data.first_seen_at,
finding.scan_id = finding_data.scan_id,
finding.delta = finding_data.delta,
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.severity = finding_data.severity,
finding.check_id = finding_data.check_id,
finding.check_title = finding_data.check_title,
finding.muted = finding_data.muted,
finding.muted_reason = finding_data.muted_reason,
finding.provider_uid = $provider_uid,
finding.firstseen = timestamp(),
finding.lastupdated = $last_updated,
finding._module_name = 'cartography:prowler',
finding._module_version = $prowler_version
ON MATCH SET
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.lastupdated = $last_updated
MERGE (resource)-[rel:HAS_FINDING]->(finding)
ON CREATE SET
rel.provider_uid = $provider_uid,
rel.firstseen = timestamp(),
rel.lastupdated = $last_updated,
rel._module_name = 'cartography:prowler',
rel._module_version = $prowler_version
ON MATCH SET
rel.lastupdated = $last_updated
"""
# Cypher statement used by `cleanup_findings` to remove stale findings.
#
# Deletes (with their relationships) at most `$batch_size` `ProwlerFinding`
# nodes of `$provider_uid` whose `lastupdated` is older than `$last_updated`,
# and returns how many were deleted as `deleted_findings_count`.
CLEANUP_STATEMENT = """
MATCH (finding:ProwlerFinding {provider_uid: $provider_uid})
WHERE finding.lastupdated < $last_updated
WITH finding LIMIT $batch_size
DETACH DELETE finding
RETURN COUNT(finding) AS deleted_findings_count
"""
def create_indexes(neo4j_session: neo4j.Session) -> None:
    """
    Run every statement in `INDEX_STATEMENTS` as a write query.

    Code based on Cartography version 0.122.0, specifically on `cartography.intel.create_indexes.run`.
    """
    logger.info("Creating indexes for Prowler Findings node types")
    for index_statement in INDEX_STATEMENTS:
        run_write_query(neo4j_session, index_statement)
def analysis(
    neo4j_session: neo4j.Session,
    prowler_api_provider: Provider,
    scan_id: str,
    config: CartographyConfig,
) -> None:
    """Load the scan's findings into the graph, then remove stale findings."""
    load_findings(
        neo4j_session,
        get_provider_last_scan_findings(prowler_api_provider, scan_id),
        prowler_api_provider,
        config,
    )
    cleanup_findings(neo4j_session, prowler_api_provider, config)
def get_provider_last_scan_findings(
    prowler_api_provider: Provider,
    scan_id: str,
) -> Generator[list[dict[str, str]], None, None]:
    """
    Generator that yields batches of finding-resource pairs.

    Per batch:
        1. Page through the scan's findings ordered by `id`, resuming after
           the last `id` seen (keyset pagination, `BATCH_SIZE` rows at most).
        2. Batch-fetch resource UIDs via the mapping table.
        3. Merge both into a flat structure for Neo4j and yield it.

    At most `BATCH_SIZE` findings are fetched per iteration. Batches whose
    findings have no resources at all are not yielded.
    """
    tenant_id = prowler_api_provider.tenant_id
    finding_columns = (
        "id",
        "uid",
        "inserted_at",
        "updated_at",
        "first_seen_at",
        "scan_id",
        "delta",
        "status",
        "status_extended",
        "severity",
        "check_id",
        "check_metadata__checktitle",
        "muted",
        "muted_reason",
    )

    logger.info(
        f"Starting findings fetch for scan {scan_id} (tenant {tenant_id}) with batch size {BATCH_SIZE}"
    )

    page_number = 0
    cursor_id = None

    while True:
        page_number += 1

        with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
            # Use all_objects to avoid the ActiveProviderManager's implicit JOIN
            # through Scan -> Provider (to check is_deleted=False).
            # The provider is already validated as active in this context.
            page_query = Finding.all_objects.filter(scan_id=scan_id).order_by("id")
            if cursor_id is not None:
                page_query = page_query.filter(id__gt=cursor_id)

            page = list(page_query.values(*finding_columns)[:BATCH_SIZE])

            logger.info(f"Iteration #{page_number} fetched {len(page)} findings")

            if not page:
                logger.info(
                    f"No findings returned for iteration #{page_number}; stopping pagination"
                )
                break

            cursor_id = page[-1]["id"]
            flattened_page = _enrich_and_flatten_batch(page)

        # Yield outside the transaction
        if flattened_page:
            yield flattened_page

    logger.info(f"Finished fetching findings for scan {scan_id}")
def _enrich_and_flatten_batch(
    findings_batch: list[dict],
) -> list[dict[str, str]]:
    """
    Attach resource UIDs to a batch of findings and flatten the result.

    Each (finding, resource) pair becomes one dict, so a finding with three
    resources yields three dicts and a finding with none yields nothing.

    Must be called within an RLS transaction context.
    """
    batch_ids = [finding["id"] for finding in findings_batch]

    # Single join: mapping -> resource
    mapping_rows = ResourceFindingMapping.objects.filter(
        finding_id__in=batch_ids
    ).values_list("finding_id", "resource__uid")

    # finding_id -> [resource_uid, ...]
    uids_by_finding: dict = {}
    for mapped_finding_id, mapped_resource_uid in mapping_rows:
        uids_by_finding.setdefault(mapped_finding_id, []).append(mapped_resource_uid)

    def _pair(finding: dict, resource_uid) -> dict:
        return {
            "resource_uid": str(resource_uid),
            "id": str(finding["id"]),
            "uid": finding["uid"],
            "inserted_at": finding["inserted_at"],
            "updated_at": finding["updated_at"],
            "first_seen_at": finding["first_seen_at"],
            "scan_id": str(finding["scan_id"]),
            "delta": finding["delta"],
            "status": finding["status"],
            "status_extended": finding["status_extended"],
            "severity": finding["severity"],
            "check_id": str(finding["check_id"]),
            "check_title": finding["check_metadata__checktitle"],
            "muted": finding["muted"],
            "muted_reason": finding["muted_reason"],
        }

    # Findings without resources contribute no pairs
    return [
        _pair(finding, resource_uid)
        for finding in findings_batch
        for resource_uid in uids_by_finding.get(finding["id"], ())
    ]
def load_findings(
    neo4j_session: neo4j.Session,
    findings_batches: Generator[list[dict[str, str]], None, None],
    prowler_api_provider: Provider,
    config: CartographyConfig,
) -> None:
    """Upsert every batch of findings into the graph.

    The insert template's placeholders are filled with the provider's root
    node label and node UID field, then the query runs once per batch with
    that batch passed as `findings_data`.
    """
    provider_type = prowler_api_provider.provider
    insert_query = INSERT_STATEMENT_TEMPLATE.replace(
        "__ROOT_NODE_LABEL__", get_root_node_label(provider_type)
    ).replace("__NODE_UID_FIELD__", get_node_uid_field(provider_type))

    shared_parameters = {
        "provider_uid": str(prowler_api_provider.uid),
        "last_updated": config.update_tag,
        "prowler_version": ProwlerConfig.prowler_version,
    }

    batches_loaded = 0
    records_loaded = 0
    for batches_loaded, findings in enumerate(findings_batches, start=1):
        records_in_batch = len(findings)
        records_loaded += records_in_batch

        logger.info(
            f"Loading findings batch {batches_loaded} ({records_in_batch} records)"
        )
        neo4j_session.run(
            insert_query, {**shared_parameters, "findings_data": findings}
        )

    logger.info(
        f"Finished loading {records_loaded} records in {batches_loaded} batches"
    )
def cleanup_findings(
    neo4j_session: neo4j.Session,
    prowler_api_provider: Provider,
    config: CartographyConfig,
) -> None:
    """Delete stale findings of the provider in batches of `BATCH_SIZE`.

    `CLEANUP_STATEMENT` is run repeatedly until it reports that no finding
    was deleted.
    """
    cleanup_parameters = {
        "provider_uid": str(prowler_api_provider.uid),
        "last_updated": config.update_tag,
        "batch_size": BATCH_SIZE,
    }

    batch_number = 1
    while True:
        logger.info(f"Cleaning findings batch {batch_number}")

        record = neo4j_session.run(CLEANUP_STATEMENT, cleanup_parameters).single()
        if not record.get("deleted_findings_count", 0) > 0:
            break

        batch_number += 1
@@ -1,197 +0,0 @@
import logging
import time
import asyncio
from typing import Any, Callable
from cartography.config import Config as CartographyConfig
from cartography.intel import analysis as cartography_analysis
from cartography.intel import create_indexes as cartography_create_indexes
from cartography.intel import ontology as cartography_ontology
from celery.utils.log import get_task_logger
from api.attack_paths import database as graph_database
from api.db_utils import rls_transaction
from api.models import (
Provider as ProwlerAPIProvider,
StateChoices,
)
from api.utils import initialize_prowler_provider
from tasks.jobs.attack_paths import aws, db_utils, prowler, utils
# Without this Celery goes crazy with Cartography logging
logging.getLogger("cartography").setLevel(logging.ERROR)
logging.getLogger("neo4j").propagate = False
logger = get_task_logger(__name__)
CARTOGRAPHY_INGESTION_FUNCTIONS: dict[str, Callable] = {
"aws": aws.start_aws_ingestion,
}
def get_cartography_ingestion_function(provider_type: str) -> Callable | None:
return CARTOGRAPHY_INGESTION_FUNCTIONS.get(provider_type)
def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
    """
    Run an Attack Paths scan for the provider that owns `scan_id`.

    Code based on Cartography version 0.122.0, specifically on `cartography.cli.main`, `cartography.cli.CLI.main`,
    `cartography.sync.run_with_config` and `cartography.sync.Sync.run`.

    Steps:
        1. Load the provider and resolve its Cartography ingestion function.
           Unsupported providers finish immediately with a `global_error`.
        2. Create a dedicated Neo4j database and run Cartography ingestion,
           ontology, analysis and the Prowler findings analysis on it.
        3. Drop the graph databases of older completed scans of the provider.

    Args:
        tenant_id: Tenant owning the scan; used for RLS transactions.
        scan_id: Identifier of the regular scan the Attack Paths scan belongs to.
        task_id: Identifier of the task executing this run; stored on the scan row.

    Returns:
        The ingestion exceptions collected during the run (empty when none).

    Raises:
        Exception: any error raised after the scan started is re-raised once
            the scan has been marked as `FAILED`.
    """
    ingestion_exceptions = {}  # This will hold any exceptions raised during ingestion

    # Prowler necessary objects
    with rls_transaction(tenant_id):
        prowler_api_provider = ProwlerAPIProvider.objects.get(scan__pk=scan_id)
        prowler_sdk_provider = initialize_prowler_provider(prowler_api_provider)

    # Attack Paths Scan necessary objects
    cartography_ingestion_function = get_cartography_ingestion_function(
        prowler_api_provider.provider
    )
    attack_paths_scan = db_utils.retrieve_attack_paths_scan(tenant_id, scan_id)

    # Checks before starting the scan
    if not cartography_ingestion_function:
        ingestion_exceptions = {
            "global_error": f"Provider {prowler_api_provider.provider} is not supported for Attack Paths scans"
        }
        if attack_paths_scan:
            db_utils.finish_attack_paths_scan(
                attack_paths_scan, StateChoices.COMPLETED, ingestion_exceptions
            )

        logger.warning(
            f"Provider {prowler_api_provider.provider} is not supported for Attack Paths scans"
        )
        return ingestion_exceptions

    else:
        if not attack_paths_scan:
            logger.warning(
                f"No Attack Paths Scan found for scan {scan_id} and tenant {tenant_id}, let's create it then"
            )
            attack_paths_scan = db_utils.create_attack_paths_scan(
                tenant_id, scan_id, prowler_api_provider.id
            )

    # While creating the Cartography configuration, attributes `neo4j_user` and `neo4j_password` are not really needed in this config object
    cartography_config = CartographyConfig(
        neo4j_uri=graph_database.get_uri(),
        neo4j_database=graph_database.get_database_name(attack_paths_scan.id),
        update_tag=int(time.time()),
    )

    # Starting the Attack Paths scan
    db_utils.starting_attack_paths_scan(attack_paths_scan, task_id, cartography_config)

    try:
        logger.info(
            f"Creating Neo4j database {cartography_config.neo4j_database} for tenant {prowler_api_provider.tenant_id}"
        )

        graph_database.create_database(cartography_config.neo4j_database)
        db_utils.update_attack_paths_scan_progress(attack_paths_scan, 1)

        logger.info(
            f"Starting Cartography ({attack_paths_scan.id}) for "
            f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
        )
        with graph_database.get_session(
            cartography_config.neo4j_database
        ) as neo4j_session:
            # Indexes creation
            cartography_create_indexes.run(neo4j_session, cartography_config)
            prowler.create_indexes(neo4j_session)
            db_utils.update_attack_paths_scan_progress(attack_paths_scan, 2)

            # The real scan, where iterates over cloud services
            ingestion_exceptions = _call_within_event_loop(
                cartography_ingestion_function,
                neo4j_session,
                cartography_config,
                prowler_api_provider,
                prowler_sdk_provider,
                attack_paths_scan,
            )

            # Post-processing: Just keeping it to be more Cartography compliant
            logger.info(
                f"Syncing Cartography ontology for AWS account {prowler_api_provider.uid}"
            )
            cartography_ontology.run(neo4j_session, cartography_config)
            db_utils.update_attack_paths_scan_progress(attack_paths_scan, 95)

            logger.info(
                f"Syncing Cartography analysis for AWS account {prowler_api_provider.uid}"
            )
            cartography_analysis.run(neo4j_session, cartography_config)
            db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96)

            # Adding Prowler nodes and relationships
            logger.info(
                f"Syncing Prowler analysis for AWS account {prowler_api_provider.uid}"
            )
            prowler.analysis(
                neo4j_session, prowler_api_provider, scan_id, cartography_config
            )

        logger.info(
            f"Clearing Neo4j cache for database {cartography_config.neo4j_database}"
        )
        graph_database.clear_cache(cartography_config.neo4j_database)

        logger.info(
            f"Completed Cartography ({attack_paths_scan.id}) for "
            f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
        )

        # Handling databases changes
        old_attack_paths_scans = db_utils.get_old_attack_paths_scans(
            prowler_api_provider.tenant_id,
            prowler_api_provider.id,
            attack_paths_scan.id,
        )
        for old_attack_paths_scan in old_attack_paths_scans:
            graph_database.drop_database(old_attack_paths_scan.graph_database)
            db_utils.update_old_attack_paths_scan(old_attack_paths_scan)

        db_utils.finish_attack_paths_scan(
            attack_paths_scan, StateChoices.COMPLETED, ingestion_exceptions
        )
        return ingestion_exceptions

    except Exception as e:
        exception_message = utils.stringify_exception(e, "Cartography failed")
        logger.error(exception_message)
        ingestion_exceptions["global_cartography_error"] = exception_message

        # Handling databases changes. Dropping the database is best-effort: if
        # it fails too, the scan must still be marked as FAILED and the
        # original exception (not the cleanup one) must reach the caller.
        try:
            graph_database.drop_database(cartography_config.neo4j_database)
        except Exception as cleanup_error:
            logger.error(
                utils.stringify_exception(
                    cleanup_error,
                    f"Failed to drop Neo4j database {cartography_config.neo4j_database}",
                )
            )

        db_utils.finish_attack_paths_scan(
            attack_paths_scan, StateChoices.FAILED, ingestion_exceptions
        )
        raise
def _call_within_event_loop(fn, *args, **kwargs):
"""
Cartography needs a running event loop, so assuming there is none (Celery task or even regular DRF endpoint),
let's create a new one and set it as the current event loop for this thread.
"""
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return fn(*args, **kwargs)
finally:
try:
loop.run_until_complete(loop.shutdown_asyncgens())
except Exception as e:
logger.warning(f"Failed to shutdown async generators cleanly: {e}")
loop.close()
asyncio.set_event_loop(None)
@@ -1,10 +0,0 @@
import traceback
from datetime import datetime, timezone
def stringify_exception(exception: Exception, context: str) -> str:
    """Render an exception as ``"<UTC timestamp> - <context>"`` plus its traceback.

    The header line is followed by a newline and the formatted traceback of
    `exception`.
    """
    header = f"{datetime.now(tz=timezone.utc)} - {context}"
    traceback_lines = traceback.TracebackException.from_exception(exception).format()
    return header + "\n" + "".join(traceback_lines)
+2 -89
View File
@@ -2,13 +2,13 @@ from collections import defaultdict
from datetime import timedelta
from celery.utils.log import get_task_logger
from django.db.models import OuterRef, Subquery, Sum
from django.db.models import Sum
from django.utils import timezone
from tasks.jobs.queries import (
COMPLIANCE_UPSERT_PROVIDER_SCORE_SQL,
COMPLIANCE_UPSERT_TENANT_SUMMARY_ALL_SQL,
)
from tasks.jobs.scan import aggregate_category_counts, aggregate_resource_group_counts
from tasks.jobs.scan import aggregate_category_counts
from api.db_router import READ_REPLICA_ALIAS, MainRouter
from api.db_utils import (
@@ -28,7 +28,6 @@ from api.models import (
ResourceScanSummary,
Scan,
ScanCategorySummary,
ScanGroupSummary,
ScanSummary,
StateChoices,
)
@@ -357,92 +356,6 @@ def backfill_scan_category_summaries(tenant_id: str, scan_id: str):
return {"status": "backfilled", "categories_count": len(category_counts)}
def backfill_scan_resource_group_summaries(tenant_id: str, scan_id: str):
    """
    Create the ScanGroupSummary rows of a scan that does not have them yet.

    Every finding of the scan is folded into per-(resource_group, severity) counters and
    into a per-group set of resource UIDs; one ScanGroupSummary row is then stored for
    each (resource_group, severity) pair.

    Args:
        tenant_id: Target tenant UUID
        scan_id: Scan UUID to backfill

    Returns:
        dict: Status indicating whether backfill was performed
    """
    counts_by_group_severity: dict[tuple[str, str], dict[str, int]] = {}
    resource_uids_by_group: dict[str, set] = {}

    with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
        already_backfilled = ScanGroupSummary.objects.filter(
            tenant_id=tenant_id, scan_id=scan_id
        ).exists()
        if already_backfilled:
            return {"status": "already backfilled"}

        scan_is_finished = Scan.objects.filter(
            tenant_id=tenant_id,
            id=scan_id,
            state__in=(StateChoices.COMPLETED, StateChoices.FAILED),
        ).exists()
        if not scan_is_finished:
            return {"status": "scan is not completed"}

        # Annotate each finding with the UID of its first mapped resource
        first_resource_uid = ResourceFindingMapping.objects.filter(
            finding_id=OuterRef("id"), tenant_id=tenant_id
        ).values("resource__uid")[:1]

        finding_rows = (
            Finding.all_objects.filter(tenant_id=tenant_id, scan_id=scan_id)
            .annotate(resource_uid=Subquery(first_resource_uid))
            .values(
                "resource_groups",
                "severity",
                "status",
                "delta",
                "muted",
                "resource_uid",
            )
        )
        for row in finding_rows:
            aggregate_resource_group_counts(
                resource_group=row.get("resource_groups"),
                severity=row.get("severity"),
                status=row.get("status"),
                delta=row.get("delta"),
                muted=row.get("muted", False),
                resource_uid=row.get("resource_uid") or "",
                cache=counts_by_group_severity,
                group_resources_cache=resource_uids_by_group,
            )

    if not counts_by_group_severity:
        return {"status": "no resource groups to backfill"}

    summaries = []
    for (group_name, severity), counts in counts_by_group_severity.items():
        summaries.append(
            ScanGroupSummary(
                tenant_id=tenant_id,
                scan_id=scan_id,
                resource_group=group_name,
                severity=severity,
                total_findings=counts["total"],
                failed_findings=counts["failed"],
                new_failed_findings=counts["new_failed"],
                # Group-level value: identical on every severity row of the same group
                resources_count=len(resource_uids_by_group.get(group_name, ())),
            )
        )

    with rls_transaction(tenant_id):
        ScanGroupSummary.objects.bulk_create(
            summaries, batch_size=500, ignore_conflicts=True
        )

    return {
        "status": "backfilled",
        "resource_groups_count": len(counts_by_group_severity),
    }
def backfill_provider_compliance_scores(tenant_id: str) -> dict:
"""
Backfill ProviderComplianceScore from latest completed scan per provider.
+2 -24
View File
@@ -1,19 +1,9 @@
from celery.utils.log import get_task_logger
from django.db import DatabaseError
from api.attack_paths import database as graph_database
from api.db_router import MainRouter
from api.db_utils import batch_delete, rls_transaction
from api.models import (
AttackPathsScan,
Finding,
Provider,
Resource,
Scan,
ScanSummary,
Tenant,
)
from tasks.jobs.attack_paths.db_utils import get_provider_graph_database_names
from api.models import Finding, Provider, Resource, Scan, ScanSummary, Tenant
logger = get_task_logger(__name__)
@@ -33,27 +23,16 @@ def delete_provider(tenant_id: str, pk: str):
Raises:
Provider.DoesNotExist: If no instance with the provided primary key exists.
"""
# Delete the Attack Paths' graph databases related to the provider
graph_database_names = get_provider_graph_database_names(tenant_id, pk)
try:
for graph_database_name in graph_database_names:
graph_database.drop_database(graph_database_name)
except graph_database.GraphDatabaseQueryException as gdb_error:
logger.error(f"Error deleting Provider databases: {gdb_error}")
raise
# Get all provider related data and delete them in batches
with rls_transaction(tenant_id):
instance = Provider.all_objects.get(pk=pk)
deletion_summary = {}
deletion_steps = [
("Scan Summaries", ScanSummary.all_objects.filter(scan__provider=instance)),
("Findings", Finding.all_objects.filter(scan__provider=instance)),
("Resources", Resource.all_objects.filter(provider=instance)),
("Scans", Scan.all_objects.filter(provider=instance)),
("AttackPathsScans", AttackPathsScan.all_objects.filter(provider=instance)),
]
deletion_summary = {}
for step_name, queryset in deletion_steps:
try:
_, step_summary = batch_delete(tenant_id, queryset)
@@ -69,7 +48,6 @@ def delete_provider(tenant_id: str, pk: str):
except DatabaseError as db_error:
logger.error(f"Error deleting Provider: {db_error}")
raise
return deletion_summary
+1 -117
View File
@@ -45,7 +45,6 @@ from api.models import (
ResourceTag,
Scan,
ScanCategorySummary,
ScanGroupSummary,
ScanSummary,
StateChoices,
)
@@ -128,50 +127,6 @@ def aggregate_category_counts(
cache[key]["new_failed"] += 1
def aggregate_resource_group_counts(
resource_group: str | None,
severity: str,
status: str,
delta: str | None,
muted: bool,
resource_uid: str,
cache: dict[tuple[str, str], dict[str, int]],
group_resources_cache: dict[str, set],
) -> None:
"""
Increment resource group counters in-place for a finding.
Args:
resource_group: Resource group from check metadata (e.g., "database", "compute").
severity: Severity level (e.g., "high", "medium").
status: Finding status as string ("FAIL", "PASS").
delta: Delta value as string ("new", "changed") or None.
muted: Whether the finding is muted.
resource_uid: Unique identifier for the resource to count distinct resources.
cache: Dict {(resource_group, severity): {"total", "failed", "new_failed"}} to update.
group_resources_cache: Dict {resource_group: set(resource_uids)} for group-level resource tracking.
"""
if not resource_group:
return
is_failed = status == "FAIL" and not muted
is_new_failed = is_failed and delta == "new"
key = (resource_group, severity)
if key not in cache:
cache[key] = {"total": 0, "failed": 0, "new_failed": 0}
if not muted:
cache[key]["total"] += 1
if is_failed:
cache[key]["failed"] += 1
if is_new_failed:
cache[key]["new_failed"] += 1
# Track resources at GROUP level (not per-severity) to avoid over-counting
if resource_uid and not muted:
group_resources_cache.setdefault(resource_group, set()).add(resource_uid)
def _get_attack_surface_mapping_from_provider(provider_type: str) -> dict:
global _ATTACK_SURFACE_MAPPING_CACHE
@@ -483,8 +438,6 @@ def _process_finding_micro_batch(
scan_resource_cache: set,
mute_rules_cache: dict,
scan_categories_cache: dict[tuple[str, str], dict[str, int]],
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]],
group_resources_cache: dict[str, set],
) -> None:
"""
Process a micro-batch of findings and persist them using bulk operations.
@@ -506,8 +459,6 @@ def _process_finding_micro_batch(
scan_resource_cache: Set of tuples used to create `ResourceScanSummary` rows.
mute_rules_cache: Map of finding UID -> mute reason gathered before the scan.
scan_categories_cache: Dict tracking category counts {(category, severity): {"total", "failed", "new_failed"}}.
scan_resource_groups_cache: Dict tracking resource group counts {(resource_group, severity): {"total", "failed", "new_failed"}}.
group_resources_cache: Dict tracking unique resources per group {resource_group: set(resource_uids)}.
"""
# Accumulate objects for bulk operations
findings_to_create = []
@@ -548,8 +499,6 @@ def _process_finding_micro_batch(
with rls_transaction(tenant_id):
resource_uid = finding.resource_uid
if resource_uid not in resource_cache:
check_metadata = finding.get_metadata()
group = check_metadata.get("resourcegroup") or None
resource_instance, _ = Resource.objects.get_or_create(
tenant_id=tenant_id,
provider=provider_instance,
@@ -559,7 +508,6 @@ def _process_finding_micro_batch(
"service": finding.service_name,
"type": finding.resource_type,
"name": finding.resource_name,
"groups": [group] if group else None,
},
)
resource_cache[resource_uid] = resource_instance
@@ -580,8 +528,6 @@ def _process_finding_micro_batch(
# Track resource field changes (defer save)
updated = False
check_metadata = finding.get_metadata()
group = check_metadata.get("resourcegroup") or None
if finding.region and resource_instance.region != finding.region:
resource_instance.region = finding.region
updated = True
@@ -602,11 +548,6 @@ def _process_finding_micro_batch(
if resource_instance.partition != finding.partition:
resource_instance.partition = finding.partition
updated = True
if group and (
not resource_instance.groups or group not in resource_instance.groups
):
resource_instance.groups = (resource_instance.groups or []) + [group]
updated = True
if updated:
dirty_resources[resource_uid] = resource_instance
@@ -692,7 +633,6 @@ def _process_finding_micro_batch(
muted_reason=muted_reason,
compliance=finding.compliance,
categories=check_metadata.get("categories", []) or [],
resource_groups=check_metadata.get("resourcegroup") or None,
)
findings_to_create.append(finding_instance)
resource_denormalized_data.append((finding_instance, resource_instance))
@@ -717,18 +657,6 @@ def _process_finding_micro_batch(
cache=scan_categories_cache,
)
# Track resource groups with counts for ScanGroupSummary
aggregate_resource_group_counts(
resource_group=check_metadata.get("resourcegroup") or None,
severity=finding.severity.value,
status=status.value,
delta=delta.value if delta else None,
muted=is_muted,
resource_uid=resource_instance.uid if resource_instance else "",
cache=scan_resource_groups_cache,
group_resources_cache=group_resources_cache,
)
# Bulk operations within single transaction
with rls_transaction(tenant_id):
# Bulk create findings
@@ -786,15 +714,7 @@ def _process_finding_micro_batch(
tenant_id=tenant_id,
model=Resource,
objects=list(dirty_resources.values()),
fields=[
"metadata",
"details",
"partition",
"region",
"service",
"type",
"groups",
],
fields=["metadata", "details", "partition", "region", "service", "type"],
batch_size=1000,
)
@@ -837,8 +757,6 @@ def perform_prowler_scan(
unique_resources = set()
scan_resource_cache: set[tuple[str, str, str, str]] = set()
scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
group_resources_cache: dict[str, set] = {}
start_time = time.time()
exc = None
@@ -929,8 +847,6 @@ def perform_prowler_scan(
scan_resource_cache=scan_resource_cache,
mute_rules_cache=mute_rules_cache,
scan_categories_cache=scan_categories_cache,
scan_resource_groups_cache=scan_resource_groups_cache,
group_resources_cache=group_resources_cache,
)
# Update scan progress
@@ -1017,38 +933,6 @@ def perform_prowler_scan(
sentry_sdk.capture_exception(cat_exception)
logger.error(f"Error storing categories for scan {scan_id}: {cat_exception}")
try:
if scan_resource_groups_cache:
# Compute group-level resource counts (same value for all severity rows in a group)
group_resource_counts = {
grp: len(uids) for grp, uids in group_resources_cache.items()
}
resource_group_summaries = [
ScanGroupSummary(
tenant_id=tenant_id,
scan_id=scan_id,
resource_group=grp,
severity=severity,
total_findings=counts["total"],
failed_findings=counts["failed"],
new_failed_findings=counts["new_failed"],
resources_count=group_resource_counts.get(grp, 0),
)
for (
grp,
severity,
), counts in scan_resource_groups_cache.items()
]
with rls_transaction(tenant_id):
ScanGroupSummary.objects.bulk_create(
resource_group_summaries, batch_size=500, ignore_conflicts=True
)
except Exception as rg_exception:
sentry_sdk.capture_exception(rg_exception)
logger.error(
f"Error storing resource groups for scan {scan_id}: {rg_exception}"
)
serializer = ScanTaskSerializer(instance=scan_instance)
return serializer.data
+46 -86
View File
@@ -8,17 +8,12 @@ from celery.utils.log import get_task_logger
from config.celery import RLSTask
from config.django.base import DJANGO_FINDINGS_BATCH_SIZE, DJANGO_TMP_OUTPUT_DIRECTORY
from django_celery_beat.models import PeriodicTask
from tasks.jobs.attack_paths import (
attack_paths_scan,
can_provider_run_attack_paths_scan,
)
from tasks.jobs.backfill import (
backfill_compliance_summaries,
backfill_daily_severity_summaries,
backfill_provider_compliance_scores,
backfill_resource_scan_summaries,
backfill_scan_category_summaries,
backfill_scan_resource_group_summaries,
)
from tasks.jobs.connection import (
check_integration_connection,
@@ -52,11 +47,7 @@ from tasks.jobs.scan import (
perform_prowler_scan,
update_provider_compliance_scores,
)
from tasks.utils import (
_get_or_create_scheduled_scan,
batched,
get_next_execution_datetime,
)
from tasks.utils import batched, get_next_execution_datetime
from api.compliance import get_compliance_frameworks
from api.db_router import READ_REPLICA_ALIAS
@@ -161,11 +152,6 @@ def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str)
),
).apply_async()
if can_provider_run_attack_paths_scan(tenant_id, provider_id):
perform_attack_paths_scan_task.apply_async(
kwargs={"tenant_id": tenant_id, "scan_id": scan_id}
)
@shared_task(base=RLSTask, name="provider-connection-check")
@set_tenant
@@ -278,38 +264,44 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
executing_scan = (
Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.EXECUTING,
)
.order_by("-started_at")
.first()
)
if executing_scan:
logger.warning(
f"Scheduled scan already executing for provider {provider_id}. Skipping."
)
return ScanTaskSerializer(instance=executing_scan).data
executed_scan = Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
task__task_runner_task__task_id=task_id,
).first()
).order_by("completed_at")
if executed_scan:
# Duplicated task execution due to visibility timeout
if (
Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.EXECUTING,
scheduler_task_id=periodic_task_instance.id,
scheduled_at__date=datetime.now(timezone.utc).date(),
).exists()
or executed_scan.exists()
):
# Duplicated task execution due to visibility timeout or scan is already running
logger.warning(f"Duplicated scheduled scan for provider {provider_id}.")
return ScanTaskSerializer(instance=executed_scan).data
try:
affected_scan = executed_scan.first()
if not affected_scan:
raise ValueError(
"Error retrieving affected scan details after detecting duplicated scheduled "
"scan."
)
# Return the affected scan details to avoid losing data
serializer = ScanTaskSerializer(instance=affected_scan)
except Exception as duplicated_scan_exception:
logger.error(
f"Duplicated scheduled scan for provider {provider_id}. Error retrieving affected scan details: "
f"{str(duplicated_scan_exception)}"
)
raise duplicated_scan_exception
return serializer.data
interval = periodic_task_instance.interval
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
current_scan_datetime = next_scan_datetime - timedelta(
**{interval.period: interval.every}
)
# TEMPORARY WORKAROUND: Clean up orphan scans from transaction isolation issue
_cleanup_orphan_scheduled_scans(
@@ -318,12 +310,19 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
scheduler_task_id=periodic_task_instance.id,
)
scan_instance = _get_or_create_scheduled_scan(
scan_instance, _ = Scan.objects.get_or_create(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
scheduler_task_id=periodic_task_instance.id,
scheduled_at=current_scan_datetime,
defaults={
"state": StateChoices.SCHEDULED,
"name": "Daily scheduled scan",
"scheduled_at": next_scan_datetime - timedelta(days=1),
},
)
scan_instance.task_id = task_id
scan_instance.save()
@@ -333,19 +332,18 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
scan_id=str(scan_instance.id),
provider_id=provider_id,
)
except Exception as e:
raise e
finally:
with rls_transaction(tenant_id):
now = datetime.now(timezone.utc)
if next_scan_datetime <= now:
interval_delta = timedelta(**{interval.period: interval.every})
while next_scan_datetime <= now:
next_scan_datetime += interval_delta
_get_or_create_scheduled_scan(
Scan.objects.get_or_create(
tenant_id=tenant_id,
name="Daily scheduled scan",
provider_id=provider_id,
scheduler_task_id=periodic_task_instance.id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.SCHEDULED,
scheduled_at=next_scan_datetime,
update_state=True,
scheduler_task_id=periodic_task_instance.id,
)
_perform_scan_complete_tasks(tenant_id, str(scan_instance.id), provider_id)
@@ -359,29 +357,6 @@ def perform_scan_summary_task(tenant_id: str, scan_id: str):
return aggregate_findings(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(
    base=RLSTask,
    bind=True,
    name="attack-paths-scan-perform",
    queue="attack-paths-scans",
)
def perform_attack_paths_scan_task(self, tenant_id: str, scan_id: str):
    """
    Celery task that runs an Attack Paths scan on the "attack-paths-scans" queue.

    Args:
        self: The bound task instance (passed automatically because of bind=True).
        tenant_id (str): The tenant identifier for RLS context.
        scan_id (str): The Prowler scan identifier the Attack Paths scan is based on.

    Returns:
        Any: Whatever `attack_paths_scan` returns for this scan.
    """
    # The id of this Celery task execution is handed over to the job
    current_task_id = self.request.id
    return attack_paths_scan(
        tenant_id=tenant_id,
        scan_id=scan_id,
        task_id=current_task_id,
    )
@shared_task(name="tenant-deletion", queue="deletion", autoretry_for=(Exception,))
def delete_tenant_task(tenant_id: str):
return delete_tenant(pk=tenant_id)
@@ -638,21 +613,6 @@ def backfill_scan_category_summaries_task(tenant_id: str, scan_id: str):
return backfill_scan_category_summaries(tenant_id=tenant_id, scan_id=scan_id)
@shared_task(name="backfill-scan-resource-group-summaries", queue="backfill")
@handle_provider_deletion
def backfill_scan_resource_group_summaries_task(tenant_id: str, scan_id: str):
    """
    Celery task that backfills ScanGroupSummary for a scan on the "backfill" queue.

    Delegates to `backfill_scan_resource_group_summaries` and returns its result.

    Args:
        tenant_id (str): The tenant identifier.
        scan_id (str): The scan identifier.
    """
    backfill_result = backfill_scan_resource_group_summaries(
        tenant_id=tenant_id, scan_id=scan_id
    )
    return backfill_result
@shared_task(name="backfill-provider-compliance-scores", queue="backfill")
def backfill_provider_compliance_scores_task(tenant_id: str):
"""
@@ -1,708 +0,0 @@
from contextlib import nullcontext
from types import SimpleNamespace
from unittest.mock import MagicMock, call, patch
import pytest
from tasks.jobs.attack_paths import prowler as prowler_module
from tasks.jobs.attack_paths.scan import run as attack_paths_run
from api.models import (
AttackPathsScan,
Finding,
Provider,
Resource,
ResourceFindingMapping,
Scan,
StateChoices,
StatusChoices,
)
from prowler.lib.check.models import Severity
@pytest.mark.django_db
class TestAttackPathsRun:
    """Tests for the Attack Paths scan `run` entry point, with its collaborators replaced by mocks."""

    def test_run_success_flow(self, tenants_fixture, providers_fixture, scans_fixture):
        """A successful run returns the ingestion result and finishes the scan as COMPLETED."""
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        provider.provider = Provider.ProviderChoices.AWS
        provider.save()
        scan = scans_fixture[0]
        scan.provider = provider
        scan.save()
        attack_paths_scan = AttackPathsScan.objects.create(
            tenant_id=tenant.id,
            provider=provider,
            scan=scan,
            state=StateChoices.SCHEDULED,
        )
        # Context manager handed out by the patched `graph_database.get_session`
        mock_session = MagicMock()
        session_ctx = MagicMock()
        session_ctx.__enter__.return_value = mock_session
        session_ctx.__exit__.return_value = False
        ingestion_result = {"organizations": "warning"}
        ingestion_fn = MagicMock(return_value=ingestion_result)
        with (
            patch(
                "tasks.jobs.attack_paths.scan.rls_transaction",
                new=lambda *args, **kwargs: nullcontext(),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.initialize_prowler_provider",
                return_value=MagicMock(_enabled_regions=["us-east-1"]),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.get_uri",
                return_value="bolt://neo4j",
            ),
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.get_database_name",
                return_value="db-scan-id",
            ) as mock_get_db_name,
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.create_database"
            ) as mock_create_db,
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.get_session",
                return_value=session_ctx,
            ) as mock_get_session,
            patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache"),
            patch(
                "tasks.jobs.attack_paths.scan.cartography_create_indexes.run"
            ) as mock_cartography_indexes,
            patch(
                "tasks.jobs.attack_paths.scan.cartography_analysis.run"
            ) as mock_cartography_analysis,
            patch(
                "tasks.jobs.attack_paths.scan.cartography_ontology.run"
            ) as mock_cartography_ontology,
            patch(
                "tasks.jobs.attack_paths.scan.prowler.create_indexes"
            ) as mock_prowler_indexes,
            patch(
                "tasks.jobs.attack_paths.scan.prowler.analysis"
            ) as mock_prowler_analysis,
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
                return_value=attack_paths_scan,
            ) as mock_retrieve_scan,
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan"
            ) as mock_starting,
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress"
            ) as mock_update_progress,
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan"
            ) as mock_finish,
            patch(
                "tasks.jobs.attack_paths.scan.get_cartography_ingestion_function",
                return_value=ingestion_fn,
            ) as mock_get_ingestion,
            patch(
                "tasks.jobs.attack_paths.scan._call_within_event_loop",
                side_effect=lambda fn, *a, **kw: fn(*a, **kw),
            ) as mock_event_loop,
        ):
            result = attack_paths_run(str(tenant.id), str(scan.id), "task-123")

        assert result == ingestion_result
        mock_retrieve_scan.assert_called_once_with(str(tenant.id), str(scan.id))
        mock_starting.assert_called_once()
        # Third positional argument given to `starting_attack_paths_scan`; reused below as
        # the expected config argument of the patched steps
        config = mock_starting.call_args[0][2]
        assert config.neo4j_database == "db-scan-id"
        mock_create_db.assert_called_once_with("db-scan-id")
        mock_get_session.assert_called_once_with("db-scan-id")
        mock_cartography_indexes.assert_called_once_with(mock_session, config)
        mock_prowler_indexes.assert_called_once_with(mock_session)
        mock_cartography_analysis.assert_called_once_with(mock_session, config)
        mock_cartography_ontology.assert_called_once_with(mock_session, config)
        mock_prowler_analysis.assert_called_once_with(
            mock_session,
            provider,
            str(scan.id),
            config,
        )
        mock_get_ingestion.assert_called_once_with(provider.provider)
        mock_event_loop.assert_called_once()
        mock_update_progress.assert_any_call(attack_paths_scan, 1)
        mock_update_progress.assert_any_call(attack_paths_scan, 2)
        mock_update_progress.assert_any_call(attack_paths_scan, 95)
        mock_finish.assert_called_once_with(
            attack_paths_scan, StateChoices.COMPLETED, ingestion_result
        )
        mock_get_db_name.assert_called_once_with(attack_paths_scan.id)

    def test_run_failure_marks_scan_failed(
        self, tenants_fixture, providers_fixture, scans_fixture
    ):
        """An ingestion error is re-raised and the scan is finished as FAILED with the error recorded."""
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        provider.provider = Provider.ProviderChoices.AWS
        provider.save()
        scan = scans_fixture[0]
        scan.provider = provider
        scan.save()
        attack_paths_scan = AttackPathsScan.objects.create(
            tenant_id=tenant.id,
            provider=provider,
            scan=scan,
            state=StateChoices.SCHEDULED,
        )
        mock_session = MagicMock()
        session_ctx = MagicMock()
        session_ctx.__enter__.return_value = mock_session
        session_ctx.__exit__.return_value = False
        # The ingestion function is the failure point of this scenario
        ingestion_fn = MagicMock(side_effect=RuntimeError("ingestion boom"))
        with (
            patch(
                "tasks.jobs.attack_paths.scan.rls_transaction",
                new=lambda *args, **kwargs: nullcontext(),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.initialize_prowler_provider",
                return_value=MagicMock(_enabled_regions=["us-east-1"]),
            ),
            patch("tasks.jobs.attack_paths.scan.graph_database.get_uri"),
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.get_database_name",
                return_value="db-scan-id",
            ),
            patch("tasks.jobs.attack_paths.scan.graph_database.create_database"),
            patch(
                "tasks.jobs.attack_paths.scan.graph_database.get_session",
                return_value=session_ctx,
            ),
            patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run"),
            patch("tasks.jobs.attack_paths.scan.cartography_analysis.run"),
            patch("tasks.jobs.attack_paths.scan.prowler.create_indexes"),
            patch("tasks.jobs.attack_paths.scan.prowler.analysis"),
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
                return_value=attack_paths_scan,
            ),
            patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan"),
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress"
            ),
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan"
            ) as mock_finish,
            patch(
                "tasks.jobs.attack_paths.scan.get_cartography_ingestion_function",
                return_value=ingestion_fn,
            ),
            patch(
                "tasks.jobs.attack_paths.scan._call_within_event_loop",
                side_effect=lambda fn, *a, **kw: fn(*a, **kw),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.utils.stringify_exception",
                return_value="Cartography failed: ingestion boom",
            ),
        ):
            with pytest.raises(RuntimeError, match="ingestion boom"):
                attack_paths_run(str(tenant.id), str(scan.id), "task-456")

        # Positional arguments of the last `finish_attack_paths_scan` call
        failure_args = mock_finish.call_args[0]
        assert failure_args[0] is attack_paths_scan
        assert failure_args[1] == StateChoices.FAILED
        assert failure_args[2] == {
            "global_cartography_error": "Cartography failed: ingestion boom"
        }

    def test_run_returns_early_for_unsupported_provider(self, tenants_fixture):
        """With no ingestion function for the provider, run returns a `global_error` dict."""
        tenant = tenants_fixture[0]
        provider = Provider.objects.create(
            provider=Provider.ProviderChoices.GCP,
            uid="gcp-account",
            alias="gcp",
            tenant_id=tenant.id,
        )
        scan = Scan.objects.create(
            name="GCP Scan",
            provider=provider,
            trigger=Scan.TriggerChoices.MANUAL,
            state=StateChoices.AVAILABLE,
            tenant_id=tenant.id,
        )
        with (
            patch(
                "tasks.jobs.attack_paths.scan.rls_transaction",
                new=lambda *args, **kwargs: nullcontext(),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.initialize_prowler_provider",
                return_value=MagicMock(),
            ),
            patch(
                "tasks.jobs.attack_paths.scan.get_cartography_ingestion_function",
                return_value=None,
            ) as mock_get_ingestion,
            patch(
                "tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan"
            ) as mock_retrieve,
        ):
            # No AttackPathsScan row is available for this scan
            mock_retrieve.return_value = None
            result = attack_paths_run(str(tenant.id), str(scan.id), "task-789")

        assert result == {
            "global_error": "Provider gcp is not supported for Attack Paths scans"
        }
        mock_get_ingestion.assert_called_once_with(provider.provider)
        mock_retrieve.assert_called_once_with(str(tenant.id), str(scan.id))
@pytest.mark.django_db
class TestAttackPathsProwlerHelpers:
def test_create_indexes_executes_all_statements(self):
    """`create_indexes` issues one write query per entry of `INDEX_STATEMENTS`."""
    session = MagicMock()
    statements = prowler_module.INDEX_STATEMENTS

    with patch("tasks.jobs.attack_paths.prowler.run_write_query") as write_query:
        prowler_module.create_indexes(session)

    expected_calls = [call(session, statement) for statement in statements]
    assert write_query.call_count == len(statements)
    write_query.assert_has_calls(expected_calls)
def test_load_findings_batches_requests(self, providers_fixture):
    """`load_findings` runs one session query per batch yielded by the findings generator."""
    aws_provider = providers_fixture[0]
    aws_provider.provider = Provider.ProviderChoices.AWS
    aws_provider.save()

    # Two single-finding batches, produced lazily by a generator
    batches = (
        [{"id": finding_id, "resource_uid": f"r-{finding_id}"}]
        for finding_id in ("1", "2")
    )
    scan_config = SimpleNamespace(update_tag=12345)
    session = MagicMock()

    root_label_patch = patch(
        "tasks.jobs.attack_paths.prowler.get_root_node_label",
        return_value="AWSAccount",
    )
    uid_field_patch = patch(
        "tasks.jobs.attack_paths.prowler.get_node_uid_field",
        return_value="arn",
    )
    with root_label_patch, uid_field_patch:
        prowler_module.load_findings(session, batches, aws_provider, scan_config)

    assert session.run.call_count == 2
    for run_call in session.run.call_args_list:
        query_params = run_call.args[1]
        assert query_params["provider_uid"] == str(aws_provider.uid)
        assert query_params["last_updated"] == scan_config.update_tag
        assert "findings_data" in query_params
def test_cleanup_findings_runs_batches(self, providers_fixture):
    """`cleanup_findings` runs two queries when the batches report 3 and then 0 deletions."""

    def batch_result(deleted_count):
        # Mimics a query result whose single record carries the deletion count
        result = MagicMock()
        result.single.return_value = {"deleted_findings_count": deleted_count}
        return result

    target_provider = providers_fixture[0]
    scan_config = SimpleNamespace(update_tag=1024)
    session = MagicMock()
    session.run.side_effect = [batch_result(3), batch_result(0)]

    prowler_module.cleanup_findings(session, target_provider, scan_config)

    assert session.run.call_count == 2
    last_params = session.run.call_args.args[1]
    assert last_params["provider_uid"] == str(target_provider.uid)
    assert last_params["last_updated"] == scan_config.update_tag
def test_get_provider_last_scan_findings_returns_latest_scan_data(
    self,
    tenants_fixture,
    providers_fixture,
):
    """Only the finding of the requested (latest) scan is returned, not the older scan's one."""
    tenant = tenants_fixture[0]
    provider = providers_fixture[0]
    provider.provider = Provider.ProviderChoices.AWS
    provider.save()
    # Single resource shared by the findings of both scans
    resource = Resource.objects.create(
        tenant_id=tenant.id,
        provider=provider,
        uid="resource-uid",
        name="Resource",
        region="us-east-1",
        service="ec2",
        type="instance",
    )
    # Older scan with its own finding: must not show up in the result
    older_scan = Scan.objects.create(
        name="Older",
        provider=provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant_id=tenant.id,
    )
    old_finding = Finding.objects.create(
        tenant_id=tenant.id,
        uid="older-finding",
        scan=older_scan,
        delta=Finding.DeltaChoices.NEW,
        status=StatusChoices.PASS,
        status_extended="ok",
        severity=Severity.low,
        impact=Severity.low,
        impact_extended="",
        raw_result={},
        check_id="check-old",
        check_metadata={"checktitle": "Old"},
        first_seen_at=older_scan.inserted_at,
    )
    ResourceFindingMapping.objects.create(
        tenant_id=tenant.id,
        resource=resource,
        finding=old_finding,
    )
    # Latest scan: the one whose findings are requested below
    latest_scan = Scan.objects.create(
        name="Latest",
        provider=provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant_id=tenant.id,
    )
    finding = Finding.objects.create(
        tenant_id=tenant.id,
        uid="finding-uid",
        scan=latest_scan,
        delta=Finding.DeltaChoices.NEW,
        status=StatusChoices.FAIL,
        status_extended="failed",
        severity=Severity.high,
        impact=Severity.high,
        impact_extended="",
        raw_result={},
        check_id="check-1",
        check_metadata={"checktitle": "Check title"},
        first_seen_at=latest_scan.inserted_at,
    )
    ResourceFindingMapping.objects.create(
        tenant_id=tenant.id,
        resource=resource,
        finding=finding,
    )
    latest_scan.refresh_from_db()
    with patch(
        "tasks.jobs.attack_paths.prowler.rls_transaction",
        new=lambda *args, **kwargs: nullcontext(),
    ), patch(
        "tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
        "default",
    ):
        # Generator yields batches, collect all findings from all batches
        findings_batches = prowler_module.get_provider_last_scan_findings(
            provider,
            str(latest_scan.id),
        )
        findings_data = []
        for batch in findings_batches:
            findings_data.extend(batch)

    assert len(findings_data) == 1
    finding_dict = findings_data[0]
    assert finding_dict["id"] == str(finding.id)
    assert finding_dict["resource_uid"] == resource.uid
    assert finding_dict["check_title"] == "Check title"
    assert finding_dict["scan_id"] == str(latest_scan.id)
def test_enrich_and_flatten_batch_single_resource(
    self,
    tenants_fixture,
    providers_fixture,
):
    """One finding + one resource = one output dict"""
    tenant = tenants_fixture[0]
    provider = providers_fixture[0]
    provider.provider = Provider.ProviderChoices.AWS
    provider.save()
    resource = Resource.objects.create(
        tenant_id=tenant.id,
        provider=provider,
        uid="resource-uid-1",
        name="Resource 1",
        region="us-east-1",
        service="ec2",
        type="instance",
    )
    scan = Scan.objects.create(
        name="Test Scan",
        provider=provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant_id=tenant.id,
    )
    finding = Finding.objects.create(
        tenant_id=tenant.id,
        uid="finding-uid",
        scan=scan,
        delta=Finding.DeltaChoices.NEW,
        status=StatusChoices.FAIL,
        status_extended="failed",
        severity=Severity.high,
        impact=Severity.high,
        impact_extended="",
        raw_result={},
        check_id="check-1",
        check_metadata={"checktitle": "Check title"},
        first_seen_at=scan.inserted_at,
    )
    # Link the finding to its only resource
    ResourceFindingMapping.objects.create(
        tenant_id=tenant.id,
        resource=resource,
        finding=finding,
    )
    # Simulate the dict returned by .values()
    finding_dict = {
        "id": finding.id,
        "uid": finding.uid,
        "inserted_at": finding.inserted_at,
        "updated_at": finding.updated_at,
        "first_seen_at": finding.first_seen_at,
        "scan_id": scan.id,
        "delta": finding.delta,
        "status": finding.status,
        "status_extended": finding.status_extended,
        "severity": finding.severity,
        "check_id": finding.check_id,
        "check_metadata__checktitle": finding.check_metadata["checktitle"],
        "muted": finding.muted,
        "muted_reason": finding.muted_reason,
    }
    # _enrich_and_flatten_batch queries ResourceFindingMapping directly
    # No RLS mock needed - test DB doesn't enforce RLS policies
    with patch(
        "tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
        "default",
    ):
        result = prowler_module._enrich_and_flatten_batch([finding_dict])

    assert len(result) == 1
    assert result[0]["resource_uid"] == resource.uid
    assert result[0]["id"] == str(finding.id)
    assert result[0]["status"] == "FAIL"
def test_enrich_and_flatten_batch_multiple_resources(
    self,
    tenants_fixture,
    providers_fixture,
):
    """A finding mapped to three resources flattens into three dicts."""
    tenant = tenants_fixture[0]
    aws_provider = providers_fixture[0]
    aws_provider.provider = Provider.ProviderChoices.AWS
    aws_provider.save()

    mapped_resources = [
        Resource.objects.create(
            tenant_id=tenant.id,
            provider=aws_provider,
            uid=f"resource-uid-{index}",
            name=f"Resource {index}",
            region="us-east-1",
            service="ec2",
            type="instance",
        )
        for index in range(3)
    ]
    completed_scan = Scan.objects.create(
        name="Test Scan",
        provider=aws_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant_id=tenant.id,
    )
    failed_finding = Finding.objects.create(
        tenant_id=tenant.id,
        uid="finding-uid",
        scan=completed_scan,
        delta=Finding.DeltaChoices.NEW,
        status=StatusChoices.FAIL,
        status_extended="failed",
        severity=Severity.high,
        impact=Severity.high,
        impact_extended="",
        raw_result={},
        check_id="check-1",
        check_metadata={"checktitle": "Check title"},
        first_seen_at=completed_scan.inserted_at,
    )

    # Attach the same finding to every resource created above.
    for mapped in mapped_resources:
        ResourceFindingMapping.objects.create(
            tenant_id=tenant.id,
            resource=mapped,
            finding=failed_finding,
        )

    # Hand-build the row that a `.values()` query would return; key order
    # matches the original literal.
    row = {
        field: getattr(failed_finding, field)
        for field in ("id", "uid", "inserted_at", "updated_at", "first_seen_at")
    }
    row["scan_id"] = completed_scan.id
    for field in ("delta", "status", "status_extended", "severity", "check_id"):
        row[field] = getattr(failed_finding, field)
    row["check_metadata__checktitle"] = failed_finding.check_metadata["checktitle"]
    row["muted"] = failed_finding.muted
    row["muted_reason"] = failed_finding.muted_reason

    # _enrich_and_flatten_batch queries ResourceFindingMapping directly, so
    # only the replica alias is redirected to the default database. No RLS
    # mock is needed - the test DB doesn't enforce RLS policies.
    replica_alias = patch(
        "tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
        "default",
    )
    with replica_alias:
        flattened = prowler_module._enrich_and_flatten_batch([row])

    assert len(flattened) == 3
    expected_uids = {mapped.uid for mapped in mapped_resources}
    assert {entry["resource_uid"] for entry in flattened} == expected_uids

    # Every output row carries the same finding data.
    for entry in flattened:
        assert entry["id"] == str(failed_finding.id)
        assert entry["status"] == "FAIL"
def test_enrich_and_flatten_batch_no_resources_skips(
    self,
    tenants_fixture,
    providers_fixture,
):
    """A finding with no mapped resource yields no output and no warning."""
    tenant = tenants_fixture[0]
    aws_provider = providers_fixture[0]
    aws_provider.provider = Provider.ProviderChoices.AWS
    aws_provider.save()

    completed_scan = Scan.objects.create(
        name="Test Scan",
        provider=aws_provider,
        trigger=Scan.TriggerChoices.MANUAL,
        state=StateChoices.COMPLETED,
        tenant_id=tenant.id,
    )
    # Deliberately orphaned: no ResourceFindingMapping row is created.
    orphan = Finding.objects.create(
        tenant_id=tenant.id,
        uid="orphan-finding",
        scan=completed_scan,
        delta=Finding.DeltaChoices.NEW,
        status=StatusChoices.FAIL,
        status_extended="failed",
        severity=Severity.high,
        impact=Severity.high,
        impact_extended="",
        raw_result={},
        check_id="check-1",
        check_metadata={"checktitle": "Check title"},
        first_seen_at=completed_scan.inserted_at,
    )

    # Hand-build the row that a `.values()` query would return; key order
    # matches the original literal.
    row = {
        field: getattr(orphan, field)
        for field in ("id", "uid", "inserted_at", "updated_at", "first_seen_at")
    }
    row["scan_id"] = completed_scan.id
    for field in ("delta", "status", "status_extended", "severity", "check_id"):
        row[field] = getattr(orphan, field)
    row["check_metadata__checktitle"] = orphan.check_metadata["checktitle"]
    row["muted"] = orphan.muted
    row["muted_reason"] = orphan.muted_reason

    # The module logger is replaced so the absence of warnings can be checked.
    with (
        patch(
            "tasks.jobs.attack_paths.prowler.READ_REPLICA_ALIAS",
            "default",
        ),
        patch("tasks.jobs.attack_paths.prowler.logger") as fake_logger,
    ):
        flattened = prowler_module._enrich_and_flatten_batch([row])

    assert len(flattened) == 0
    fake_logger.warning.assert_not_called()
def test_generator_is_lazy(self, providers_fixture):
    """Creating the findings generator must not touch the database."""
    aws_provider = providers_fixture[0]
    aws_provider.provider = Provider.ProviderChoices.AWS
    aws_provider.save()

    with (
        patch("tasks.jobs.attack_paths.prowler.rls_transaction") as fake_rls,
        patch("tasks.jobs.attack_paths.prowler.Finding") as fake_finding_model,
    ):
        # Build the generator and discard it without pulling any item.
        prowler_module.get_provider_last_scan_findings(aws_provider, "some-scan-id")

        # Neither the RLS transaction nor the Finding query may have run.
        fake_rls.assert_not_called()
        fake_finding_model.objects.filter.assert_not_called()
def test_load_findings_empty_generator(self, providers_fixture):
    """load_findings must not run any Neo4j query when nothing is yielded."""
    aws_provider = providers_fixture[0]
    aws_provider.provider = Provider.ProviderChoices.AWS
    aws_provider.save()

    neo4j_session = MagicMock()
    sync_config = SimpleNamespace(update_tag=12345)

    def no_findings():
        # A real generator object that finishes without yielding anything.
        yield from ()

    root_label_patch = patch(
        "tasks.jobs.attack_paths.prowler.get_root_node_label",
        return_value="AWSAccount",
    )
    uid_field_patch = patch(
        "tasks.jobs.attack_paths.prowler.get_node_uid_field",
        return_value="arn",
    )
    with root_label_patch, uid_field_patch:
        prowler_module.load_findings(
            neo4j_session, no_findings(), aws_provider, sync_config
        )

    neo4j_session.run.assert_not_called()
@@ -8,7 +8,6 @@ from tasks.jobs.backfill import (
backfill_provider_compliance_scores,
backfill_resource_scan_summaries,
backfill_scan_category_summaries,
backfill_scan_resource_group_summaries,
)
from api.models import (
@@ -17,7 +16,6 @@ from api.models import (
ResourceScanSummary,
Scan,
ScanCategorySummary,
ScanGroupSummary,
StateChoices,
)
from prowler.lib.check.models import Severity
@@ -267,94 +265,6 @@ class TestBackfillScanCategorySummaries:
assert summary.new_failed_findings == 1
@pytest.fixture(scope="function")
def findings_with_group_fixture(scans_fixture, resources_fixture):
    """Create a failed, new finding in resource group "ai_ml" tied to one resource."""
    first_scan = scans_fixture[0]
    finding_fields = dict(
        tenant_id=first_scan.tenant_id,
        uid="finding_with_group",
        scan=first_scan,
        delta="new",
        status=Status.FAIL,
        status_extended="test status",
        impact=Severity.high,
        impact_extended="test impact",
        severity=Severity.high,
        raw_result={"status": Status.FAIL},
        check_id="test_check",
        check_metadata={"CheckId": "test_check"},
        resource_groups="ai_ml",
        first_seen_at="2024-01-02T00:00:00Z",
    )
    grouped_finding = Finding.objects.create(**finding_fields)
    # Link the finding to the first fixture resource.
    grouped_finding.add_resources([resources_fixture[0]])
    return grouped_finding
@pytest.fixture(scope="function")
def scan_resource_group_summary_fixture(scans_fixture):
    """Create a pre-existing ScanGroupSummary row for the first fixture scan."""
    first_scan = scans_fixture[0]
    summary_fields = dict(
        tenant_id=first_scan.tenant_id,
        scan=first_scan,
        resource_group="existing-group",
        severity=Severity.high,
        total_findings=1,
        failed_findings=0,
        new_failed_findings=0,
        resources_count=1,
    )
    return ScanGroupSummary.objects.create(**summary_fields)
@pytest.mark.django_db
class TestBackfillScanGroupSummaries:
    """Tests for backfill_scan_resource_group_summaries."""

    def test_already_backfilled(self, scan_resource_group_summary_fixture):
        """A scan that already has a summary row is reported as backfilled."""
        existing = scan_resource_group_summary_fixture
        outcome = backfill_scan_resource_group_summaries(
            str(existing.tenant_id), str(existing.scan_id)
        )
        assert outcome == {"status": "already backfilled"}

    def test_not_completed_scan(self, get_not_completed_scans):
        """Every non-completed scan is rejected with the same status."""
        for pending_scan in get_not_completed_scans:
            outcome = backfill_scan_resource_group_summaries(
                str(pending_scan.tenant_id), str(pending_scan.id)
            )
            assert outcome == {"status": "scan is not completed"}

    def test_no_resource_groups_to_backfill(self, scans_fixture):
        """A scan without findings has nothing to backfill."""
        # Second fixture scan: failed scan with no findings.
        empty_scan = scans_fixture[1]
        outcome = backfill_scan_resource_group_summaries(
            str(empty_scan.tenant_id), str(empty_scan.id)
        )
        assert outcome == {"status": "no resource groups to backfill"}

    def test_successful_backfill(self, findings_with_group_fixture):
        """One finding in one group produces exactly one summary row."""
        tenant_id = str(findings_with_group_fixture.tenant_id)
        scan_id = str(findings_with_group_fixture.scan_id)

        outcome = backfill_scan_resource_group_summaries(tenant_id, scan_id)

        # 1 resource group × 1 severity = 1 row
        assert outcome == {"status": "backfilled", "resource_groups_count": 1}

        rows = ScanGroupSummary.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
        assert rows.count() == 1

        row = rows.first()
        assert row.resource_group == "ai_ml"
        assert row.severity == Severity.high
        assert row.total_findings == 1
        assert row.failed_findings == 1
        assert row.new_failed_findings == 1
        assert row.resources_count == 1
@pytest.mark.django_db
class TestBackfillProviderComplianceScores:
def test_no_completed_scans(self, tenants_fixture):
@@ -82,7 +82,7 @@ def test_check_provider_connection_exception(
[
{
"name": "OpenAI",
"api_key_decoded": "sk-fake-test-key-for-unit-testing-only",
"api_key_decoded": "sk-test1234567890T3BlbkFJtest1234567890",
"model": "gpt-4o",
"temperature": 0,
"max_tokens": 4000,
+30 -98
View File
@@ -1,60 +1,27 @@
from unittest.mock import call, patch
import pytest
from django.core.exceptions import ObjectDoesNotExist
from tasks.jobs.deletion import delete_provider, delete_tenant
from api.models import Provider, Tenant
from tasks.jobs.deletion import delete_provider, delete_tenant
@pytest.mark.django_db
class TestDeleteProvider:
def test_delete_provider_success(self, providers_fixture):
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
graph_db_names = ["graph-db-1", "graph-db-2"]
mock_get_provider_graph_database_names.return_value = graph_db_names
instance = providers_fixture[0]
tenant_id = str(instance.tenant_id)
result = delete_provider(tenant_id, instance.id)
instance = providers_fixture[0]
tenant_id = str(instance.tenant_id)
result = delete_provider(tenant_id, instance.id)
assert result
with pytest.raises(ObjectDoesNotExist):
Provider.objects.get(pk=instance.id)
mock_get_provider_graph_database_names.assert_called_once_with(
tenant_id, instance.id
)
mock_drop_database.assert_has_calls(
[call(graph_db_name) for graph_db_name in graph_db_names]
)
assert result
with pytest.raises(ObjectDoesNotExist):
Provider.objects.get(pk=instance.id)
def test_delete_provider_does_not_exist(self, tenants_fixture):
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
graph_db_names = ["graph-db-1"]
mock_get_provider_graph_database_names.return_value = graph_db_names
tenant_id = str(tenants_fixture[0].id)
non_existent_pk = "babf6796-cfcc-4fd3-9dcf-88d012247645"
tenant_id = str(tenants_fixture[0].id)
non_existent_pk = "babf6796-cfcc-4fd3-9dcf-88d012247645"
with pytest.raises(ObjectDoesNotExist):
delete_provider(tenant_id, non_existent_pk)
mock_get_provider_graph_database_names.assert_called_once_with(
tenant_id, non_existent_pk
)
mock_drop_database.assert_has_calls(
[call(graph_db_name) for graph_db_name in graph_db_names]
)
with pytest.raises(ObjectDoesNotExist):
delete_provider(tenant_id, non_existent_pk)
@pytest.mark.django_db
@@ -63,68 +30,33 @@ class TestDeleteTenant:
"""
Test successful deletion of a tenant and its related data.
"""
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
tenant = tenants_fixture[0]
providers = list(Provider.objects.filter(tenant_id=tenant.id))
tenant = tenants_fixture[0]
providers = Provider.objects.filter(tenant_id=tenant.id)
graph_db_names_per_provider = [
[f"graph-db-{provider.id}"] for provider in providers
]
mock_get_provider_graph_database_names.side_effect = (
graph_db_names_per_provider
)
# Ensure the tenant and related providers exist before deletion
assert Tenant.objects.filter(id=tenant.id).exists()
assert providers.exists()
# Ensure the tenant and related providers exist before deletion
assert Tenant.objects.filter(id=tenant.id).exists()
assert providers
# Call the function and validate the result
deletion_summary = delete_tenant(tenant.id)
# Call the function and validate the result
deletion_summary = delete_tenant(tenant.id)
assert deletion_summary is not None
assert not Tenant.objects.filter(id=tenant.id).exists()
assert not Provider.objects.filter(tenant_id=tenant.id).exists()
expected_calls = [
call(provider.tenant_id, provider.id) for provider in providers
]
mock_get_provider_graph_database_names.assert_has_calls(
expected_calls, any_order=True
)
assert mock_get_provider_graph_database_names.call_count == len(
expected_calls
)
expected_drop_calls = [
call(graph_db_name[0]) for graph_db_name in graph_db_names_per_provider
]
mock_drop_database.assert_has_calls(expected_drop_calls, any_order=True)
assert mock_drop_database.call_count == len(expected_drop_calls)
assert deletion_summary is not None
assert not Tenant.objects.filter(id=tenant.id).exists()
assert not Provider.objects.filter(tenant_id=tenant.id).exists()
def test_delete_tenant_with_no_providers(self, tenants_fixture):
"""
Test deletion of a tenant with no related providers.
"""
with patch(
"tasks.jobs.deletion.get_provider_graph_database_names"
) as mock_get_provider_graph_database_names, patch(
"tasks.jobs.deletion.graph_database.drop_database"
) as mock_drop_database:
tenant = tenants_fixture[1] # Assume this tenant has no providers
providers = Provider.objects.filter(tenant_id=tenant.id)
tenant = tenants_fixture[1] # Assume this tenant has no providers
providers = Provider.objects.filter(tenant_id=tenant.id)
# Ensure the tenant exists but has no related providers
assert Tenant.objects.filter(id=tenant.id).exists()
assert not providers.exists()
# Ensure the tenant exists but has no related providers
assert Tenant.objects.filter(id=tenant.id).exists()
assert not providers.exists()
# Call the function and validate the result
deletion_summary = delete_tenant(tenant.id)
# Call the function and validate the result
deletion_summary = delete_tenant(tenant.id)
assert deletion_summary == {} # No providers, so empty summary
assert not Tenant.objects.filter(id=tenant.id).exists()
mock_get_provider_graph_database_names.assert_not_called()
mock_drop_database.assert_not_called()
assert deletion_summary == {} # No providers, so empty summary
assert not Tenant.objects.filter(id=tenant.id).exists()
-16
View File
@@ -1380,8 +1380,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache: set[tuple[str, str, str, str]] = set()
mute_rules_cache = {}
scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
group_resources_cache: dict[str, set] = {}
with (
patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1400,8 +1398,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache,
mute_rules_cache,
scan_categories_cache,
scan_resource_groups_cache,
group_resources_cache,
)
created_finding = Finding.objects.get(uid=finding.uid)
@@ -1495,8 +1491,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache: set[tuple[str, str, str, str]] = set()
mute_rules_cache = {finding.uid: "Muted via rule"}
scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
group_resources_cache: dict[str, set] = {}
with (
patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1515,8 +1509,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache,
mute_rules_cache,
scan_categories_cache,
scan_resource_groups_cache,
group_resources_cache,
)
existing_resource.refresh_from_db()
@@ -1625,8 +1617,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache: set[tuple[str, str, str, str]] = set()
mute_rules_cache = {}
scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
group_resources_cache: dict[str, set] = {}
with (
patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1646,8 +1636,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache,
mute_rules_cache,
scan_categories_cache,
scan_resource_groups_cache,
group_resources_cache,
)
# Verify the long UID finding was NOT created
@@ -1725,8 +1713,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache: set[tuple[str, str, str, str]] = set()
mute_rules_cache = {}
scan_categories_cache: dict[tuple[str, str], dict[str, int]] = {}
scan_resource_groups_cache: dict[tuple[str, str], dict[str, int]] = {}
group_resources_cache: dict[str, set] = {}
with (
patch("tasks.jobs.scan.rls_transaction", new=noop_rls_transaction),
@@ -1745,8 +1731,6 @@ class TestProcessFindingMicroBatch:
scan_resource_cache,
mute_rules_cache,
scan_categories_cache,
scan_resource_groups_cache,
group_resources_cache,
)
# finding1: PASS, severity=low, categories=["gen-ai", "security"]
-283
View File
@@ -1,13 +1,10 @@
import uuid
from contextlib import contextmanager
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
import openai
import pytest
from botocore.exceptions import ClientError
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from django_celery_results.models import TaskResult
from tasks.jobs.lighthouse_providers import (
_create_bedrock_client,
_extract_bedrock_credentials,
@@ -18,8 +15,6 @@ from tasks.tasks import (
check_integrations_task,
check_lighthouse_provider_connection_task,
generate_outputs_task,
perform_attack_paths_scan_task,
perform_scheduled_scan_task,
refresh_lighthouse_provider_models_task,
s3_integration_task,
security_hub_integration_task,
@@ -31,7 +26,6 @@ from api.models import (
LighthouseProviderModels,
Scan,
StateChoices,
Task,
)
@@ -743,12 +737,8 @@ class TestScanCompleteTasks:
@patch("tasks.tasks.generate_outputs_task.si")
@patch("tasks.tasks.generate_compliance_reports_task.si")
@patch("tasks.tasks.check_integrations_task.si")
@patch("tasks.tasks.perform_attack_paths_scan_task.apply_async")
@patch("tasks.tasks.can_provider_run_attack_paths_scan", return_value=False)
def test_scan_complete_tasks(
self,
mock_can_run_attack_paths,
mock_attack_paths_task,
mock_check_integrations_task,
mock_compliance_reports_task,
mock_outputs_task,
@@ -803,67 +793,6 @@ class TestScanCompleteTasks:
scan_id="scan-id",
)
# Attack Paths task should be skipped when provider cannot run it
mock_attack_paths_task.assert_not_called()
class TestAttackPathsTasks:
    """Tests for the perform_attack_paths_scan_task Celery task wrapper."""

    @staticmethod
    @contextmanager
    def _override_task_request(task, **attrs):
        """Temporarily set attributes on ``task.request`` and restore them on exit."""
        request = task.request
        missing = object()
        saved = {name: getattr(request, name, missing) for name in attrs}
        for name, value in attrs.items():
            setattr(request, name, value)
        try:
            yield
        finally:
            for name, old_value in saved.items():
                if old_value is not missing:
                    setattr(request, name, old_value)
                elif hasattr(request, name):
                    # The attribute did not exist before: remove it again.
                    delattr(request, name)

    def test_perform_attack_paths_scan_task_calls_runner(self):
        """The task forwards its arguments and request id to attack_paths_scan."""
        runner_patch = patch("tasks.tasks.attack_paths_scan")
        request_override = self._override_task_request(
            perform_attack_paths_scan_task, id="celery-task-id"
        )
        with runner_patch as fake_runner, request_override:
            fake_runner.return_value = {"status": "ok"}
            outcome = perform_attack_paths_scan_task.run(
                tenant_id="tenant-id", scan_id="scan-id"
            )

        fake_runner.assert_called_once_with(
            tenant_id="tenant-id", scan_id="scan-id", task_id="celery-task-id"
        )
        assert outcome == {"status": "ok"}

    def test_perform_attack_paths_scan_task_propagates_exception(self):
        """An exception raised by attack_paths_scan escapes the task unchanged."""
        runner_patch = patch(
            "tasks.tasks.attack_paths_scan",
            side_effect=RuntimeError("Exception to propagate"),
        )
        request_override = self._override_task_request(
            perform_attack_paths_scan_task, id="celery-task-error"
        )
        with runner_patch as fake_runner, request_override:
            with pytest.raises(RuntimeError, match="Exception to propagate"):
                perform_attack_paths_scan_task.run(
                    tenant_id="tenant-id", scan_id="scan-id"
                )

        fake_runner.assert_called_once_with(
            tenant_id="tenant-id", scan_id="scan-id", task_id="celery-task-error"
        )
@pytest.mark.django_db
class TestCheckIntegrationsTask:
@@ -2139,215 +2068,3 @@ class TestCleanupOrphanScheduledScans:
assert not Scan.objects.filter(id=orphan_scan.id).exists()
assert Scan.objects.filter(id=scheduled_scan.id).exists()
assert Scan.objects.filter(id=available_scan_other_task.id).exists()
@pytest.mark.django_db
class TestPerformScheduledScanTask:
    """Unit tests for perform_scheduled_scan_task."""

    @staticmethod
    @contextmanager
    def _override_task_request(task, **attrs):
        """Temporarily set attributes on ``task.request`` and restore them on exit."""
        request = task.request
        missing = object()
        saved = {name: getattr(request, name, missing) for name in attrs}
        for name, value in attrs.items():
            setattr(request, name, value)
        try:
            yield
        finally:
            for name, old_value in saved.items():
                if old_value is not missing:
                    setattr(request, name, old_value)
                elif hasattr(request, name):
                    # The attribute did not exist before: remove it again.
                    delattr(request, name)

    def _create_periodic_task(self, provider_id, tenant_id, interval_hours=24):
        """Create the enabled PeriodicTask that schedules scans for a provider."""
        schedule, _created = IntervalSchedule.objects.get_or_create(
            every=interval_hours, period="hours"
        )
        task_fields = dict(
            name=f"scan-perform-scheduled-{provider_id}",
            task="scan-perform-scheduled",
            interval=schedule,
            kwargs=f'{{"tenant_id": "{tenant_id}", "provider_id": "{provider_id}"}}',
            enabled=True,
        )
        return PeriodicTask.objects.create(**task_fields)

    def _create_task_result(self, tenant_id, task_id):
        """Create a STARTED TaskResult plus the Task row that references it."""
        celery_result = TaskResult.objects.create(
            task_id=task_id,
            task_name="scan-perform-scheduled",
            status="STARTED",
            date_created=datetime.now(timezone.utc),
        )
        Task.objects.create(
            id=task_id, task_runner_task=celery_result, tenant_id=tenant_id
        )
        return celery_result

    def test_skip_when_scheduled_scan_executing(
        self, tenants_fixture, providers_fixture
    ):
        """Skip a scheduled run when another scheduled scan is already executing."""
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        beat_task = self._create_periodic_task(provider.id, tenant.id)
        celery_id = str(uuid.uuid4())
        self._create_task_result(tenant.id, celery_id)

        running_scan = Scan.objects.create(
            tenant_id=tenant.id,
            provider=provider,
            name="Daily scheduled scan",
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.EXECUTING,
            scheduler_task_id=beat_task.id,
        )

        with (
            patch("tasks.tasks.perform_prowler_scan") as fake_scan,
            patch("tasks.tasks._perform_scan_complete_tasks") as fake_post_tasks,
            self._override_task_request(perform_scheduled_scan_task, id=celery_id),
        ):
            outcome = perform_scheduled_scan_task.run(
                tenant_id=str(tenant.id), provider_id=str(provider.id)
            )

        fake_scan.assert_not_called()
        fake_post_tasks.assert_not_called()
        assert outcome["id"] == str(running_scan.id)
        assert outcome["state"] == StateChoices.EXECUTING

        # No new SCHEDULED scan may have been queued for this provider.
        queued = Scan.objects.filter(
            tenant_id=tenant.id,
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.SCHEDULED,
        )
        assert queued.count() == 0

    def test_creates_next_scheduled_scan_after_completion(
        self, tenants_fixture, providers_fixture
    ):
        """Create a next scheduled scan after a successful run completes."""
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        self._create_periodic_task(provider.id, tenant.id)
        celery_id = str(uuid.uuid4())
        self._create_task_result(tenant.id, celery_id)

        def _complete_scan(tenant_id, scan_id, provider_id):
            # While the scan runs, no other SCHEDULED scan may exist yet.
            siblings = Scan.objects.filter(
                tenant_id=tenant_id,
                provider_id=provider_id,
                trigger=Scan.TriggerChoices.SCHEDULED,
                state=StateChoices.SCHEDULED,
            ).exclude(id=scan_id)
            assert not siblings.exists()

            current = Scan.objects.get(id=scan_id)
            current.state = StateChoices.COMPLETED
            current.save()
            return {"status": "ok"}

        with (
            patch("tasks.tasks.perform_prowler_scan", side_effect=_complete_scan),
            patch("tasks.tasks._perform_scan_complete_tasks"),
            self._override_task_request(perform_scheduled_scan_task, id=celery_id),
        ):
            perform_scheduled_scan_task.run(
                tenant_id=str(tenant.id), provider_id=str(provider.id)
            )

        upcoming = Scan.objects.filter(
            tenant_id=tenant.id,
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.SCHEDULED,
        )
        assert upcoming.count() == 1
        assert upcoming.first().scheduled_at > datetime.now(timezone.utc)

        pending = Scan.objects.filter(
            tenant_id=tenant.id,
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
        )
        assert pending.count() == 1

        finished = Scan.objects.filter(
            tenant_id=tenant.id,
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.COMPLETED,
        )
        assert finished.count() == 1

    def test_dedupes_multiple_scheduled_scans_before_run(
        self, tenants_fixture, providers_fixture
    ):
        """Ensure duplicated scheduled scans are removed before executing."""
        tenant = tenants_fixture[0]
        provider = providers_fixture[0]
        beat_task = self._create_periodic_task(provider.id, tenant.id)
        celery_id = str(uuid.uuid4())
        self._create_task_result(tenant.id, celery_id)

        original_scan = Scan.objects.create(
            tenant_id=tenant.id,
            provider=provider,
            name="Daily scheduled scan",
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.SCHEDULED,
            scheduled_at=datetime.now(timezone.utc),
            scheduler_task_id=beat_task.id,
        )
        # Same schedule slot and scheduler task, but in AVAILABLE state.
        duplicate_scan = Scan.objects.create(
            tenant_id=tenant.id,
            provider=provider,
            name="Daily scheduled scan",
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.AVAILABLE,
            scheduled_at=original_scan.scheduled_at,
            scheduler_task_id=beat_task.id,
        )

        def _complete_scan(tenant_id, scan_id, provider_id):
            # By the time the scan runs, the duplicate must already be gone.
            siblings = Scan.objects.filter(
                tenant_id=tenant_id,
                provider_id=provider_id,
                trigger=Scan.TriggerChoices.SCHEDULED,
                state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
            ).exclude(id=scan_id)
            assert not siblings.exists()

            current = Scan.objects.get(id=scan_id)
            current.state = StateChoices.COMPLETED
            current.save()
            return {"status": "ok"}

        with (
            patch("tasks.tasks.perform_prowler_scan", side_effect=_complete_scan),
            patch("tasks.tasks._perform_scan_complete_tasks"),
            self._override_task_request(perform_scheduled_scan_task, id=celery_id),
        ):
            perform_scheduled_scan_task.run(
                tenant_id=str(tenant.id), provider_id=str(provider.id)
            )

        assert not Scan.objects.filter(id=duplicate_scan.id).exists()
        assert Scan.objects.filter(id=original_scan.id).exists()

        pending = Scan.objects.filter(
            tenant_id=tenant.id,
            provider=provider,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
        )
        assert pending.count() == 1
-59
View File
@@ -5,10 +5,6 @@ from enum import Enum
from django_celery_beat.models import PeriodicTask
from django_celery_results.models import TaskResult
from api.models import Scan, StateChoices
SCHEDULED_SCAN_NAME = "Daily scheduled scan"
class CustomEncoder(json.JSONEncoder):
def default(self, o):
@@ -75,58 +71,3 @@ def batched(iterable, batch_size):
batch = []
yield batch, True
def _get_or_create_scheduled_scan(
    tenant_id: str,
    provider_id: str,
    scheduler_task_id: int,
    scheduled_at: datetime,
    update_state: bool = False,
) -> Scan:
    """
    Return the pending scheduled scan for a provider, creating it when missing.

    Pending scans are those triggered by the scheduler in SCHEDULED or
    AVAILABLE state for the given scheduler task. When several exist, the
    earliest one (by ``scheduled_at``, then ``inserted_at``) is kept and the
    others are deleted.

    Args:
        tenant_id: The tenant ID.
        provider_id: The provider ID.
        scheduler_task_id: The PeriodicTask ID.
        scheduled_at: The scheduled datetime for the scan.
        update_state: If True, a kept scan that is not in SCHEDULED state is
            reset to SCHEDULED and renamed to the default scheduled-scan name.

    Returns:
        The scan instance to use.
    """
    candidates = list(
        Scan.objects.filter(
            tenant_id=tenant_id,
            provider_id=provider_id,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
            scheduler_task_id=scheduler_task_id,
        ).order_by("scheduled_at", "inserted_at")
    )

    # Nothing pending: create a fresh scheduled scan.
    if not candidates:
        return Scan.objects.create(
            tenant_id=tenant_id,
            name=SCHEDULED_SCAN_NAME,
            provider_id=provider_id,
            trigger=Scan.TriggerChoices.SCHEDULED,
            state=StateChoices.SCHEDULED,
            scheduled_at=scheduled_at,
            scheduler_task_id=scheduler_task_id,
        )

    kept, *duplicates = candidates
    if duplicates:
        Scan.objects.filter(id__in=[extra.id for extra in duplicates]).delete()

    dirty = kept.scheduled_at != scheduled_at
    if update_state and kept.state != StateChoices.SCHEDULED:
        kept.state = StateChoices.SCHEDULED
        kept.name = SCHEDULED_SCAN_NAME
        dirty = True

    # Save only when the schedule time or the state actually changed.
    if dirty:
        kept.scheduled_at = scheduled_at
        kept.save()

    return kept
@@ -1,20 +0,0 @@
# prowler/contrib/aws/simulate_policy/simulate_policy_client.py
from typing import Optional
from prowler.contrib.aws.simulate_policy.simulate_policy_service import IamSimulator
from prowler.providers.common.provider import Provider
_iam_simulator_client: Optional[IamSimulator] = None
def get_iam_simulator_client() -> IamSimulator:
    """Return the module-wide IamSimulator, building it on first use.

    The instance is created from the global Provider and cached in
    ``_iam_simulator_client`` so later calls return the same object.

    Raises:
        RuntimeError: If the global Provider has not been initialized yet.
    """
    global _iam_simulator_client

    if _iam_simulator_client is not None:
        return _iam_simulator_client

    global_provider = Provider.get_global_provider()
    if global_provider is None:
        # Fail fast with a clear message if somehow called too early
        raise RuntimeError("Global Provider is not initialized yet for IAM simulator.")

    _iam_simulator_client = IamSimulator(global_provider)
    return _iam_simulator_client
@@ -1,200 +0,0 @@
# prowler/contrib/aws/simulate_policy/simulate_policy_service.py
import json
import logging
from typing import Dict, List, Optional, Tuple
from botocore.exceptions import ClientError
from prowler.providers.common.provider import Provider
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# ======================================================================
# PURPOSE
# ----------------------------------------------------------------------
# This module provides a precise way to test IAM actions programmatically.
# It replicates the behaviour of the AWS CLI command:
# aws iam simulate-principal-policy --policy-source-arn arn:aws:iam::<account>:role/<role> --action-names <action>
#
# Use this when you need to validate whether a specific IAM role allows or denies
# certain actions against given resources.
#
# ======================================================================
# CLI ANALOGUE
# ----------------------------------------------------------------------
# Example equivalent CLI command:
# aws iam simulate-principal-policy \
# --policy-source-arn arn:aws:iam::<account>:role/<role> \
# --action-names datazone:AcceptPredictions
#
# ======================================================================
# DOCUMENTATION
# ----------------------------------------------------------------------
# AWS IAM Policy Simulator:
# https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_testing-policies.html
#
# IAM Condition Keys:
# https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_condition-keys.html
#
# Related AWS SDK discussion:
# https://github.com/aws/aws-sdk/issues/102
#
# ======================================================================
# LIMITATIONS
# ----------------------------------------------------------------------
# - The IAM Policy Simulator does NOT evaluate Service Control Policies (SCPs)
# that include conditions. This is a limitation of the API.
# - In environments where SCPs contain conditions, use
# `is_action_allowed_simulate_custom_policy` instead.
# - In environments without SCP conditions, `is_action_allowed_simulate_principal_policy`
# works as expected.
#
# ======================================================================
# USAGE
# ----------------------------------------------------------------------
# In your custom check:
#
# from prowler.contrib.aws.simulate_policy.simulate_policy_client import get_iam_simulator_client
#
# iam_sim = get_iam_simulator_client()
# policy_data = iam_sim.get_role_policy_data(role_name=role_name)
# iam_sim.is_action_allowed_simulate_custom_policy(
# policy_data=policy_data,
# action_names=[action],
# resource_arns=["*"]
# )
#
#
# ======================================================================
class IamSimulator:
"""
Helper for IAM Policy Simulator:
- simulate_principal_policy
- simulate_custom_policy
- collect role inline/managed policies
"""
def __init__(self, provider: Provider) -> None:
    """Create the IAM client from the provider's current boto3 session.

    Args:
        provider: Provider exposing ``session.current_session`` and,
            when available, ``get_global_region()``.
    """
    session = provider.session.current_session

    # IAM is a global service, so the region is optional. The provider's
    # global region is preferred to stay consistent across partitions.
    try:
        iam_region = provider.get_global_region()
    except AttributeError:
        # Older provider trees lack the helper: fall back to the session
        # region, then to "us-east-1".
        iam_region = session.region_name or "us-east-1"

    self.iam = session.client("iam", region_name=iam_region)
def is_action_allowed_simulate_principal_policy(
self,
principal_arn: str,
action_names: List[str],
resource_arns: Optional[List[str]] = None,
) -> Tuple[bool, Dict]:
if resource_arns is None:
resource_arns = ["*"]
try:
resp = self.iam.simulate_principal_policy(
PolicySourceArn=principal_arn,
ActionNames=action_names,
ResourceArns=resource_arns,
)
allowed = any(
r.get("EvalDecision") == "allowed"
for r in resp.get("EvaluationResults", [])
)
return allowed, resp
except ClientError as e:
logger.error("simulate_principal_policy failed: %s", e, exc_info=True)
return False, {"error": str(e)}
def get_role_policy_data(self, role_name: str) -> Dict[str, List]:
inline_names: List[str] = []
inline_docs: List[Dict] = []
managed_names: List[str] = []
managed_docs: List[Dict] = []
# Inline policies
inline_resp = self.iam.list_role_policies(RoleName=role_name)
inline_names = inline_resp.get("PolicyNames", [])
for pname in inline_names:
pol_resp = self.iam.get_role_policy(RoleName=role_name, PolicyName=pname)
inline_docs.append(pol_resp["PolicyDocument"]) # dict
# Managed policies
managed_resp = self.iam.list_attached_role_policies(RoleName=role_name)
for attached in managed_resp.get("AttachedPolicies", []):
managed_names.append(attached["PolicyName"])
pol_meta = self.iam.get_policy(PolicyArn=attached["PolicyArn"])["Policy"]
pol_ver = self.iam.get_policy_version(
PolicyArn=attached["PolicyArn"], VersionId=pol_meta["DefaultVersionId"]
)
managed_docs.append(pol_ver["PolicyVersion"]["Document"]) # dict
return {
"inline_policy_names": inline_names,
"inline_policy_data": inline_docs,
"managed_policy_names": managed_names,
"managed_policy_data": managed_docs,
}
def is_action_allowed_simulate_custom_policy(
self,
policy_data: Dict[str, List],
action_names: List[str],
resource_arns: Optional[List[str]] = None,
) -> Tuple[bool, Dict]:
names = policy_data.get("inline_policy_names", []) + policy_data.get(
"managed_policy_names", []
)
docs = policy_data.get("inline_policy_data", []) + policy_data.get(
"managed_policy_data", []
)
results: Dict[str, List] = {"policies": []}
any_allowed = False
if resource_arns is None:
resource_arns = ["*"]
for idx, doc in enumerate(docs):
name = names[idx] if idx < len(names) else f"policy_{idx}"
try:
sim_resp = self.iam.simulate_custom_policy(
PolicyInputList=[json.dumps(doc)],
ActionNames=action_names,
ResourceArns=resource_arns,
)
except ClientError as e:
logger.error(
"simulate_custom_policy failed for %s: %s", name, e, exc_info=True
)
results["policies"].append({"policy_name": name, "error": str(e)})
continue
per_action = []
for ev in sim_resp.get("EvaluationResults", []):
decision = ev.get(
"EvalDecision"
) # allowed | explicitDeny | implicitDeny
per_action.append(
{
"action": ev.get("EvalActionName"),
"decision": decision,
"matching_statements": ev.get("MatchedStatements", []),
"missing_context_values": ev.get("MissingContextValues", []),
}
)
if decision == "allowed":
any_allowed = True
results["policies"].append({"policy_name": name, "evaluations": per_action})
return any_allowed, results
+1 -46
View File
@@ -1,7 +1,6 @@
services:
api-dev:
hostname: "prowler-api"
image: prowler-api-dev
build:
context: ./api
dockerfile: Dockerfile
@@ -25,8 +24,6 @@ services:
condition: service_healthy
valkey:
condition: service_healthy
neo4j:
condition: service_healthy
entrypoint:
- "/home/prowler/docker-entrypoint.sh"
- "dev"
@@ -88,41 +85,7 @@ services:
timeout: 5s
retries: 3
neo4j:
image: graphstack/dozerdb:5.26.3.0
hostname: "neo4j"
volumes:
- ./_data/neo4j:/data
environment:
# We can't add our .env file because some of our current variables are not compatible with Neo4j env vars
# Auth
- NEO4J_AUTH=${NEO4J_USER}/${NEO4J_PASSWORD}
# Memory limits
- NEO4J_dbms_max__databases=${NEO4J_DBMS_MAX__DATABASES:-1000}
- NEO4J_server_memory_pagecache_size=${NEO4J_SERVER_MEMORY_PAGECACHE_SIZE:-1G}
- NEO4J_server_memory_heap_initial__size=${NEO4J_SERVER_MEMORY_HEAP_INITIAL__SIZE:-1G}
- NEO4J_server_memory_heap_max__size=${NEO4J_SERVER_MEMORY_HEAP_MAX__SIZE:-1G}
# APOC
- apoc.export.file.enabled=${NEO4J_POC_EXPORT_FILE_ENABLED:-true}
- apoc.import.file.enabled=${NEO4J_APOC_IMPORT_FILE_ENABLED:-true}
- apoc.import.file.use_neo4j_config=${NEO4J_APOC_IMPORT_FILE_USE_NEO4J_CONFIG:-true}
- "NEO4J_PLUGINS=${NEO4J_PLUGINS:-[\"apoc\"]}"
- "NEO4J_dbms_security_procedures_allowlist=${NEO4J_DBMS_SECURITY_PROCEDURES_ALLOWLIST:-apoc.*}"
- "NEO4J_dbms_security_procedures_unrestricted=${NEO4J_DBMS_SECURITY_PROCEDURES_UNRESTRICTED:-apoc.*}"
# Networking
- "dbms.connector.bolt.listen_address=${NEO4J_DBMS_CONNECTOR_BOLT_LISTEN_ADDRESS:-0.0.0.0:7687}"
# 7474 is the UI port
ports:
- 7474:7474
- ${NEO4J_PORT:-7687}:7687
healthcheck:
test: ["CMD", "wget", "--no-verbose", "http://localhost:7474"]
interval: 10s
timeout: 10s
retries: 10
worker-dev:
image: prowler-api-dev
build:
context: ./api
dockerfile: Dockerfile
@@ -133,23 +96,17 @@ services:
- path: .env
required: false
volumes:
- ./api/src/backend:/home/prowler/backend
- ./api/pyproject.toml:/home/prowler/pyproject.toml
- ./api/docker-entrypoint.sh:/home/prowler/docker-entrypoint.sh
- outputs:/tmp/prowler_api_output
- "outputs:/tmp/prowler_api_output"
depends_on:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
neo4j:
condition: service_healthy
entrypoint:
- "/home/prowler/docker-entrypoint.sh"
- "worker"
worker-beat:
image: prowler-api-dev
build:
context: ./api
dockerfile: Dockerfile
@@ -164,8 +121,6 @@ services:
condition: service_healthy
postgres:
condition: service_healthy
neo4j:
condition: service_healthy
entrypoint:
- "../docker-entrypoint.sh"
- "beat"
-33
View File
@@ -21,8 +21,6 @@ services:
condition: service_healthy
valkey:
condition: service_healthy
neo4j:
condition: service_healthy
entrypoint:
- "/home/prowler/docker-entrypoint.sh"
- "prod"
@@ -74,37 +72,6 @@ services:
timeout: 5s
retries: 3
neo4j:
image: graphstack/dozerdb:5.26.3.0
hostname: "neo4j"
volumes:
- ./_data/neo4j:/data
environment:
# We can't add our .env file because some of our current variables are not compatible with Neo4j env vars
# Auth
- NEO4J_AUTH=${NEO4J_USER}/${NEO4J_PASSWORD}
# Memory limits
- NEO4J_dbms_max__databases=${NEO4J_DBMS_MAX__DATABASES:-1000}
- NEO4J_server_memory_pagecache_size=${NEO4J_SERVER_MEMORY_PAGECACHE_SIZE:-1G}
- NEO4J_server_memory_heap_initial__size=${NEO4J_SERVER_MEMORY_HEAP_INITIAL__SIZE:-1G}
- NEO4J_server_memory_heap_max__size=${NEO4J_SERVER_MEMORY_HEAP_MAX__SIZE:-1G}
# APOC
- apoc.export.file.enabled=${NEO4J_POC_EXPORT_FILE_ENABLED:-true}
- apoc.import.file.enabled=${NEO4J_APOC_IMPORT_FILE_ENABLED:-true}
- apoc.import.file.use_neo4j_config=${NEO4J_APOC_IMPORT_FILE_USE_NEO4J_CONFIG:-true}
- "NEO4J_PLUGINS=${NEO4J_PLUGINS:-[\"apoc\"]}"
- "NEO4J_dbms_security_procedures_allowlist=${NEO4J_DBMS_SECURITY_PROCEDURES_ALLOWLIST:-apoc.*}"
- "NEO4J_dbms_security_procedures_unrestricted=${NEO4J_DBMS_SECURITY_PROCEDURES_UNRESTRICTED:-apoc.*}"
# Networking
- "dbms.connector.bolt.listen_address=${NEO4J_DBMS_CONNECTOR_BOLT_LISTEN_ADDRESS:-0.0.0.0:7687}"
ports:
- ${NEO4J_PORT:-7687}:7687
healthcheck:
test: ["CMD", "wget", "--no-verbose", "http://localhost:7474"]
interval: 10s
timeout: 10s
retries: 10
worker:
image: prowlercloud/prowler-api:${PROWLER_API_VERSION:-stable}
env_file:
+11
View File
@@ -0,0 +1,11 @@
---
title: 'Contact Us'
---
For technical support or any type of inquiries, you are very welcome to:
- Reach out to community members on the [**Prowler Slack channel**](https://goto.prowler.com/slack)
- Open an Issue or a Pull Request in our [**GitHub repository**](https://github.com/prowler-cloud/prowler).
We appreciate all types of feedback and contributions. Prowler would not be the same without our vibrant community! 😃
+4 -7
View File
@@ -128,10 +128,8 @@ flowchart TB
P5["prowler-mcp"]
P6["prowler-provider"]
P7["prowler-compliance"]
P8["prowler-compliance-review"]
P9["prowler-docs"]
P10["prowler-pr"]
P11["prowler-ci"]
P8["prowler-docs"]
P9["prowler-pr"]
end
subgraph TESTING["Testing Skills"]
@@ -142,7 +140,6 @@ flowchart TB
subgraph META["Meta Skills"]
M1["skill-creator"]
M2["skill-sync"]
end
end
@@ -192,9 +189,9 @@ flowchart TB
| Type | Skills |
|------|--------|
| **Generic** | typescript, react-19, nextjs-15, tailwind-4, pytest, playwright, django-drf, zod-4, zustand-5, ai-sdk-5 |
| **Prowler** | prowler, prowler-sdk-check, prowler-api, prowler-ui, prowler-mcp, prowler-provider, prowler-compliance, prowler-compliance-review, prowler-docs, prowler-pr, prowler-ci |
| **Prowler** | prowler, prowler-sdk-check, prowler-api, prowler-ui, prowler-mcp, prowler-provider, prowler-compliance, prowler-docs, prowler-pr |
| **Testing** | prowler-test-sdk, prowler-test-api, prowler-test-ui |
| **Meta** | skill-creator, skill-sync |
| **Meta** | skill-creator |
## Skill Structure
+4 -29
View File
@@ -248,13 +248,6 @@
"user-guide/providers/mongodbatlas/authentication"
]
},
{
"group": "Cloudflare",
"pages": [
"user-guide/providers/cloudflare/getting-started-cloudflare",
"user-guide/providers/cloudflare/authentication"
]
},
{
"group": "LLM",
"pages": [
@@ -331,28 +324,14 @@
},
{
"tab": "Security",
"groups": [
{
"group": "Security & Compliance",
"pages": [
"security/index",
"security/software-security"
]
},
{
"group": "Prowler Cloud",
"pages": [
"security/encryption",
"security/data-regions",
"security/networking"
]
}
"pages": [
"security"
]
},
{
"tab": "Support",
"tab": "Contact Us",
"pages": [
"support"
"contact"
]
},
{
@@ -473,10 +452,6 @@
{
"source": "/projects/prowler-open-source/en/latest/tutorials/:slug*",
"destination": "/user-guide/tutorials/:slug*"
},
{
"source": "/contact",
"destination": "/support"
}
]
}
@@ -115,8 +115,8 @@ To update the environment file:
Edit the `.env` file and change version values:
```env
PROWLER_UI_VERSION="5.17.1"
PROWLER_API_VERSION="5.17.1"
PROWLER_UI_VERSION="5.16.0"
PROWLER_API_VERSION="5.16.0"
```
<Note>
-1
View File
@@ -32,7 +32,6 @@ The supported providers right now are:
| [M365](/user-guide/providers/microsoft365/getting-started-m365) | Official | Tenants | UI, API, CLI |
| [Github](/user-guide/providers/github/getting-started-github) | Official | Organizations / Repositories | UI, API, CLI |
| [Oracle Cloud](/user-guide/providers/oci/getting-started-oci) | Official | Tenancies / Compartments | UI, API, CLI |
| [Cloudflare](/user-guide/providers/cloudflare/getting-started-cloudflare) | Official | Accounts | CLI |
| [Infra as Code](/user-guide/providers/iac/getting-started-iac) | Official | Repositories | UI, API, CLI |
| [MongoDB Atlas](/user-guide/providers/mongodbatlas/getting-started-mongodbatlas) | Official | Organizations | UI, API, CLI |
| [LLM](/user-guide/providers/llm/getting-started-llm) | Official | Models | CLI |
+163
View File
@@ -0,0 +1,163 @@
---
title: 'Security'
---
## Compliance and Trust
We publish our live SOC 2 Type 2 Compliance data at [https://trust.prowler.com](https://trust.prowler.com)
As an **AWS Partner**, we have passed the [AWS Foundation Technical Review (FTR)](https://aws.amazon.com/partners/foundational-technical-review/).
## Encryption (Prowler Cloud)
We use encryption everywhere possible. The data and communications used by **Prowler Cloud** are **encrypted at-rest** and **in-transit**.
## Data Retention Policy (Prowler Cloud)
Prowler Cloud is GDPR compliant in regard to personal data and the ["right to be forgotten"](https://gdpr.eu/right-to-be-forgotten/). When a user deletes their account, their user information will be deleted from Prowler Cloud online and backup systems within 10 calendar days.
## Software Security
We follow a **security-by-design approach** throughout our software development lifecycle. All changes go through automated checks at every stage, from local development to production deployment.
We enforce [pre-commit](https://github.com/prowler-cloud/prowler/blob/master/.pre-commit-config.yaml) validations to catch issues early, and [our CI/CD pipelines](https://github.com/prowler-cloud/prowler/tree/master/.github) include multiple security gates to ensure code quality, secure configurations, and compliance with internal standards.
Our container registries are continuously scanned for vulnerabilities, with findings automatically reported to our security team for assessment and remediation. This process evolves alongside our stack as we adopt new languages, frameworks, and technologies, ensuring our security practices remain comprehensive, proactive, and adaptable.
### Static Application Security Testing (SAST)
We employ multiple SAST tools across our codebase to identify security vulnerabilities, code quality issues, and potential bugs during development:
#### CodeQL Analysis
- **Scope**: UI (JavaScript/TypeScript), API (Python), and SDK (Python)
- **Frequency**: On every push and pull request, plus daily scheduled scans
- **Integration**: Results uploaded to GitHub Security tab via SARIF format
- **Purpose**: Identifies security vulnerabilities, coding errors, and potential exploits in source code
#### Python Security Scanners
- **Bandit**: Detects common security issues in Python code (SQL injection, hardcoded passwords, etc.)
- Configured to ignore test files and report only high-severity issues
- Runs on both SDK and API codebases
- **Pylint**: Static code analysis with security-focused checks
- Integrated into pre-commit hooks and CI/CD pipelines
#### Code Quality & Dead Code Detection
- **Vulture**: Identifies unused code that could indicate incomplete implementations or security gaps
- **Flake8**: Style guide enforcement with security-relevant checks
- **Shellcheck**: Security and correctness checks for shell scripts
### Software Composition Analysis (SCA)
We continuously monitor our dependencies for known vulnerabilities and ensure timely updates:
#### Dependency Vulnerability Scanning
- **Safety**: Scans Python dependencies against known vulnerability databases
- Runs on every commit via pre-commit hooks
- Integrated into CI/CD for SDK and API
- Configured with selective ignores for tracked exceptions
- **Trivy**: Multi-purpose scanner for containers and dependencies
- Scans all container images (UI, API, SDK, MCP Server)
- Checks for vulnerabilities in OS packages and application dependencies
- Reports findings to GitHub Security tab
#### Automated Dependency Updates
- **Dependabot**: Automated pull requests for dependency updates
- **Python (pip)**: Monthly updates for SDK
- **GitHub Actions**: Monthly updates for workflow dependencies
- **Docker**: Monthly updates for base images
- Temporarily paused for API and UI to maintain stability during active development
- **Security-first approach**: Even when paused, Dependabot automatically creates pull requests for security vulnerabilities, ensuring critical security patches are never delayed
### Container Security
All container images are scanned before deployment:
- **Trivy Vulnerability Scanning**:
- Scans images for vulnerabilities and misconfigurations
- Generates SARIF reports uploaded to GitHub Security tab
- Creates PR comments with scan summaries
- Configurable to fail builds on critical findings
- Reports include CVE counts and remediation guidance
- **Hadolint**: Dockerfile linting to enforce best practices
- Validates Dockerfile syntax and structure
- Ensures secure image building practices
### Secrets Detection
We protect against accidental exposure of sensitive credentials:
- **TruffleHog**: Scans entire codebase and Git history for secrets
- Runs on every push and pull request
- Pre-commit hook prevents committing secrets
- Detects high-entropy strings, API keys, tokens, and credentials
- Configured to report verified and unknown findings
### Security Monitoring
- **GitHub Security Tab**: Centralized view of all security findings from CodeQL, Trivy, and other SARIF-compatible tools
- **Artifact Retention**: Security scan reports retained for post-deployment analysis
- **PR Comments**: Automated security feedback on pull requests for rapid remediation
## Reporting Vulnerabilities
At Prowler, we consider the security of our open source software and systems a top priority. But no matter how much effort we put into system security, there can still be vulnerabilities present.
If you discover a vulnerability, we would like to know about it so we can take steps to address it as quickly as possible. We would like to ask you to help us better protect our users, our clients and our systems.
When reporting vulnerabilities, please consider (1) attack scenario / exploitability, and (2) the security impact of the bug. The following issues are considered out of scope:
- Social engineering support or attacks requiring social engineering.
- Clickjacking on pages with no sensitive actions.
- Cross-Site Request Forgery (CSRF) on unauthenticated forms or forms with no sensitive actions.
- Attacks requiring Man-In-The-Middle (MITM) or physical access to a user's device.
- Previously known vulnerable libraries without a working Proof of Concept (PoC).
- Comma Separated Values (CSV) injection without demonstrating a vulnerability.
- Missing best practices in SSL/TLS configuration.
- Any activity that could lead to the disruption of service (DoS).
- Rate limiting or brute force issues on non-authentication endpoints.
- Missing best practices in Content Security Policy (CSP).
- Missing HttpOnly or Secure flags on cookies.
- Configuration of or missing security headers.
- Missing email best practices, such as invalid, incomplete, or missing SPF/DKIM/DMARC records.
- Vulnerabilities only affecting users of outdated or unpatched browsers (less than two stable versions behind).
- Software version disclosure, banner identification issues, or descriptive error messages.
- Tabnabbing.
- Issues that require unlikely user interaction.
- Improper logout functionality and improper session timeout.
- CORS misconfiguration without an exploitation scenario.
- Broken link hijacking.
- Automated scanning results (e.g., sqlmap, Burp active scanner) that have not been manually verified.
- Content spoofing and text injection issues without a clear attack vector.
- Email spoofing without exploiting security flaws.
- Dead links or broken links.
- User enumeration.
Testing guidelines:
- Do not run automated scanners on other customer projects. Running automated scanners can run up costs for our users. Aggressively configured scanners might inadvertently disrupt services, exploit vulnerabilities, lead to system instability or breaches and violate Terms of Service from our upstream providers. Our own security systems won't be able to distinguish hostile reconnaissance from whitehat research. If you wish to run an automated scanner, notify us at support@prowler.com and only run it on your own Prowler app project. Do NOT attack Prowler environments used by other customers.
- Do not take advantage of the vulnerability or problem you have discovered, for example by downloading more data than necessary to demonstrate the vulnerability or deleting or modifying other people's data.
Reporting guidelines:
- File a report through our Support Desk at https://support.prowler.com
- If it is about a lack of a security functionality, please file a feature request instead at https://github.com/prowler-cloud/prowler/issues
- Do provide sufficient information to reproduce the problem, so we will be able to resolve it as quickly as possible.
- If you have further questions and want direct interaction with the Prowler team, please contact us via our Community Slack at goto.prowler.com/slack.
Disclosure guidelines:
- In order to protect our users and customers, do not reveal the problem to others until we have researched, addressed and informed our affected customers.
- If you want to publicly share your research about Prowler at a conference, in a blog or any other public forum, you should share a draft with us for review and approval at least 30 days prior to the publication date. Please note that the following should not be included:
- Data regarding any Prowler user or customer projects.
- Prowler customers' data.
- Information about Prowler employees, contractors or partners.
What we promise:
- We will respond to your report within 5 business days with our evaluation of the report and an expected resolution date.
- If you have followed the instructions above, we will not take any legal action against you in regard to the report.
- We will handle your report with strict confidentiality, and not pass on your personal details to third parties without your permission.
- We will keep you informed of the progress towards resolving the problem.
- In the public information concerning the problem reported, we will give your name as the discoverer of the problem (unless you desire otherwise).
We strive to resolve all problems as quickly as possible, and we would like to play an active role in the ultimate publication on the problem after it is resolved.
-25
View File
@@ -1,25 +0,0 @@
---
title: 'Data Regions & Availability'
---
Prowler Cloud runs on AWS with high availability built in.
## Regions
| Region | URL | Location |
|--------|-----|----------|
| **EU** | [cloud.prowler.com](https://cloud.prowler.com) | Ireland (`eu-west-1`) |
## Business Continuity
| Control | Details |
|---------|---------|
| **High Availability** | Multi-AZ databases and load-balanced stateless application layer on AWS |
| **Disaster Recovery** | Encrypted backups, tested regularly |
| **[RPO](https://en.wikipedia.org/wiki/Recovery_point_objective)** | 24 hours |
| **[RTO](https://en.wikipedia.org/wiki/Recovery_time_objective)** | 2 hours |
| **Status** | [status.prowler.com](https://status.prowler.com) — uptime history and incidents |
## Contact
For questions about data regions and availability, visit the [Support page](/support).
-25
View File
@@ -1,25 +0,0 @@
---
title: 'Encryption'
---
Prowler Cloud uses encryption everywhere possible. All data and communications are encrypted at rest and in transit.
## Encryption at Rest
All data stored in Prowler Cloud is encrypted at rest using AES-256 encryption, including:
- **Database contents:** All scan results, findings, and configuration data.
- **File storage:** Reports, exports, and uploaded files.
- **Backups:** All backup data is encrypted.
## Encryption in Transit
All communications with Prowler Cloud are encrypted in transit using TLS 1.2 or higher, including:
- **API requests:** All REST API communications.
- **Web application traffic:** Browser-to-server connections.
- **Internal service communication:** Service-to-service traffic within the platform.
## Contact
For questions regarding encryption, visit the [Support page](/support).
-76
View File
@@ -1,76 +0,0 @@
---
title: 'Security & Compliance'
---
**Prowler secures itself with Prowler.** As an open-source cloud security platform trusted by thousands of organizations, Prowler applies the same rigorous security standards internally that customers achieve externally.
All security tooling, configurations, and CI/CD pipelines are publicly available in the [Prowler GitHub repository](https://github.com/prowler-cloud/prowler). Transparency is fundamental to open-source security.
## Software Security
All Prowler code goes through the same security pipeline, whether running on Prowler Cloud or self-managed infrastructure: DAST, SAST, SCA, container scanning, and secrets detection on every build.
<Card title="Software Security" icon="code" href="/security/software-security">
Security tools and practices applied to all Prowler code.
</Card>
## Prowler Cloud vs Self-Managed
| | Prowler Cloud | Self-Managed |
|--|---------------|--------------|
| **Deployment** | Fully managed SaaS | Own infrastructure |
| **Region** | EU (Ireland) | Any region or provider |
| **Compliance** | SOC 2 Type II, AWS FTR | Organization responsibility |
| **Data Control** | Prowler managed | Full control |
| **Encryption** | AES-256 at rest, TLS 1.2+ in transit | Configurable |
| **Backups** | Automated | Organization responsibility |
| **Updates** | Automatic | Manual |
<Note>
Self-Managed includes Prowler App and Prowler CLI. They can run anywhere — any cloud provider, any region, on-premises, or air-gapped environments. Full control over data residency and infrastructure decisions. See the [Prowler App Installation Guide](/getting-started/installation/prowler-app) to get started.
</Note>
---
## Prowler Cloud
This section covers security and compliance for **Prowler Cloud**, the managed infrastructure.
### Trust & Compliance
Prowler Cloud holds compliance certifications and undergoes regular audits.
| Certification | Status |
|---------------|--------|
| **SOC 2 Type II** | [View on Trust Portal](https://trust.prowler.com) |
| **AWS Foundational Technical Review (FTR)** | Passed — [Details](https://aws.amazon.com/partners/foundational-technical-review/) |
Compliance data and reports: [trust.prowler.com](https://trust.prowler.com)
### Security
<Columns cols={3}>
<Card title="Encryption" icon="lock" href="/security/encryption">
Data encrypted at rest (AES-256) and in transit (TLS 1.2+).
</Card>
<Card title="Data Regions" icon="globe" href="/security/data-regions">
EU-hosted infrastructure with high availability and disaster recovery.
</Card>
<Card title="Networking" icon="network-wired" href="/security/networking">
Static egress IPs for firewall allowlisting.
</Card>
</Columns>
### Privacy
Prowler Cloud is GDPR compliant in regard to the ["right to be forgotten"](https://gdpr.eu/right-to-be-forgotten/). When an account is deleted, user information is removed from online and backup systems within 10 calendar days.
---
## Report a Vulnerability
Found a security issue? Report it through the [responsible disclosure](https://prowler.com/.well-known/security.txt) process.
## Contact
For security inquiries or general support, visit the [Support page](/support).
-21
View File
@@ -1,21 +0,0 @@
---
title: 'Networking'
---
## Egress IP Addresses
Prowler Cloud makes outbound API calls to scan cloud provider accounts and connect to integrations. Allowlist these IPs in firewalls or security groups to restrict access to Prowler Cloud only.
| Region | IP Address |
|--------|------------|
| EU (Ireland) | `52.48.254.174` |
Resolve the egress IP via DNS:
```bash
dig egress.prowler.com +short
```
## Contact
For questions about networking, visit the [Support page](/support).
-97
View File
@@ -1,97 +0,0 @@
---
title: 'Software Security'
---
Prowler follows a **security-by-design approach** throughout the software development lifecycle. All changes go through automated checks at every stage, from local development to production deployment.
[Pre-commit](https://github.com/prowler-cloud/prowler/blob/master/.pre-commit-config.yaml) validations catch issues early, and [CI/CD pipelines](https://github.com/prowler-cloud/prowler/tree/master/.github) include multiple security gates ensuring code quality, secure configurations, and compliance with internal standards.
Container registries are continuously scanned for vulnerabilities, with findings automatically reported to the security team for assessment and remediation. This process evolves alongside the stack as new languages, frameworks, and technologies are adopted, ensuring security practices remain comprehensive, proactive, and adaptable.
## Static Application Security Testing (SAST)
Multiple SAST tools are employed across the codebase to identify security vulnerabilities, code quality issues, and potential bugs during development.
### CodeQL Analysis
- **Scope:** UI (JavaScript/TypeScript), API (Python), and SDK (Python)
- **Frequency:** On every push and pull request, plus daily scheduled scans
- **Integration:** Results uploaded to GitHub Security tab via SARIF format
- **Purpose:** Identifies security vulnerabilities, coding errors, and potential exploits in source code
### Python Security Scanners
- **Bandit:** Detects common security issues in Python code (SQL injection, hardcoded passwords, etc.)
- Configured to ignore test files and report only high-severity issues
- Runs on both SDK and API codebases
- **Pylint:** Static code analysis with security-focused checks
- Integrated into pre-commit hooks and CI/CD pipelines
### Code Quality & Dead Code Detection
- **Vulture:** Identifies unused code that could indicate incomplete implementations or security gaps
- **Flake8:** Style guide enforcement with security-relevant checks
- **Shellcheck:** Security and correctness checks for shell scripts
## Software Composition Analysis (SCA)
Dependencies are continuously monitored for known vulnerabilities with timely updates ensured.
### Dependency Vulnerability Scanning
- **Safety:** Scans Python dependencies against known vulnerability databases
- Runs on every commit via pre-commit hooks
- Integrated into CI/CD for SDK and API
- Configured with selective ignores for tracked exceptions
- **Trivy:** Multi-purpose scanner for containers and dependencies
- Scans all container images (UI, API, SDK, MCP Server)
- Checks for vulnerabilities in OS packages and application dependencies
- Reports findings to GitHub Security tab
### Automated Dependency Updates
- **Dependabot:** Automated pull requests for dependency updates
- **Python (pip):** Monthly updates for SDK
- **GitHub Actions:** Monthly updates for workflow dependencies
- **Docker:** Monthly updates for base images
- Temporarily paused for API and UI to maintain stability during active development
- **Security-first approach:** Even when paused, Dependabot automatically creates pull requests for security vulnerabilities, ensuring critical security patches are never delayed
## Container Security
All container images are scanned before deployment.
### Trivy Vulnerability Scanning
- Scans images for vulnerabilities and misconfigurations
- Generates SARIF reports uploaded to GitHub Security tab
- Creates PR comments with scan summaries
- Configurable to fail builds on critical findings
- Reports include CVE counts and remediation guidance
### Hadolint
- Validates Dockerfile syntax and structure
- Ensures secure image building practices
## Secrets Detection
Prowler protects against accidental exposure of sensitive credentials.
### TruffleHog
- Scans entire codebase and Git history for secrets
- Runs on every push and pull request
- Pre-commit hook prevents committing secrets
- Detects high-entropy strings, API keys, tokens, and credentials
- Configured to report verified and unknown findings
## Security Monitoring
- **GitHub Security Tab:** Centralized view of all security findings from CodeQL, Trivy, and other SARIF-compatible tools
- **Artifact Retention:** Security scan reports retained for post-deployment analysis
- **PR Comments:** Automated security feedback on pull requests for rapid remediation
## Contact
For questions regarding software security, visit the [Support page](/support).
-62
View File
@@ -1,62 +0,0 @@
---
title: 'Support'
description: 'Get help with Prowler'
---
## Lighthouse AI
Lighthouse AI is a Cloud Security Analyst chatbot powered by [Prowler MCP](/getting-started/products/prowler-mcp), your 24/7 virtual cloud security analyst. It can:
- **Query your security data**: Findings, compliance status, resources, and remediation guidance
- **Search Prowler Hub**: Over 1,000 security checks and 70+ compliance frameworks
- **Access documentation**: Search and retrieve Prowler docs contextually
Available in Prowler Cloud and Prowler App.
[Learn more about Lighthouse AI](/getting-started/products/prowler-lighthouse-ai)
## Support Desk
> Available to **Prowler Cloud** customers.
For Prowler Cloud customers, submit support requests through our support desk. We'll route your request to the right team and respond via email.
<Card title="Submit a request" icon="ticket" href="https://support.prowler.com">
Contact our support team
</Card>
## GitHub Discussions
Prowler is Open Source. If you have a question, it's likely someone else has it too. We'd love to answer in the open on GitHub whenever possible.
<CardGroup cols={3}>
<Card title="Ask a question" icon="circle-question" href="https://github.com/prowler-cloud/prowler/discussions">
Get help from the community
</Card>
<Card title="Report a bug" icon="bug" href="https://github.com/prowler-cloud/prowler/issues/new?template=bug_report.yml">
Found something wrong? Let us know
</Card>
<Card title="Suggest a feature" icon="lightbulb" href="https://github.com/prowler-cloud/prowler/issues/new?template=feature-request.yml">
Share your ideas for improvements
</Card>
</CardGroup>
## Community Slack
Join our Slack workspace to connect with the Prowler community, ask questions, and get help from other users and the Prowler team.
<Card title="Join Prowler Slack" icon="slack" href="https://goto.prowler.com/slack">
Connect with the community
</Card>
## Office Hours
Join our open calls to discuss what you're building, ask questions, and connect with the Prowler team and community.
Office Hours sessions are announced on [LinkedIn](https://www.linkedin.com/company/prowler-security/). Recordings of previous sessions are available on [YouTube](https://www.youtube.com/playlist?list=PLIwvjRXuMGkE-BDYXmUR2TXYQ7agxtuB1).
## Security
To report a vulnerability or for security-related inquiries, contact [security@prowler.com](mailto:security@prowler.com).
See also: [Responsible Disclosure](https://prowler.com/.well-known/security.txt)
@@ -30,7 +30,6 @@ Assign the following Microsoft Graph permissions:
- `Directory.Read.All`
- `Policy.Read.All`
- `UserAuthenticationMethod.Read.All` (optional, for multifactor authentication (MFA) checks)
- `AuditLog.Read.All` (optional, for multifactor authentication (MFA) checks)
<Note>
Replace `Directory.Read.All` with `Domain.Read.All` for more restrictive permissions. Note that Entra checks related to DirectoryRoles and GetUsers will not run with this permission.
@@ -52,7 +51,6 @@ Replace `Directory.Read.All` with `Domain.Read.All` for more restrictive permiss
- `Directory.Read.All`
- `Policy.Read.All`
- `UserAuthenticationMethod.Read.All`
- `AuditLog.Read.All`
![Permission Screenshots](/images/providers/domain-permission.png)
@@ -64,7 +62,7 @@ Replace `Directory.Read.All` with `Domain.Read.All` for more restrictive permiss
1. To grant permissions to a Service Principal, execute the following command in a terminal:
```console
az ad app permission add --id {appId} --api 00000003-0000-0000-c000-000000000000 --api-permissions 7ab1d382-f21e-4acd-a863-ba3e13f7da61=Role 246dd0d5-5bd0-4def-940b-0421030a5b68=Role 38d9df27-64da-44fd-b7c5-a6fbac20248f=Role b0afded3-3588-46d8-8b3d-9842eff778da=Role
az ad app permission add --id {appId} --api 00000003-0000-0000-c000-000000000000 --api-permissions 7ab1d382-f21e-4acd-a863-ba3e13f7da61=Role 246dd0d5-5bd0-4def-940b-0421030a5b68=Role 38d9df27-64da-44fd-b7c5-a6fbac20248f=Role
```
</Tab>
</Tabs>
@@ -377,7 +375,7 @@ The ProwlerRole is a custom role required for specific security checks. First, c
#### Step 4: (Optional) Assign Microsoft Graph Permissions
For Entra ID (Azure AD) checks, the Managed Identity needs Microsoft Graph API permissions: `Directory.Read.All`, `Policy.Read.All`, and optionally `UserAuthenticationMethod.Read.All` and `AuditLog.Read.All`.
For Entra ID (Azure AD) checks, the Managed Identity needs Microsoft Graph API permissions: `Directory.Read.All`, `Policy.Read.All`, and optionally `UserAuthenticationMethod.Read.All`.
<Note>
Assigning Microsoft Graph API permissions to a Managed Identity requires Azure CLI or PowerShell - it cannot be done through the Azure Portal's standard role assignment interface.
@@ -1,146 +0,0 @@
---
title: 'Cloudflare Authentication'
---
Prowler for Cloudflare supports the following authentication methods:
- [**API Token**](#api-token-recommended) (**Recommended**)
- [**API Key and Email (Legacy)**](#api-key-and-email-legacy)
## Required Permissions
Prowler requires read-only access to your Cloudflare zones and their settings. The following permissions are needed:
| Permission | Description |
|------------|-------------|
| `Zone:Read` | Read access to zone settings and configurations |
| `Zone Settings:Read` | Read access to zone security settings (SSL/TLS, HSTS, etc.) |
| `DNS:Read` | Read access to DNS records (for DNSSEC checks) |
<Warning>
Ensure your API Token or API Key has access to all zones you want to scan. If permissions are missing, some checks may fail or return incomplete results.
</Warning>
## API Token (Recommended)
API Tokens are the recommended authentication method because they:
- Can be scoped to specific permissions and zones
- Are more secure than global API keys
- Can be easily rotated without affecting other integrations
### Step 1: Create an API Token
1. **Log into Cloudflare Dashboard**
- Go to [https://dash.cloudflare.com](https://dash.cloudflare.com) and sign in
2. **Navigate to API Tokens**
- Click on your profile icon in the top right corner
- Select **My Profile**
- Click on the **API Tokens** tab
3. **Create a Custom Token**
- Click **Create Token**
- Select **Create Custom Token** (at the bottom)
4. **Configure Token Permissions**
Give your token a descriptive name (e.g., "Prowler Security Scanner") and add the [required permissions](#required-permissions) listed above.
5. **Set Zone Resources**
- Under **Zone Resources**, select either:
- **Include → All zones** (to scan all zones in your account)
- **Include → Specific zone** (to limit access to specific zones)
6. **Create and Copy Token**
- Click **Continue to summary**
- Review the permissions and click **Create Token**
- **Copy the token immediately** - Cloudflare will only show it once
### Step 2: Store the Token Securely
Store your API token as an environment variable:
```bash
export CLOUDFLARE_API_TOKEN="your-api-token-here"
```
<Warning>
Never commit API tokens to version control or share them in plain text. Use environment variables or a secrets manager.
</Warning>
## API Key and Email (Legacy)
API Keys provide full access to your Cloudflare account. While supported, this method is less secure than API Tokens because it grants broader permissions.
### Step 1: Get Your API Key
1. **Log into Cloudflare Dashboard**
- Go to [https://dash.cloudflare.com](https://dash.cloudflare.com) and sign in
2. **Navigate to API Tokens**
- Click on your profile icon in the top right corner
- Select **My Profile**
- Click on the **API Tokens** tab
3. **View Global API Key**
- Scroll down to the **API Keys** section
- Click **View** next to **Global API Key**
- Enter your password to reveal the key
- Copy the API key
### Step 2: Store Credentials Securely
Store both your API key and email as environment variables:
```bash
export CLOUDFLARE_API_KEY="your-api-key-here"
export CLOUDFLARE_API_EMAIL="your-email@example.com"
```
<Note>
The email must be the same email address used to log into your Cloudflare account.
</Note>
## Best Practices
### Security Recommendations
- **Use API Tokens instead of API Keys** - Tokens can be scoped to specific permissions
- **Use environment variables** - Never hardcode credentials in scripts or commands
- **Rotate credentials regularly** - Create new tokens periodically and revoke old ones
- **Use least privilege** - Only grant the minimum permissions needed
- **Monitor token usage** - Review the Cloudflare audit log for suspicious activity
<Warning>
**Use only one authentication method at a time.** If both API Token and API Key + Email are set, Prowler will use the API Token and log an error message.
</Warning>
## Troubleshooting
### "Missing X-Auth-Email header" Error
This error occurs when using API Key authentication without providing the email address. Ensure both `CLOUDFLARE_API_KEY` and `CLOUDFLARE_API_EMAIL` are set.
### "Authentication error" or "Permission denied"
- Verify your API Token or API Key is correct and not expired
- Check that your token has the [required permissions](#required-permissions)
- Ensure your token has access to the zones you're trying to scan
### "Both API Token and API Key and Email credentials are set"
This message is logged when all three environment variables are set:
- `CLOUDFLARE_API_TOKEN`
- `CLOUDFLARE_API_KEY`
- `CLOUDFLARE_API_EMAIL`
To resolve, unset the credentials you don't want to use:
```bash
# To use API Token only (recommended)
unset CLOUDFLARE_API_KEY
unset CLOUDFLARE_API_EMAIL
# Or to use API Key and Email only
unset CLOUDFLARE_API_TOKEN
```
@@ -1,108 +0,0 @@
---
title: 'Getting Started with Cloudflare'
---
import { VersionBadge } from "/snippets/version-badge.mdx";
<VersionBadge version="5.17.0" />
Prowler for Cloudflare allows you to scan your Cloudflare zones for security misconfigurations, including SSL/TLS settings, DNSSEC, HSTS, and more.
## Prerequisites
Before running Prowler with the Cloudflare provider, ensure you have:
1. A Cloudflare account with at least one zone
2. One of the following authentication methods configured (see [Authentication](/user-guide/providers/cloudflare/authentication)):
- An **API Token** (recommended)
- An **API Key + Email** (legacy)
## Quick Start
### Step 1: Set Up Authentication
The recommended method is using an API Token via environment variable:
```bash
export CLOUDFLARE_API_TOKEN="your-api-token-here"
```
Alternatively, use API Key + Email:
```bash
export CLOUDFLARE_API_KEY="your-api-key-here"
export CLOUDFLARE_API_EMAIL="your-email@example.com"
```
### Step 2: Run Prowler
Run a scan across all your Cloudflare zones:
```bash
prowler cloudflare
```
That's it! Prowler will automatically discover all zones in your account and run security checks against them.
## Authentication
Prowler reads Cloudflare credentials from environment variables. Set your credentials before running Prowler:
**API Token (Recommended):**
```bash
export CLOUDFLARE_API_TOKEN="your-api-token-here"
prowler cloudflare
```
**API Key + Email (Legacy):**
```bash
export CLOUDFLARE_API_KEY="your-api-key-here"
export CLOUDFLARE_API_EMAIL="your-email@example.com"
prowler cloudflare
```
## Filtering Zones
By default, Prowler scans all zones accessible with your credentials:
```bash
prowler cloudflare
```
To scan only specific zones, use the `-f`, `--region`, or `--filter-region` argument:
```bash
prowler cloudflare -f example.com
```
You can specify multiple zones:
```bash
prowler cloudflare -f example.com example.org
```
You can also use zone IDs instead of domain names:
```bash
prowler cloudflare -f 023e105f4ecef8ad9ca31a8372d0c353
```
## Configuration
Prowler uses a configuration file to customize provider behavior. The Cloudflare configuration includes:
```yaml
cloudflare:
  # Maximum number of retries for API requests (default is 2)
  max_retries: 2
```
To use a custom configuration:
```bash
prowler cloudflare --config-file /path/to/config.yaml
```
## Next Steps
- [Authentication](/user-guide/providers/cloudflare/authentication) - Detailed guide on creating API tokens and keys
+1 -13
View File
@@ -1,18 +1,6 @@
# Prowler MCP Server - AI Agent Ruleset
> **Skills Reference**: See [`prowler-mcp`](../skills/prowler-mcp/SKILL.md)
### Auto-invoke Skills
When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Create PR that requires changelog entry | `prowler-changelog` |
| Review changelog format and conventions | `prowler-changelog` |
| Update CHANGELOG.md in any component | `prowler-changelog` |
| Working on MCP server tools | `prowler-mcp` |
> **Skills Reference**: For detailed patterns, use the [`prowler-mcp`](../skills/prowler-mcp/SKILL.md) skill.
## Project Overview
Generated
+89 -177
View File
@@ -741,7 +741,7 @@ version = "4.9.0"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.9"
groups = ["main", "dev"]
groups = ["main"]
files = [
{file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"},
{file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"},
@@ -1512,46 +1512,46 @@ files = [
[[package]]
name = "boto3"
version = "1.40.61"
version = "1.39.15"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.9"
groups = ["main", "dev"]
files = [
{file = "boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c"},
{file = "boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12"},
{file = "boto3-1.39.15-py3-none-any.whl", hash = "sha256:38fc54576b925af0075636752de9974e172c8a2cf7133400e3e09b150d20fb6a"},
{file = "boto3-1.39.15.tar.gz", hash = "sha256:b4483625f0d8c35045254dee46cd3c851bbc0450814f20b9b25bee1b5c0d8409"},
]
[package.dependencies]
botocore = ">=1.40.61,<1.41.0"
botocore = ">=1.39.15,<1.40.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.14.0,<0.15.0"
s3transfer = ">=0.13.0,<0.14.0"
[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.40.61"
version = "1.39.15"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.9"
groups = ["main", "dev"]
files = [
{file = "botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7"},
{file = "botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd"},
{file = "botocore-1.39.15-py3-none-any.whl", hash = "sha256:eb9cfe918ebfbfb8654e1b153b29f0c129d586d2c0d7fb4032731d49baf04cff"},
{file = "botocore-1.39.15.tar.gz", hash = "sha256:2aa29a717f14f8c7ca058c2e297aaed0aa10ecea24b91514eee802814d1b7600"},
]
[package.dependencies]
jmespath = ">=0.7.1,<2.0.0"
python-dateutil = ">=2.1,<3.0.0"
urllib3 = [
{version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""},
{version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""},
{version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""},
]
[package.extras]
crt = ["awscrt (==0.27.6)"]
crt = ["awscrt (==0.23.8)"]
[[package]]
name = "cachetools"
@@ -1878,26 +1878,6 @@ click = ">=4.0"
[package.extras]
dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"]
[[package]]
name = "cloudflare"
version = "4.3.1"
description = "The official Python library for the cloudflare API"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "cloudflare-4.3.1-py3-none-any.whl", hash = "sha256:6927135a5ee5633d6e2e1952ca0484745e933727aeeb189996d2ad9d292071c6"},
{file = "cloudflare-4.3.1.tar.gz", hash = "sha256:b1e1c6beeb8d98f63bfe0a1cba874fc4e22e000bcc490544f956c689b3b5b258"},
]
[package.dependencies]
anyio = ">=3.5.0,<5"
distro = ">=1.7.0,<2"
httpx = ">=0.23.0,<1"
pydantic = ">=1.9.0,<3"
sniffio = "*"
typing-extensions = ">=4.10,<5"
[[package]]
name = "colorama"
version = "0.4.6"
@@ -2188,18 +2168,6 @@ files = [
{file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"},
]
[[package]]
name = "distro"
version = "1.9.0"
description = "Distro - an OS platform information API"
optional = false
python-versions = ">=3.6"
groups = ["main"]
files = [
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
]
[[package]]
name = "dnspython"
version = "2.7.0"
@@ -2378,29 +2346,20 @@ testing = ["hatch", "pre-commit", "pytest", "tox"]
[[package]]
name = "filelock"
version = "3.19.1"
version = "3.12.4"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.9"
python-versions = ">=3.8"
groups = ["main", "dev"]
markers = "python_version < \"3.10\""
files = [
{file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"},
{file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"},
{file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"},
{file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"},
]
[[package]]
name = "filelock"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
groups = ["main", "dev"]
markers = "python_version >= \"3.10\""
files = [
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[package.extras]
docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
typing = ["typing-extensions (>=4.7.1) ; python_version < \"3.11\""]
[[package]]
name = "flake8"
@@ -2715,7 +2674,7 @@ version = "0.16.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
groups = ["main"]
files = [
{file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"},
{file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"},
@@ -2755,7 +2714,7 @@ version = "1.0.9"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
groups = ["main"]
files = [
{file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"},
{file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"},
@@ -2792,7 +2751,7 @@ version = "0.28.1"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
groups = ["main"]
files = [
{file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
{file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
@@ -2973,18 +2932,6 @@ files = [
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
]
[[package]]
name = "joblib"
version = "1.5.3"
description = "Lightweight pipelining with Python functions"
optional = false
python-versions = ">=3.9"
groups = ["dev"]
files = [
{file = "joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713"},
{file = "joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3"},
]
[[package]]
name = "joserfc"
version = "1.2.2"
@@ -3948,32 +3895,6 @@ extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.1
test = ["pytest (>=7.2)", "pytest-cov (>=4.0)", "pytest-xdist (>=3.0)"]
test-extras = ["pytest-mpl", "pytest-randomly"]
[[package]]
name = "nltk"
version = "3.9.2"
description = "Natural Language Toolkit"
optional = false
python-versions = ">=3.9"
groups = ["dev"]
files = [
{file = "nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a"},
{file = "nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419"},
]
[package.dependencies]
click = "*"
joblib = "*"
regex = ">=2021.8.3"
tqdm = "*"
[package.extras]
all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"]
corenlp = ["requests"]
machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
[[package]]
name = "nodeenv"
version = "1.9.1"
@@ -4536,6 +4457,36 @@ files = [
{file = "protobuf-6.31.1.tar.gz", hash = "sha256:d8cac4c982f0b957a4dc73a80e2ea24fab08e679c0de9deb835f4a12d69aca9a"},
]
[[package]]
name = "psutil"
version = "6.0.0"
description = "Cross-platform lib for process and system monitoring in Python."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
groups = ["dev"]
files = [
{file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"},
{file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"},
{file = "psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"},
{file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"},
{file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"},
{file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"},
{file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"},
{file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"},
{file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"},
{file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"},
{file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"},
{file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"},
{file = "psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"},
{file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"},
{file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"},
{file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"},
{file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"},
]
[package.extras]
test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""]
[[package]]
name = "py-iam-expand"
version = "0.1.0"
@@ -4585,14 +4536,14 @@ dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"]
[[package]]
name = "pyasn1"
version = "0.6.2"
version = "0.6.1"
description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf"},
{file = "pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b"},
{file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
{file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
]
[[package]]
@@ -4629,7 +4580,7 @@ description = "C parser in Python"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
markers = "implementation_name != \"PyPy\" and platform_python_implementation != \"PyPy\""
markers = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\""
files = [
{file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
{file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
@@ -5674,14 +5625,14 @@ files = [
[[package]]
name = "s3transfer"
version = "0.14.0"
version = "0.13.1"
description = "An Amazon S3 Transfer Manager"
optional = false
python-versions = ">=3.9"
groups = ["main", "dev"]
files = [
{file = "s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456"},
{file = "s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125"},
{file = "s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"},
{file = "s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"},
]
[package.dependencies]
@@ -5692,35 +5643,34 @@ crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
[[package]]
name = "safety"
version = "3.7.0"
description = "Scan dependencies for known vulnerabilities and licenses."
version = "3.2.9"
description = "Checks installed dependencies for known vulnerabilities and licenses."
optional = false
python-versions = ">=3.9"
python-versions = ">=3.7"
groups = ["dev"]
files = [
{file = "safety-3.7.0-py3-none-any.whl", hash = "sha256:65e71db45eb832e8840e3456333d44c23927423753d5610596a09e909a66d2bf"},
{file = "safety-3.7.0.tar.gz", hash = "sha256:daec15a393cafc32b846b7ef93f9c952a1708863e242341ab5bde2e4beabb54e"},
{file = "safety-3.2.9-py3-none-any.whl", hash = "sha256:5e199c057550dc6146c081084274279dfb98c17735193b028db09a55ea508f1a"},
{file = "safety-3.2.9.tar.gz", hash = "sha256:494bea752366161ac9e0742033d2a82e4dc51d7c788be42e0ecf5f3ef36b8071"},
]
[package.dependencies]
authlib = ">=1.2.0"
click = ">=8.0.2"
dparse = ">=0.6.4"
filelock = ">=3.16.1,<4.0"
httpx = "*"
Authlib = ">=1.2.0"
Click = ">=8.0.2"
dparse = ">=0.6.4b0"
filelock = ">=3.12.2,<3.13.0"
jinja2 = ">=3.1.0"
marshmallow = ">=3.15.0"
nltk = ">=3.9"
packaging = ">=21.0"
pydantic = ">=2.6.0"
psutil = ">=6.0.0,<6.1.0"
pydantic = ">=1.10.12"
requests = "*"
ruamel-yaml = ">=0.17.21"
safety-schemas = "0.0.16"
tenacity = ">=8.1.0"
tomli = {version = "*", markers = "python_version < \"3.11\""}
tomlkit = "*"
typer = ">=0.16.0"
rich = "*"
"ruamel.yaml" = ">=0.17.21"
safety-schemas = ">=0.0.4"
setuptools = ">=65.5.1"
typer = "*"
typing-extensions = ">=4.7.1"
urllib3 = ">=1.26.5"
[package.extras]
github = ["pygithub (>=1.43.3)"]
@@ -5729,20 +5679,20 @@ spdx = ["spdx-tools (>=0.8.2)"]
[[package]]
name = "safety-schemas"
version = "0.0.16"
version = "0.0.5"
description = "Schemas for Safety tools"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.7"
groups = ["dev"]
files = [
{file = "safety_schemas-0.0.16-py3-none-any.whl", hash = "sha256:6760515d3fd1e6535b251cd73014bd431d12fe0bfb8b6e8880a9379b5ab7aa44"},
{file = "safety_schemas-0.0.16.tar.gz", hash = "sha256:3bb04d11bd4b5cc79f9fa183c658a6a8cf827a9ceec443a5ffa6eed38a50a24e"},
{file = "safety_schemas-0.0.5-py3-none-any.whl", hash = "sha256:6ac9eb71e60f0d4e944597c01dd48d6d8cd3d467c94da4aba3702a05a3a6ab4f"},
{file = "safety_schemas-0.0.5.tar.gz", hash = "sha256:0de5fc9a53d4423644a8ce9a17a2e474714aa27e57f3506146e95a41710ff104"},
]
[package.dependencies]
dparse = ">=0.6.4"
dparse = ">=0.6.4b0"
packaging = ">=21.0"
pydantic = ">=2.6.0"
pydantic = "*"
ruamel-yaml = ">=0.17.21"
typing-extensions = ">=4.7.1"
@@ -5827,18 +5777,18 @@ files = [
[[package]]
name = "slack-sdk"
version = "3.39.0"
version = "3.34.0"
description = "The Slack API Platform SDK for Python"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.6"
groups = ["main"]
files = [
{file = "slack_sdk-3.39.0-py2.py3-none-any.whl", hash = "sha256:b1556b2f5b8b12b94e5ea3f56c4f2c7f04462e4e1013d325c5764ff118044fa8"},
{file = "slack_sdk-3.39.0.tar.gz", hash = "sha256:6a56be10dc155c436ff658c6b776e1c082e29eae6a771fccf8b0a235822bbcb1"},
{file = "slack_sdk-3.34.0-py2.py3-none-any.whl", hash = "sha256:c61f57f310d85be83466db5a98ab6ae3bb2e5587437b54fa0daa8fae6a0feffa"},
{file = "slack_sdk-3.34.0.tar.gz", hash = "sha256:ff61db7012160eed742285ea91f11c72b7a38a6500a7f6c5335662b4bc6b853d"},
]
[package.extras]
optional = ["SQLAlchemy (>=1.4,<3)", "aiodns (>1.0)", "aiohttp (>=3.7.3,<4)", "boto3 (<=2)", "websocket-client (>=1,<2)", "websockets (>=9.1,<16)"]
optional = ["SQLAlchemy (>=1.4,<3)", "aiodns (>1.0)", "aiohttp (>=3.7.3,<4)", "boto3 (<=2)", "websocket-client (>=1,<2)", "websockets (>=9.1,<15)"]
[[package]]
name = "sniffio"
@@ -5846,7 +5796,7 @@ version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
groups = ["main", "dev"]
groups = ["main"]
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -5912,22 +5862,6 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "tenacity"
version = "9.1.2"
description = "Retry code until it succeeds"
optional = false
python-versions = ">=3.9"
groups = ["dev"]
files = [
{file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"},
{file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"},
]
[package.extras]
doc = ["reno", "sphinx"]
test = ["pytest", "tornado (>=4.5)", "typeguard"]
[[package]]
name = "tldextract"
version = "5.3.0"
@@ -6005,28 +5939,6 @@ files = [
{file = "tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1"},
]
[[package]]
name = "tqdm"
version = "4.67.1"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
groups = ["dev"]
files = [
{file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
{file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
]
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[package.extras]
dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"]
discord = ["requests"]
notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
[[package]]
name = "typer"
version = "0.16.0"
@@ -6614,4 +6526,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">3.9.1,<3.13"
content-hash = "adfc2da2c6e3e803f7a151b9697dbc3f461366a03e4504eb97498cbc72b2e48c"
content-hash = "1559a8799915bf0372eef07396e1dc40802911ef07ae92997cd260d9fe596ba3"
-23
View File
@@ -7,29 +7,6 @@
> - [`prowler-compliance`](../skills/prowler-compliance/SKILL.md) - Compliance framework structure
> - [`pytest`](../skills/pytest/SKILL.md) - Generic pytest patterns
### Auto-invoke Skills
When performing these actions, ALWAYS invoke the corresponding skill FIRST:
| Action | Skill |
|--------|-------|
| Add changelog entry for a PR or feature | `prowler-changelog` |
| Adding new providers | `prowler-provider` |
| Adding services to existing providers | `prowler-provider` |
| Create PR that requires changelog entry | `prowler-changelog` |
| Creating new checks | `prowler-sdk-check` |
| Creating/updating compliance frameworks | `prowler-compliance` |
| Mapping checks to compliance controls | `prowler-compliance` |
| Mocking AWS with moto in tests | `prowler-test-sdk` |
| Review changelog format and conventions | `prowler-changelog` |
| Reviewing compliance framework PRs | `prowler-compliance-review` |
| Update CHANGELOG.md in any component | `prowler-changelog` |
| Updating existing checks and metadata | `prowler-sdk-check` |
| Writing Prowler SDK tests | `prowler-test-sdk` |
| Writing Python tests with pytest | `pytest` |
---
## Project Overview
The Prowler SDK is the core Python engine powering cloud security assessments across AWS, Azure, GCP, Kubernetes, GitHub, M365, and more. It includes 1000+ security checks and 30+ compliance frameworks.
+15 -69
View File
@@ -2,12 +2,11 @@
All notable changes to the **Prowler SDK** are documented in this file.
## [5.17.0] (Prowler v5.17.0)
## [5.17.0] (Prowler UNRELEASED)
### Added
- AI Skills pack for AI coding assistants (Claude Code, OpenCode, Codex) following agentskills.io standard [(#9728)](https://github.com/prowler-cloud/prowler/pull/9728)
- Prowler ThreatScore for the Alibaba Cloud provider [(#9511)](https://github.com/prowler-cloud/prowler/pull/9511)
- Add Prowler ThreatScore for the Alibaba Cloud provider [(#9511)](https://github.com/prowler-cloud/prowler/pull/9511)
- `compute_instance_group_multiple_zones` check for GCP provider [(#9566)](https://github.com/prowler-cloud/prowler/pull/9566)
- `compute_instance_group_autohealing_enabled` check for GCP provider [(#9690)](https://github.com/prowler-cloud/prowler/pull/9690)
- Support AWS European Sovereign Cloud [(#9649)](https://github.com/prowler-cloud/prowler/pull/9649)
@@ -16,20 +15,14 @@ All notable changes to the **Prowler SDK** are documented in this file.
- `ResourceGroup` field to all check metadata for resource classification [(#9656)](https://github.com/prowler-cloud/prowler/pull/9656)
- `compute_configuration_changes` check for GCP provider to detect Compute Engine configuration changes in Cloud Audit Logs [(#9698)](https://github.com/prowler-cloud/prowler/pull/9698)
- `compute_instance_group_load_balancer_attached` check for GCP provider [(#9695)](https://github.com/prowler-cloud/prowler/pull/9695)
- `Cloudflare` provider with critical security checks [(#9423)](https://github.com/prowler-cloud/prowler/pull/9423)
- CloudFlare `TLS/SSL`, `records` and `email` checks for `zone` service [(#9424)](https://github.com/prowler-cloud/prowler/pull/9424)
- `compute_instance_single_network_interface` check for GCP provider [(#9702)](https://github.com/prowler-cloud/prowler/pull/9702)
- `compute_image_not_publicly_shared` check for GCP provider [(#9718)](https://github.com/prowler-cloud/prowler/pull/9718)
- `compute_snapshot_not_outdated` check for GCP provider [(#9774)](https://github.com/prowler-cloud/prowler/pull/9774)
- `compute_project_os_login_2fa_enabled` check for GCP provider [(#9839)](https://github.com/prowler-cloud/prowler/pull/9839)
- `compute_instance_on_host_maintenance_migrate` check for GCP provider [(#9834)](https://github.com/prowler-cloud/prowler/pull/9834)
- CIS 1.12 compliance framework for Kubernetes [(#9778)](https://github.com/prowler-cloud/prowler/pull/9778)
- CIS 6.0 for M365 provider [(#9779)](https://github.com/prowler-cloud/prowler/pull/9779)
- CIS 5.0 compliance framework for the Azure provider [(#9777)](https://github.com/prowler-cloud/prowler/pull/9777)
- `Cloudflare` Bot protection, WAF, Privacy, Anti-Scraping and Zone configuration checks [(#9425)](https://github.com/prowler-cloud/prowler/pull/9425)
- Container Image provider (POC) using Trivy for vulnerability and secret scanning
### Changed
- Update AWS Step Functions service metadata to new format [(#9432)](https://github.com/prowler-cloud/prowler/pull/9432)
- Update AWS Route 53 service metadata to new format [(#9406)](https://github.com/prowler-cloud/prowler/pull/9406)
- Update AWS SQS service metadata to new format [(#9429)](https://github.com/prowler-cloud/prowler/pull/9429)
@@ -54,29 +47,20 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Update AWS DataSync service metadata to new format [(#8854)](https://github.com/prowler-cloud/prowler/pull/8854)
- Update AWS RDS service metadata to new format [(#9551)](https://github.com/prowler-cloud/prowler/pull/9551)
- Update AWS Bedrock service metadata to new format [(#8827)](https://github.com/prowler-cloud/prowler/pull/8827)
- Update AWS IAM service metadata to new format [(#9550)](https://github.com/prowler-cloud/prowler/pull/9550)
- Enhance `user_registration_details` performance and user `mfa` evaluation [(#9236)](https://github.com/prowler-cloud/prowler/pull/9236)
- Update AWS Cognito service metadata to new format [(#8853)](https://github.com/prowler-cloud/prowler/pull/8853)
- Update AWS EC2 service metadata to new format [(#9549)](https://github.com/prowler-cloud/prowler/pull/9549)
- Update Azure AI Search service metadata to new format [(#9087)](https://github.com/prowler-cloud/prowler/pull/9087)
- Update Azure AKS service metadata to new format [(#9611)](https://github.com/prowler-cloud/prowler/pull/9611)
- Update Azure API Management service metadata to new format [(#9612)](https://github.com/prowler-cloud/prowler/pull/9612)
---
## [5.16.2] (Prowler v5.16.2) (UNRELEASED)
### Fixed
- OCI authentication error handling and validation [(#9738)](https://github.com/prowler-cloud/prowler/pull/9738)
- AWS EC2 SG library [(#9216)](https://github.com/prowler-cloud/prowler/pull/9216)
### Security
- `safety` to `3.7.0` and `filelock` to `3.20.3` due to [Safety vulnerability 82754 (CVE-2025-68146)](https://data.safetycli.com/v/82754/97c/) [(#9816)](https://github.com/prowler-cloud/prowler/pull/9816)
- `pyasn1` to v0.6.2 to address [CVE-2026-23490](https://nvd.nist.gov/vuln/detail/CVE-2026-23490) [(#9817)](https://github.com/prowler-cloud/prowler/pull/9817)
- Fix OCI authentication error handling and validation [(#9738)](https://github.com/prowler-cloud/prowler/pull/9738)
- Fixup AWS EC2 SG library [(#9216)](https://github.com/prowler-cloud/prowler/pull/9216)
---
## [5.16.1] (Prowler v5.16.1)
### Fixed
- ZeroDivision error from Prowler ThreatScore [(#9653)](https://github.com/prowler-cloud/prowler/pull/9653)
---
@@ -84,12 +68,10 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.16.0] (Prowler v5.16.0)
### Added
- `privilege-escalation` and `ec2-imdsv1` categories for AWS checks [(#9537)](https://github.com/prowler-cloud/prowler/pull/9537)
- Supported IaC formats and scanner documentation for the IaC provider [(#9553)](https://github.com/prowler-cloud/prowler/pull/9553)
### Changed
- Update AWS Glue service metadata to new format [(#9258)](https://github.com/prowler-cloud/prowler/pull/9258)
- Update AWS Kafka service metadata to new format [(#9261)](https://github.com/prowler-cloud/prowler/pull/9261)
- Update AWS KMS service metadata to new format [(#9263)](https://github.com/prowler-cloud/prowler/pull/9263)
@@ -102,7 +84,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Update AWS WAF v2 service metadata to new format [(#9481)](https://github.com/prowler-cloud/prowler/pull/9481)
### Fixed
- Fix typo `trustboundaries` category to `trust-boundaries` [(#9536)](https://github.com/prowler-cloud/prowler/pull/9536)
- Fix incorrect `bedrock-agent` regional availability, now using official AWS docs instead of copying from `bedrock`
- Store MongoDB Atlas provider regions as lowercase [(#9554)](https://github.com/prowler-cloud/prowler/pull/9554)
@@ -113,7 +94,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.15.1] (Prowler v5.15.1)
### Fixed
- Fix false negative in AWS `apigateway_restapi_logging_enabled` check by refining stage logging evaluation to ensure logging level is not set to "OFF" [(#9304)](https://github.com/prowler-cloud/prowler/pull/9304)
---
@@ -121,7 +101,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.15.0] (Prowler v5.15.0)
### Added
- `cloudstorage_uses_vpc_service_controls` check for GCP provider [(#9256)](https://github.com/prowler-cloud/prowler/pull/9256)
- Alibaba Cloud provider with CIS 2.0 benchmark [(#9329)](https://github.com/prowler-cloud/prowler/pull/9329)
- `repository_immutable_releases_enabled` check for GitHub provider [(#9162)](https://github.com/prowler-cloud/prowler/pull/9162)
@@ -135,7 +114,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- RBI Cyber Security Framework compliance for Azure provider [(#8822)](https://github.com/prowler-cloud/prowler/pull/8822)
### Changed
- Update AWS Macie service metadata to new format [(#9265)](https://github.com/prowler-cloud/prowler/pull/9265)
- Update AWS Lightsail service metadata to new format [(#9264)](https://github.com/prowler-cloud/prowler/pull/9264)
- Update AWS GuardDuty service metadata to new format [(#9259)](https://github.com/prowler-cloud/prowler/pull/9259)
@@ -145,7 +123,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Update AWS Lightsail service metadata to new format [(#9264)](https://github.com/prowler-cloud/prowler/pull/9264)
### Fixed
- Fix duplicate requirement IDs in ISO 27001:2013 AWS compliance framework by adding unique letter suffixes
- Removed incorrect threat-detection category from checks metadata [(#9489)](https://github.com/prowler-cloud/prowler/pull/9489)
- GCP `cloudstorage_uses_vpc_service_controls` check to handle VPC Service Controls blocked API access [(#9478)](https://github.com/prowler-cloud/prowler/pull/9478)
@@ -155,7 +132,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.14.2] (Prowler v5.14.2)
### Fixed
- Custom check folder metadata validation [(#9335)](https://github.com/prowler-cloud/prowler/pull/9335)
- Pin `alibabacloud-gateway-oss-util` to version 0.0.3 to address missing dependency [(#9487)](https://github.com/prowler-cloud/prowler/pull/9487)
@@ -164,7 +140,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.14.1] (Prowler v5.14.1)
### Fixed
- `sharepoint_external_sharing_managed` check to handle external sharing disabled at organization level [(#9298)](https://github.com/prowler-cloud/prowler/pull/9298)
- Support multiple Exchange mailbox policies in M365 `exchange_mailbox_policy_additional_storage_restricted` check [(#9241)](https://github.com/prowler-cloud/prowler/pull/9241)
@@ -173,7 +148,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.14.0] (Prowler v5.14.0)
### Added
- GitHub provider check `organization_default_repository_permission_strict` [(#8785)](https://github.com/prowler-cloud/prowler/pull/8785)
- Add OCI mapping to scan and check classes [(#8927)](https://github.com/prowler-cloud/prowler/pull/8927)
- `codepipeline_project_repo_private` check for AWS provider [(#5915)](https://github.com/prowler-cloud/prowler/pull/5915)
@@ -199,7 +173,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Add branch name to IaC provider region [(#9296)](https://github.com/prowler-cloud/prowler/pull/9295)
### Changed
- Update AWS Direct Connect service metadata to new format [(#8855)](https://github.com/prowler-cloud/prowler/pull/8855)
- Update AWS DRS service metadata to new format [(#8870)](https://github.com/prowler-cloud/prowler/pull/8870)
- Update AWS DynamoDB service metadata to new format [(#8871)](https://github.com/prowler-cloud/prowler/pull/8871)
@@ -233,10 +206,9 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Update AWS ECS service metadata to new format [(#8888)](https://github.com/prowler-cloud/prowler/pull/8888)
- Update AWS Kinesis service metadata to new format [(#9262)](https://github.com/prowler-cloud/prowler/pull/9262)
- Update AWS DocumentDB service metadata to new format [(#8862)](https://github.com/prowler-cloud/prowler/pull/8862)
- Adapt IaC provider to be used in the Prowler App [(#8751)](https://github.com/prowler-cloud/prowler/pull/8751)
### Fixed
- Check `check_name` has no `resource_name` error for GCP provider [(#9169)](https://github.com/prowler-cloud/prowler/pull/9169)
- Depth Truncation and parsing error in PowerShell queries [(#9181)](https://github.com/prowler-cloud/prowler/pull/9181)
- False negative in `iam_role_cross_service_confused_deputy_prevention` check [(#9213)](https://github.com/prowler-cloud/prowler/pull/9213)
@@ -254,7 +226,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.13.1] (Prowler v5.13.1)
### Fixed
- Add `resource_name` for checks under `logging` for the GCP provider [(#9023)](https://github.com/prowler-cloud/prowler/pull/9023)
- Fix `ec2_instance_with_outdated_ami` check to handle None AMIs [(#9046)](https://github.com/prowler-cloud/prowler/pull/9046)
- Handle timestamp when transforming compliance findings in CCC [(#9042)](https://github.com/prowler-cloud/prowler/pull/9042)
@@ -263,10 +234,14 @@ All notable changes to the **Prowler SDK** are documented in this file.
---
### Changed
- Adapt IaC provider to be used in the Prowler App [(#8751)](https://github.com/prowler-cloud/prowler/pull/8751)
---
## [5.13.0] (Prowler v5.13.0)
### Added
- Support for AdditionalURLs in outputs [(#8651)](https://github.com/prowler-cloud/prowler/pull/8651)
- Support for markdown metadata fields in Dashboard [(#8667)](https://github.com/prowler-cloud/prowler/pull/8667)
- `ec2_instance_with_outdated_ami` check for AWS provider [(#6910)](https://github.com/prowler-cloud/prowler/pull/6910)
@@ -309,7 +284,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
### Fixed
- Fix SNS topics showing empty AWS_ResourceID in Quick Inventory output [(#8762)](https://github.com/prowler-cloud/prowler/issues/8762)
- Fix HTML Markdown output for long strings [(#8803)](https://github.com/prowler-cloud/prowler/pull/8803)
- Prowler ThreatScore scoring calculation CLI [(#8582)](https://github.com/prowler-cloud/prowler/pull/8582)
@@ -326,7 +300,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.12.1] (Prowler v5.12.1)
### Fixed
- Replaced old check id with new ones for compliance files [(#8682)](https://github.com/prowler-cloud/prowler/pull/8682)
- `firehose_stream_encrypted_at_rest` check false positives and new api call in kafka service [(#8599)](https://github.com/prowler-cloud/prowler/pull/8599)
- Replace defender rules policies key to use old name [(#8702)](https://github.com/prowler-cloud/prowler/pull/8702)
@@ -336,7 +309,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.12.0] (Prowler v5.12.0)
### Added
- Add more fields for the Jira ticket and handle custom fields errors [(#8601)](https://github.com/prowler-cloud/prowler/pull/8601)
- Support labels on Jira tickets [(#8603)](https://github.com/prowler-cloud/prowler/pull/8603)
- Add finding url and tenant info inside Jira tickets [(#8607)](https://github.com/prowler-cloud/prowler/pull/8607)
@@ -360,11 +332,9 @@ All notable changes to the **Prowler SDK** are documented in this file.
- `projects_network_access_list_exposed_to_internet` - Ensure project network access list is not exposed to internet
### Changed
- Rename ftp and mongo checks to follow pattern `ec2_securitygroup_allow_ingress_from_internet_to_tcp_port_*` [(#8293)](https://github.com/prowler-cloud/prowler/pull/8293)
### Fixed
- Renamed `AdditionalUrls` to `AdditionalURLs` field in CheckMetadata [(#8639)](https://github.com/prowler-cloud/prowler/pull/8639)
- TypeError from Python 3.9 in Security Hub module by updating type annotations [(#8619)](https://github.com/prowler-cloud/prowler/pull/8619)
- KeyError when SecurityGroups field is missing in MemoryDB check [(#8666)](https://github.com/prowler-cloud/prowler/pull/8666)
@@ -375,7 +345,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.11.0] (Prowler v5.11.0)
### Added
- Certificate authentication for M365 provider [(#8404)](https://github.com/prowler-cloud/prowler/pull/8404)
- `vm_sufficient_daily_backup_retention_period` check for Azure provider [(#8200)](https://github.com/prowler-cloud/prowler/pull/8200)
- `vm_jit_access_enabled` check for Azure provider [(#8202)](https://github.com/prowler-cloud/prowler/pull/8202)
@@ -390,12 +359,10 @@ All notable changes to the **Prowler SDK** are documented in this file.
- GCP `--skip-api-check` command line flag [(#8575)](https://github.com/prowler-cloud/prowler/pull/8575)
### Changed
- Refine KISA ISMS-P compliance mapping [(#8479)](https://github.com/prowler-cloud/prowler/pull/8479)
- Improve AWS Security Hub region check using multiple threads [(#8365)](https://github.com/prowler-cloud/prowler/pull/8365)
### Fixed
- Resource metadata error in `s3_bucket_shadow_resource_vulnerability` check [(#8572)](https://github.com/prowler-cloud/prowler/pull/8572)
- GitHub App authentication through API fails with auth_method validation error [(#8587)](https://github.com/prowler-cloud/prowler/pull/8587)
- AWS resource-arn filtering [(#8533)](https://github.com/prowler-cloud/prowler/pull/8533)
@@ -409,7 +376,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.10.2] (Prowler v5.10.2)
### Fixed
- Order requirements by ID in Prowler ThreatScore AWS compliance framework [(#8495)](https://github.com/prowler-cloud/prowler/pull/8495)
- Add explicit resource name to GCP and Azure Defender checks [(#8352)](https://github.com/prowler-cloud/prowler/pull/8352)
- Validation errors in Azure and M365 providers [(#8353)](https://github.com/prowler-cloud/prowler/pull/8353)
@@ -424,7 +390,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.10.1] (Prowler v5.10.1)
### Fixed
- Remove invalid requirements from CIS 1.0 for GitHub provider [(#8472)](https://github.com/prowler-cloud/prowler/pull/8472)
---
@@ -432,7 +397,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.10.0] (Prowler v5.10.0)
### Added
- `bedrock_api_key_no_administrative_privileges` check for AWS provider [(#8321)](https://github.com/prowler-cloud/prowler/pull/8321)
- `bedrock_api_key_no_long_term_credentials` check for AWS provider [(#8396)](https://github.com/prowler-cloud/prowler/pull/8396)
- Support App Key Content in GitHub provider [(#8271)](https://github.com/prowler-cloud/prowler/pull/8271)
@@ -445,13 +409,11 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Use `trivy` as engine for IaC provider [(#8466)](https://github.com/prowler-cloud/prowler/pull/8466)
### Changed
- Handle some AWS errors as warnings instead of errors [(#8347)](https://github.com/prowler-cloud/prowler/pull/8347)
- Revert import of `checkov` python library [(#8385)](https://github.com/prowler-cloud/prowler/pull/8385)
- Updated policy mapping in ISMS-P compliance file for improved alignment [(#8367)](https://github.com/prowler-cloud/prowler/pull/8367)
### Fixed
- False positives in SQS encryption check for ephemeral queues [(#8330)](https://github.com/prowler-cloud/prowler/pull/8330)
- Add protocol validation check in security group checks to ensure proper protocol matching [(#8374)](https://github.com/prowler-cloud/prowler/pull/8374)
- Add missing audit evidence for controls 1.1.4 and 2.5.5 for ISMS-P compliance. [(#8386)](https://github.com/prowler-cloud/prowler/pull/8386)
@@ -475,7 +437,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.9.2] (Prowler v5.9.2)
### Fixed
- Use the correct resource name in `defender_domain_dkim_enabled` check [(#8334)](https://github.com/prowler-cloud/prowler/pull/8334)
---
@@ -483,7 +444,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.9.0] (Prowler v5.9.0)
### Added
- `storage_smb_channel_encryption_with_secure_algorithm` check for Azure provider [(#8123)](https://github.com/prowler-cloud/prowler/pull/8123)
- `storage_smb_protocol_version_is_latest` check for Azure provider [(#8128)](https://github.com/prowler-cloud/prowler/pull/8128)
- `vm_backup_enabled` check for Azure provider [(#8182)](https://github.com/prowler-cloud/prowler/pull/8182)
@@ -496,11 +456,9 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Add `test_connection` method to GitHub provider [(#8248)](https://github.com/prowler-cloud/prowler/pull/8248)
### Changed
- Refactor the Azure Defender get security contact configuration method to use the API REST endpoint instead of the SDK [(#8241)](https://github.com/prowler-cloud/prowler/pull/8241)
### Fixed
- Title & description wording for `iam_user_accesskey_unused` check for AWS provider [(#8233)](https://github.com/prowler-cloud/prowler/pull/8233)
- Add GitHub provider to lateral panel in documentation and change -h environment variable output [(#8246)](https://github.com/prowler-cloud/prowler/pull/8246)
- Show `m365_identity_type` and `m365_identity_id` in cloud reports [(#8247)](https://github.com/prowler-cloud/prowler/pull/8247)
@@ -520,7 +478,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.8.1] (Prowler v5.8.1)
### Fixed
- Detect wildcarded ARNs in sts:AssumeRole policy resources [(#8164)](https://github.com/prowler-cloud/prowler/pull/8164)
- List all streams and `firehose_stream_encrypted_at_rest` logic [(#8213)](https://github.com/prowler-cloud/prowler/pull/8213)
- Allow empty values for http_endpoint in templates [(#8184)](https://github.com/prowler-cloud/prowler/pull/8184)
@@ -573,7 +530,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- New check `codebuild_project_not_publicly_accessible` for AWS provider [(#8127)](https://github.com/prowler-cloud/prowler/pull/8127)
### Fixed
- Consolidate Azure Storage file service properties to the account level, improving the accuracy of the `storage_ensure_file_shares_soft_delete_is_enabled` check [(#8087)](https://github.com/prowler-cloud/prowler/pull/8087)
- Migrate Azure VM service and managed disk logic to Pydantic models for better serialization and type safety, and update all related tests to use the new models and fix UUID handling [(#8151)](https://github.com/prowler-cloud/prowler/pull/8151)
- `organizations_scp_check_deny_regions` check to pass when SCP policies have no statements [(#8091)](https://github.com/prowler-cloud/prowler/pull/8091)
@@ -584,11 +540,9 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Handle empty name in Azure Defender and GCP checks [(#8120)](https://github.com/prowler-cloud/prowler/pull/8120)
### Changed
- Reworked `S3.test_connection` to match the AwsProvider logic [(#8088)](https://github.com/prowler-cloud/prowler/pull/8088)
### Removed
- OCSF version number references to point always to the latest [(#8064)](https://github.com/prowler-cloud/prowler/pull/8064)
---
@@ -596,7 +550,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.7.5] (Prowler v5.7.5)
### Fixed
- Use unified timestamp for all requirements [(#8059)](https://github.com/prowler-cloud/prowler/pull/8059)
- Add EKS to service without subservices [(#7959)](https://github.com/prowler-cloud/prowler/pull/7959)
- `apiserver_strong_ciphers_only` check for K8S provider [(#7952)](https://github.com/prowler-cloud/prowler/pull/7952)
@@ -615,7 +568,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.7.3] (Prowler v5.7.3)
### Fixed
- Automatically encrypt password in Microsoft365 provider [(#7784)](https://github.com/prowler-cloud/prowler/pull/7784)
- Remove last encrypted password appearances [(#7825)](https://github.com/prowler-cloud/prowler/pull/7825)
@@ -624,7 +576,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.7.2] (Prowler v5.7.2)
### Fixed
- `m365_powershell test_credentials` to use sanitized credentials [(#7761)](https://github.com/prowler-cloud/prowler/pull/7761)
- `admincenter_users_admins_reduced_license_footprint` check logic to pass when admin user has no license [(#7779)](https://github.com/prowler-cloud/prowler/pull/7779)
- `m365_powershell` to close the PowerShell sessions in msgraph services [(#7816)](https://github.com/prowler-cloud/prowler/pull/7816)
@@ -637,7 +588,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.7.0] (Prowler v5.7.0)
### Added
- Update the compliance list supported for each provider from docs [(#7694)](https://github.com/prowler-cloud/prowler/pull/7694)
- Allow setting cluster name in in-cluster mode in Kubernetes [(#7695)](https://github.com/prowler-cloud/prowler/pull/7695)
- Prowler ThreatScore for M365 provider [(#7692)](https://github.com/prowler-cloud/prowler/pull/7692)
@@ -656,7 +606,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- CIS 5.0 compliance framework for AWS [(#7766)](https://github.com/prowler-cloud/prowler/pull/7766)
### Fixed
- Update CIS 4.0 for M365 provider [(#7699)](https://github.com/prowler-cloud/prowler/pull/7699)
- Update and upgrade CIS for all the providers [(#7738)](https://github.com/prowler-cloud/prowler/pull/7738)
- Cover policies with conditions with SNS endpoint in `sns_topics_not_publicly_accessible` [(#7750)](https://github.com/prowler-cloud/prowler/pull/7750)
@@ -667,7 +616,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.6.0] (Prowler v5.6.0)
### Added
- SOC2 compliance framework to Azure [(#7489)](https://github.com/prowler-cloud/prowler/pull/7489)
- Check for unused Service Accounts in GCP [(#7419)](https://github.com/prowler-cloud/prowler/pull/7419)
- Powershell to Microsoft365 [(#7331)](https://github.com/prowler-cloud/prowler/pull/7331)
@@ -717,7 +665,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
- Microsoft User and User Credential auth to reports [(#7681)](https://github.com/prowler-cloud/prowler/pull/7681)
### Fixed
- Package name location in pyproject.toml while replicating for prowler-cloud [(#7531)](https://github.com/prowler-cloud/prowler/pull/7531)
- Remove cache in PyPI release action [(#7532)](https://github.com/prowler-cloud/prowler/pull/7532)
- The correct values for logger.info inside iam service [(#7526)](https://github.com/prowler-cloud/prowler/pull/7526)
@@ -738,7 +685,6 @@ All notable changes to the **Prowler SDK** are documented in this file.
## [5.5.1] (Prowler v5.5.1)
### Fixed
- Default name to contacts in Azure Defender [(#7483)](https://github.com/prowler-cloud/prowler/pull/7483)
- Handle projects without ID in GCP [(#7496)](https://github.com/prowler-cloud/prowler/pull/7496)
- Restore packages location in PyProject [(#7510)](https://github.com/prowler-cloud/prowler/pull/7510)
+9 -11
View File
@@ -113,12 +113,12 @@ from prowler.providers.aws.lib.s3.s3 import S3
from prowler.providers.aws.lib.security_hub.security_hub import SecurityHub
from prowler.providers.aws.models import AWSOutputOptions
from prowler.providers.azure.models import AzureOutputOptions
from prowler.providers.cloudflare.models import CloudflareOutputOptions
from prowler.providers.common.provider import Provider
from prowler.providers.common.quick_inventory import run_provider_quick_inventory
from prowler.providers.gcp.models import GCPOutputOptions
from prowler.providers.github.models import GithubOutputOptions
from prowler.providers.iac.models import IACOutputOptions
from prowler.providers.image.models import ImageOutputOptions
from prowler.providers.kubernetes.models import KubernetesOutputOptions
from prowler.providers.llm.models import LLMOutputOptions
from prowler.providers.m365.models import M365OutputOptions
@@ -205,8 +205,8 @@ def prowler():
# Load compliance frameworks
logger.debug("Loading compliance frameworks from .json files")
# Skip compliance frameworks for IAC and LLM providers
if provider != "iac" and provider != "llm":
# Skip compliance frameworks for IAC, LLM, and Image providers
if provider not in ("iac", "llm", "image"):
bulk_compliance_frameworks = Compliance.get_bulk(provider)
# Complete checks metadata with the compliance framework specification
bulk_checks_metadata = update_checks_metadata_with_compliance(
@@ -263,8 +263,8 @@ def prowler():
if not args.only_logs:
global_provider.print_credentials()
# Skip service and check loading for IAC and LLM providers
if provider != "iac" and provider != "llm":
# Skip service and check loading for IAC, LLM, and Image providers
if provider not in ("iac", "llm", "image"):
# Import custom checks from folder
if checks_folder:
custom_checks = parse_checks_from_folder(global_provider, checks_folder)
@@ -333,10 +333,6 @@ def prowler():
output_options = GithubOutputOptions(
args, bulk_checks_metadata, global_provider.identity
)
elif provider == "cloudflare":
output_options = CloudflareOutputOptions(
args, bulk_checks_metadata, global_provider.identity
)
elif provider == "m365":
output_options = M365OutputOptions(
args, bulk_checks_metadata, global_provider.identity
@@ -351,6 +347,8 @@ def prowler():
)
elif provider == "iac":
output_options = IACOutputOptions(args, bulk_checks_metadata)
elif provider == "image":
output_options = ImageOutputOptions(args, bulk_checks_metadata)
elif provider == "llm":
output_options = LLMOutputOptions(args, bulk_checks_metadata)
elif provider == "oraclecloud":
@@ -370,8 +368,8 @@ def prowler():
# Execute checks
findings = []
if provider == "iac" or provider == "llm":
# For IAC and LLM providers, run the scan directly
if provider in ("iac", "llm", "image"):
# For IAC, LLM, and Image providers, run the scan directly
if provider == "llm":
def streaming_callback(findings_batch):
@@ -1,18 +0,0 @@
### Account, Check and/or Region can be * to apply for all the cases.
### Account == <Cloudflare Account ID>
### Region == <Cloudflare Zone ID> (use * for all zones)
### Resources and tags are lists that can have either Regex or Keywords.
### Tags is an optional list that matches on tuples of 'key=value' and are "ANDed" together.
### Use an alternation Regex to match one of multiple tags with "ORed" logic.
### For each check you can except Accounts, Regions, Resources and/or Tags.
########################### MUTELIST EXAMPLE ###########################
Mutelist:
Accounts:
"example-account-id":
Checks:
"zone_dnssec_enabled":
Regions:
- "*"
Resources:
- "example-zone-id"
- "another-zone-id"
+5 -3
View File
@@ -38,7 +38,7 @@ class _MutableTimestamp:
timestamp = _MutableTimestamp(datetime.today())
timestamp_utc = _MutableTimestamp(datetime.now(timezone.utc))
prowler_version = "5.17.2"
prowler_version = "5.17.0"
html_logo_url = "https://github.com/prowler-cloud/prowler/"
square_logo_img = "https://raw.githubusercontent.com/prowler-cloud/prowler/dc7d2d5aeb92fdf12e8604f42ef6472cd3e8e889/docs/img/prowler-logo-black.png"
aws_logo = "https://user-images.githubusercontent.com/38561120/235953920-3e3fba08-0795-41dc-b480-9bea57db9f2e.png"
@@ -53,7 +53,6 @@ class Provider(str, Enum):
AWS = "aws"
GCP = "gcp"
AZURE = "azure"
CLOUDFLARE = "cloudflare"
KUBERNETES = "kubernetes"
M365 = "m365"
GITHUB = "github"
@@ -74,7 +73,10 @@ def get_available_compliance_frameworks(provider=None):
if provider:
providers = [provider]
for provider in providers:
with os.scandir(f"{actual_directory}/../compliance/{provider}") as files:
compliance_dir = f"{actual_directory}/../compliance/{provider}"
if not os.path.isdir(compliance_dir):
continue
with os.scandir(compliance_dir) as files:
for file in files:
if file.is_file() and file.name.endswith(".json"):
available_compliance_frameworks.append(
-9
View File
@@ -510,9 +510,6 @@ gcp:
# gcp.compute_instance_group_multiple_zones
# Minimum number of zones a MIG should span for high availability
mig_min_zones: 2
# gcp.compute_snapshot_not_outdated
# Maximum age in days for disk snapshots before they are considered outdated
max_snapshot_age_days: 90
# GCP Service Account and user-managed keys unused configuration
# gcp.iam_service_account_unused
# gcp.iam_sa_user_managed_key_unused
@@ -595,9 +592,3 @@ github:
mongodbatlas:
# mongodbatlas.organizations_service_account_secrets_expiration --> Maximum hours for service account secrets validity
max_service_account_secret_validity_hours: 8
# Cloudflare Configuration
cloudflare:
# Maximum number of retries for API requests (default is 2)
# Set to 0 to disable retries
max_retries: 3
-2
View File
@@ -688,8 +688,6 @@ def execute(
global_provider.identity.account_id
)
for finding in check_findings:
if global_provider.type == "cloudflare":
is_finding_muted_args["account_id"] = finding.account_id
if global_provider.type == "azure":
is_finding_muted_args["subscription_id"] = (
global_provider.identity.subscriptions.get(finding.subscription)
+40 -68
View File
@@ -163,6 +163,7 @@ class CheckMetadata(BaseModel):
check_id
and values.get("Provider") != "iac"
and values.get("Provider") != "llm"
and values.get("Provider") != "image"
):
service_from_check_id = check_id.split("_")[0]
if service_name != service_from_check_id:
@@ -183,6 +184,7 @@ class CheckMetadata(BaseModel):
check_id
and values.get("Provider") != "iac"
and values.get("Provider") != "llm"
and values.get("Provider") != "image"
):
if "-" in check_id:
raise ValueError(
@@ -728,74 +730,6 @@ class CheckReportGithub(Check_Report):
)
@dataclass
class CheckReportCloudflare(Check_Report):
    """Finding report for a Cloudflare check.

    The resource handed to the constructor is kept as the zone under
    inspection; the zone id, zone name and account id exposed by this
    report are all read from that object. The region is always reported
    as ``"global"``.
    """

    resource_name: str
    resource_id: str
    _zone: Any  # CloudflareZone object

    def __init__(
        self,
        metadata: Dict,
        resource: Any,
        resource_name: str = None,
        resource_id: str = None,
    ) -> None:
        """Build the report from the check metadata and the checked resource.

        Args:
            metadata: Check metadata dictionary.
            resource: The CloudflareZone resource being checked.
            resource_name: Explicit resource name; when falsy, the resource's
                ``name`` attribute is used, then ``resource_name``, then "".
            resource_id: Explicit resource ID; when falsy, the resource's
                ``id`` attribute is used, then ``resource_id``, then "".
        """
        super().__init__(metadata, resource)

        # The resource under check doubles as the zone backing the properties.
        self._zone = resource

        # Explicit overrides win; otherwise fall back to the resource's own
        # attributes (secondary attribute resolved first, as the default).
        if not resource_name:
            name_fallback = getattr(resource, "resource_name", "")
            resource_name = getattr(resource, "name", name_fallback)
        self.resource_name = resource_name

        if not resource_id:
            id_fallback = getattr(resource, "resource_id", "")
            resource_id = getattr(resource, "id", id_fallback)
        self.resource_id = resource_id

    @property
    def zone(self) -> Any:
        """Return the stored zone object."""
        return self._zone

    @property
    def zone_id(self) -> str:
        """Return the zone's ``id`` attribute, or "" when it has none."""
        current_zone = self._zone
        return getattr(current_zone, "id", "")

    @property
    def zone_name(self) -> str:
        """Return the zone's ``name`` attribute, or "" when it has none."""
        current_zone = self._zone
        return getattr(current_zone, "name", "")

    @property
    def account_id(self) -> str:
        """Return the ``id`` of the zone's ``account``, or "" when unavailable."""
        owner = getattr(self._zone, "account", None)
        return getattr(owner, "id", "") if owner else ""

    @property
    def region(self) -> str:
        """Return the fixed region label used for Cloudflare findings."""
        return "global"
@dataclass
class CheckReportM365(Check_Report):
"""Contains the M365 Check's finding information."""
@@ -859,6 +793,44 @@ class CheckReportIAC(Check_Report):
)
@dataclass
class CheckReportImage(Check_Report):
    """Finding produced by the Container Image provider from Trivy output."""

    resource_name: str
    image_digest: str
    package_name: str
    installed_version: str
    fixed_version: str

    def __init__(
        self,
        metadata: Optional[dict] = None,
        finding: Optional[dict] = None,
        image_name: str = "",
    ) -> None:
        """Build the finding from a single Trivy vulnerability/secret dict.

        Args:
            metadata: Check metadata; an empty dict is used when omitted.
            finding: One vulnerability/secret entry from Trivy's JSON
                output; an empty dict is used when omitted.
            image_name: Name of the container image being scanned; stored
                as ``resource_name``.
        """
        # Fresh dicts per call instead of shared mutable defaults.
        metadata = {} if metadata is None else metadata
        finding = {} if finding is None else finding

        super().__init__(metadata, finding)

        self.resource = finding
        self.resource_name = image_name

        # Attribute <- Trivy key; keys missing from the finding become "".
        # NOTE(review): image_digest is populated from the "PkgID" key —
        # confirm that this is the intended source for a digest.
        trivy_fields = (
            ("image_digest", "PkgID"),
            ("package_name", "PkgName"),
            ("installed_version", "InstalledVersion"),
            ("fixed_version", "FixedVersion"),
        )
        for attribute, trivy_key in trivy_fields:
            setattr(self, attribute, finding.get(trivy_key, ""))
@dataclass
class CheckReportLLM(Check_Report):
"""Contains the LLM Check's finding information."""
+2 -2
View File
@@ -14,8 +14,8 @@ def recover_checks_from_provider(
Returns a list of tuples with the following format (check_name, check_path)
"""
try:
# Bypass check loading for IAC provider since it uses Trivy directly
if provider == "iac" or provider == "llm":
# Bypass check loading for IAC, LLM, and Image providers since they use external tools directly
if provider in ("iac", "llm", "image"):
return []
checks = []
+3 -3
View File
@@ -27,21 +27,21 @@ class ProwlerArgumentParser:
self.parser = argparse.ArgumentParser(
prog="prowler",
formatter_class=RawTextHelpFormatter,
usage="prowler [-h] [--version] {aws,azure,gcp,kubernetes,m365,github,nhn,mongodbatlas,oraclecloud,alibabacloud,cloudflare,dashboard,iac} ...",
usage="prowler [-h] [--version] {aws,azure,gcp,kubernetes,m365,github,nhn,mongodbatlas,oraclecloud,alibabacloud,dashboard,iac,image} ...",
epilog="""
Available Cloud Providers:
{aws,azure,gcp,kubernetes,m365,github,iac,llm,nhn,mongodbatlas,oraclecloud,alibabacloud,cloudflare}
{aws,azure,gcp,kubernetes,m365,github,iac,llm,image,nhn,mongodbatlas,oraclecloud,alibabacloud}
aws AWS Provider
azure Azure Provider
gcp GCP Provider
kubernetes Kubernetes Provider
m365 Microsoft 365 Provider
github GitHub Provider
cloudflare Cloudflare Provider
oraclecloud Oracle Cloud Infrastructure Provider
alibabacloud Alibaba Cloud Provider
iac IaC Provider (Beta)
llm LLM Provider (Beta)
image Container Image Provider (PoC)
nhn NHN Provider (Unofficial)
mongodbatlas MongoDB Atlas Provider (Beta)
+17 -11
View File
@@ -342,14 +342,6 @@ class Finding(BaseModel):
output_data["resource_uid"] = check_output.resource_id
output_data["region"] = check_output.region
elif provider.type == "cloudflare":
output_data["auth_method"] = "api_token"
output_data["account_uid"] = check_output.account_id
output_data["account_name"] = check_output.account_id
output_data["resource_name"] = check_output.resource_name
output_data["resource_uid"] = check_output.resource_id
output_data["region"] = check_output.zone_name
elif provider.type == "alibabacloud":
output_data["auth_method"] = get_nested_attribute(
provider, "identity.identity_arn"
@@ -366,6 +358,23 @@ class Finding(BaseModel):
)
output_data["region"] = check_output.region
elif provider.type == "image":
output_data["auth_method"] = provider.auth_method
output_data["account_uid"] = "image"
output_data["account_name"] = "image"
output_data["resource_name"] = getattr(
check_output, "resource_name", ""
)
output_data["resource_uid"] = getattr(check_output, "resource_name", "")
output_data["region"] = getattr(check_output, "region", "container")
output_data["package_name"] = getattr(check_output, "package_name", "")
output_data["installed_version"] = getattr(
check_output, "installed_version", ""
)
output_data["fixed_version"] = getattr(
check_output, "fixed_version", ""
)
# check_output Unique ID
# TODO: move this to a function
# TODO: in Azure, GCP and K8s there are findings without resource_name
@@ -442,9 +451,6 @@ class Finding(BaseModel):
finding.resource_line_range = "" # Set empty for compatibility
elif provider.type == "oraclecloud":
finding.compartment_id = getattr(finding, "compartment_id", "")
elif provider.type == "cloudflare":
finding.zone_name = getattr(resource, "zone_name", resource.name)
finding.account_id = getattr(finding, "account_id", "")
finding.check_metadata = CheckMetadata(
Provider=finding.check_metadata["provider"],

Some files were not shown because too many files have changed in this diff Show More