Compare commits

..

2 Commits

Author SHA1 Message Date
Prowler Bot 43da3fb29e chore(api): Update prowler dependency to v5.23 for release 5.23.0 (#10660)
Co-authored-by: prowler-bot <179230569+prowler-bot@users.noreply.github.com>
Co-authored-by: César Arroba <19954079+cesararroba@users.noreply.github.com>
2026-04-13 15:42:47 +02:00
Adrián Peña e7a67eff5d chore: update pyjwt (#10661) 2026-04-13 15:26:22 +02:00
379 changed files with 8287 additions and 23360 deletions
+1 -1
View File
@@ -145,7 +145,7 @@ SENTRY_RELEASE=local
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
#### Prowler release version ####
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.24.3
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.16.0
# Social login credentials
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
@@ -13,15 +13,11 @@ inputs:
poetry-version:
description: 'Poetry version to install'
required: false
default: '2.3.4'
default: '2.1.1'
install-dependencies:
description: 'Install Python dependencies with Poetry'
required: false
default: 'true'
update-lock:
description: 'Run `poetry lock` during setup. Only enable when a prior step mutates pyproject.toml (e.g. API `@master` VCS rewrite). Default: false.'
required: false
default: 'false'
runs:
using: 'composite'
@@ -78,7 +74,7 @@ runs:
grep -A2 -B2 "resolved_reference" poetry.lock
- name: Update poetry.lock (prowler repo only)
if: github.repository == 'prowler-cloud/prowler' && inputs.update-lock == 'true'
if: github.repository == 'prowler-cloud/prowler'
shell: bash
working-directory: ${{ inputs.working-directory }}
run: poetry lock
-2
View File
@@ -13,8 +13,6 @@ env:
PROWLER_VERSION: ${{ github.event.release.tag_name }}
BASE_BRANCH: master
permissions: {}
jobs:
detect-release-type:
runs-on: ubuntu-latest
-3
View File
@@ -17,8 +17,6 @@ concurrency:
env:
API_WORKING_DIR: ./api
permissions: {}
jobs:
api-code-quality:
runs-on: ubuntu-latest
@@ -69,7 +67,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
working-directory: ./api
update-lock: 'true'
- name: Poetry check
if: steps.check-changes.outputs.any_changed == 'true'
-2
View File
@@ -24,8 +24,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
api-analyze:
name: CodeQL Security Analysis
@@ -33,8 +33,6 @@ env:
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-api
permissions: {}
jobs:
setup:
if: github.repository == 'prowler-cloud/prowler'
@@ -18,8 +18,6 @@ env:
API_WORKING_DIR: ./api
IMAGE_NAME: prowler-api
permissions: {}
jobs:
api-dockerfile-lint:
if: github.repository == 'prowler-cloud/prowler'
-3
View File
@@ -17,8 +17,6 @@ concurrency:
env:
API_WORKING_DIR: ./api
permissions: {}
jobs:
api-security-scans:
runs-on: ubuntu-latest
@@ -72,7 +70,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
working-directory: ./api
update-lock: 'true'
- name: Bandit
if: steps.check-changes.outputs.any_changed == 'true'
-3
View File
@@ -30,8 +30,6 @@ env:
VALKEY_DB: 0
API_WORKING_DIR: ./api
permissions: {}
jobs:
api-tests:
runs-on: ubuntu-latest
@@ -118,7 +116,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
working-directory: ./api
update-lock: 'true'
- name: Run tests with pytest
if: steps.check-changes.outputs.any_changed == 'true'
-2
View File
@@ -17,8 +17,6 @@ env:
BACKPORT_LABEL_PREFIX: backport-to-
BACKPORT_LABEL_IGNORE: was-backported
permissions: {}
jobs:
backport:
if: github.event.pull_request.merged == true && !(contains(github.event.pull_request.labels.*.name, 'backport')) && !(contains(github.event.pull_request.labels.*.name, 'was-backported'))
-2
View File
@@ -21,8 +21,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
zizmor:
if: github.repository == 'prowler-cloud/prowler'
@@ -9,8 +9,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number }}
cancel-in-progress: false
permissions: {}
jobs:
update-labels:
if: contains(github.event.issue.labels.*.name, 'status/awaiting-response')
@@ -16,8 +16,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
jobs:
conventional-commit-check:
runs-on: ubuntu-latest
@@ -13,8 +13,6 @@ env:
BACKPORT_LABEL_PREFIX: backport-to-
BACKPORT_LABEL_COLOR: B60205
permissions: {}
jobs:
create-label:
runs-on: ubuntu-latest
-2
View File
@@ -13,8 +13,6 @@ env:
PROWLER_VERSION: ${{ github.event.release.tag_name }}
BASE_BRANCH: master
permissions: {}
jobs:
detect-release-type:
runs-on: ubuntu-latest
-2
View File
@@ -14,8 +14,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
scan-secrets:
runs-on: ubuntu-latest
-2
View File
@@ -21,8 +21,6 @@ concurrency:
env:
CHART_PATH: contrib/k8s/helm/prowler-app
permissions: {}
jobs:
helm-lint:
if: github.repository == 'prowler-cloud/prowler'
-2
View File
@@ -13,8 +13,6 @@ concurrency:
env:
CHART_PATH: contrib/k8s/helm/prowler-app
permissions: {}
jobs:
release-helm-chart:
if: github.repository == 'prowler-cloud/prowler'
@@ -9,8 +9,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number }}
cancel-in-progress: false
permissions: {}
jobs:
lock:
if: |
-2
View File
@@ -15,8 +15,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
jobs:
labeler:
runs-on: ubuntu-latest
@@ -32,8 +32,6 @@ env:
PROWLERCLOUD_DOCKERHUB_REPOSITORY: prowlercloud
PROWLERCLOUD_DOCKERHUB_IMAGE: prowler-mcp
permissions: {}
jobs:
setup:
if: github.repository == 'prowler-cloud/prowler'
@@ -18,8 +18,6 @@ env:
MCP_WORKING_DIR: ./mcp_server
IMAGE_NAME: prowler-mcp
permissions: {}
jobs:
mcp-dockerfile-lint:
if: github.repository == 'prowler-cloud/prowler'
@@ -89,9 +87,6 @@ jobs:
api.github.com:443
mirror.gcr.io:443
check.trivy.dev:443
get.trivy.dev:443
release-assets.githubusercontent.com:443
objects.githubusercontent.com:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-2
View File
@@ -14,8 +14,6 @@ env:
PYTHON_VERSION: "3.12"
WORKING_DIRECTORY: ./mcp_server
permissions: {}
jobs:
validate-release:
if: github.repository == 'prowler-cloud/prowler'
-2
View File
@@ -16,8 +16,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
jobs:
check-changelog:
if: contains(github.event.pull_request.labels.*.name, 'no-changelog') == false
@@ -16,8 +16,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
jobs:
check-compliance-mapping:
if: contains(github.event.pull_request.labels.*.name, 'no-compliance-check') == false
@@ -15,8 +15,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
permissions: {}
jobs:
check-conflicts:
runs-on: ubuntu-latest
-2
View File
@@ -12,8 +12,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: false
permissions: {}
jobs:
trigger-cloud-pull-request:
if: |
+7 -5
View File
@@ -17,8 +17,6 @@ concurrency:
env:
PROWLER_VERSION: ${{ inputs.prowler_version }}
permissions: {}
jobs:
prepare-release:
if: github.event_name == 'workflow_dispatch' && github.repository == 'prowler-cloud/prowler'
@@ -40,11 +38,15 @@ jobs:
token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }}
persist-credentials: false
- name: Setup Python with Poetry
uses: ./.github/actions/setup-python-poetry
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
install-dependencies: 'false'
- name: Install Poetry
run: |
python3 -m pip install --user poetry==2.1.1
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Configure Git
run: |
-2
View File
@@ -13,8 +13,6 @@ env:
PROWLER_VERSION: ${{ github.event.release.tag_name }}
BASE_BRANCH: master
permissions: {}
jobs:
detect-release-type:
runs-on: ubuntu-latest
@@ -10,8 +10,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
check-duplicate-test-names:
if: github.repository == 'prowler-cloud/prowler'
+13 -4
View File
@@ -14,8 +14,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
sdk-code-quality:
if: github.repository == 'prowler-cloud/prowler'
@@ -71,11 +69,22 @@ jobs:
contrib/**
**/AGENTS.md
- name: Setup Python with Poetry
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
uses: ./.github/actions/setup-python-poetry
run: pipx install poetry==2.1.1
- name: Set up Python ${{ matrix.python-version }}
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
- name: Install dependencies
if: steps.check-changes.outputs.any_changed == 'true'
run: |
poetry install --no-root
poetry run pip list
- name: Check Poetry lock file
if: steps.check-changes.outputs.any_changed == 'true'
-2
View File
@@ -30,8 +30,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
sdk-analyze:
if: github.repository == 'prowler-cloud/prowler'
@@ -47,8 +47,6 @@ env:
# AWS configuration (for ECR)
AWS_REGION: us-east-1
permissions: {}
jobs:
setup:
if: github.repository == 'prowler-cloud/prowler'
@@ -76,14 +74,15 @@ jobs:
with:
persist-credentials: false
- name: Setup Python with Poetry
uses: ./.github/actions/setup-python-poetry
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
install-dependencies: 'false'
- name: Inject poetry-bumpversion plugin
run: pipx inject poetry poetry-bumpversion
- name: Install Poetry
run: |
pipx install poetry==2.1.1
pipx inject poetry poetry-bumpversion
- name: Get Prowler version and set tags
id: get-prowler-version
@@ -17,8 +17,6 @@ concurrency:
env:
IMAGE_NAME: prowler
permissions: {}
jobs:
sdk-dockerfile-lint:
if: github.repository == 'prowler-cloud/prowler'
@@ -87,7 +85,6 @@ jobs:
check.trivy.dev:443
debian.map.fastlydns.net:80
release-assets.githubusercontent.com:443
objects.githubusercontent.com:443
pypi.org:443
files.pythonhosted.org:443
www.powershellgallery.com:443
+10 -8
View File
@@ -13,8 +13,6 @@ env:
RELEASE_TAG: ${{ github.event.release.tag_name }}
PYTHON_VERSION: '3.12'
permissions: {}
jobs:
validate-release:
if: github.repository == 'prowler-cloud/prowler'
@@ -75,11 +73,13 @@ jobs:
with:
persist-credentials: false
- name: Setup Python with Poetry
uses: ./.github/actions/setup-python-poetry
- name: Install Poetry
run: pipx install poetry==2.1.1
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
install-dependencies: 'false'
- name: Build Prowler package
run: poetry build
@@ -111,11 +111,13 @@ jobs:
with:
persist-credentials: false
- name: Setup Python with Poetry
uses: ./.github/actions/setup-python-poetry
- name: Install Poetry
run: pipx install poetry==2.1.1
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
install-dependencies: 'false'
- name: Install toml package
run: pip install toml
@@ -13,8 +13,6 @@ env:
PYTHON_VERSION: '3.12'
AWS_REGION: 'us-east-1'
permissions: {}
jobs:
refresh-aws-regions:
if: github.repository == 'prowler-cloud/prowler'
@@ -12,8 +12,6 @@ concurrency:
env:
PYTHON_VERSION: '3.12'
permissions: {}
jobs:
refresh-oci-regions:
if: github.repository == 'prowler-cloud/prowler'
+11 -4
View File
@@ -14,8 +14,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
sdk-security-scans:
if: github.repository == 'prowler-cloud/prowler'
@@ -71,11 +69,20 @@ jobs:
contrib/**
**/AGENTS.md
- name: Setup Python with Poetry
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
uses: ./.github/actions/setup-python-poetry
run: pipx install poetry==2.1.1
- name: Set up Python 3.12
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
cache: 'poetry'
- name: Install dependencies
if: steps.check-changes.outputs.any_changed == 'true'
run: poetry install --no-root
- name: Security scan with Bandit
if: steps.check-changes.outputs.any_changed == 'true'
+11 -4
View File
@@ -14,8 +14,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
sdk-tests:
if: github.repository == 'prowler-cloud/prowler'
@@ -92,11 +90,20 @@ jobs:
contrib/**
**/AGENTS.md
- name: Setup Python with Poetry
- name: Install Poetry
if: steps.check-changes.outputs.any_changed == 'true'
uses: ./.github/actions/setup-python-poetry
run: pipx install poetry==2.1.1
- name: Set up Python ${{ matrix.python-version }}
if: steps.check-changes.outputs.any_changed == 'true'
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
- name: Install dependencies
if: steps.check-changes.outputs.any_changed == 'true'
run: poetry install --no-root
# AWS Provider
- name: Check if AWS files changed
@@ -31,8 +31,6 @@ on:
description: "Whether there are UI E2E tests to run"
value: ${{ jobs.analyze.outputs.has-ui-e2e }}
permissions: {}
jobs:
analyze:
runs-on: ubuntu-latest
-2
View File
@@ -13,8 +13,6 @@ env:
PROWLER_VERSION: ${{ github.event.release.tag_name }}
BASE_BRANCH: master
permissions: {}
jobs:
detect-release-type:
runs-on: ubuntu-latest
-2
View File
@@ -26,8 +26,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
ui-analyze:
if: github.repository == 'prowler-cloud/prowler'
@@ -35,8 +35,6 @@ env:
# Build args
NEXT_PUBLIC_API_BASE_URL: http://prowler-api:8080/api/v1
permissions: {}
jobs:
setup:
if: github.repository == 'prowler-cloud/prowler'
@@ -18,8 +18,6 @@ env:
UI_WORKING_DIR: ./ui
IMAGE_NAME: prowler-ui
permissions: {}
jobs:
ui-dockerfile-lint:
if: github.repository == 'prowler-cloud/prowler'
@@ -91,8 +89,6 @@ jobs:
mirror.gcr.io:443
check.trivy.dev:443
get.trivy.dev:443
release-assets.githubusercontent.com:443
objects.githubusercontent.com:443
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-2
View File
@@ -15,8 +15,6 @@ on:
- 'ui/**'
- 'api/**' # API changes can affect UI E2E
permissions: {}
jobs:
# First, analyze which tests need to run
impact-analysis:
-2
View File
@@ -18,8 +18,6 @@ env:
UI_WORKING_DIR: ./ui
NODE_VERSION: '24.13.0'
permissions: {}
jobs:
ui-tests:
runs-on: ubuntu-latest
-1
View File
@@ -84,7 +84,6 @@ continue.json
.continuerc.json
# AI Coding Assistants - OpenCode
.opencode/
opencode.json
# AI Coding Assistants - GitHub Copilot
+1 -1
View File
@@ -70,7 +70,7 @@ repos:
args: ["--ignore=E266,W503,E203,E501,W605"]
- repo: https://github.com/python-poetry/poetry
rev: 2.3.4
rev: 2.1.1
hooks:
- id: poetry-check
name: API - poetry-check
+1 -1
View File
@@ -13,7 +13,7 @@ build:
post_create_environment:
# Install poetry
# https://python-poetry.org/docs/#installing-manually
- python -m pip install poetry==2.3.4
- python -m pip install poetry
post_install:
# Install dependencies with 'docs' dependency group
# https://python-poetry.org/docs/managing-dependencies/#dependency-groups
+3 -3
View File
@@ -140,7 +140,7 @@ Prowler is an open-source cloud security assessment tool supporting AWS, Azure,
| Component | Location | Tech Stack |
|-----------|----------|------------|
| SDK | `prowler/` | Python 3.10+, Poetry 2.3+ |
| SDK | `prowler/` | Python 3.10+, Poetry |
| API | `api/` | Django 5.1, DRF, Celery |
| UI | `ui/` | Next.js 15, React 19, Tailwind 4 |
| MCP Server | `mcp_server/` | FastMCP, Python 3.12+ |
@@ -153,12 +153,12 @@ Prowler is an open-source cloud security assessment tool supporting AWS, Azure,
```bash
# Setup
poetry install --with dev
poetry run prek install
poetry run pre-commit install
# Code quality
poetry run make lint
poetry run make format
poetry run prek run --all-files
poetry run pre-commit run --all-files
```
---
+1 -1
View File
@@ -68,7 +68,7 @@ ENV HOME='/home/prowler'
ENV PATH="${HOME}/.local/bin:${PATH}"
#hadolint ignore=DL3013
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry==2.3.4
pip install --no-cache-dir poetry
RUN poetry install --compile && \
rm -rf ~/.cache/pip
+8 -1
View File
@@ -246,7 +246,14 @@ Some pre-commit hooks require tools installed on your system:
1. **Install [TruffleHog](https://github.com/trufflesecurity/trufflehog#install)** (secret scanning) — see the [official installation options](https://github.com/trufflesecurity/trufflehog#install).
2. **Install [Hadolint](https://github.com/hadolint/hadolint#install)** (Dockerfile linting) — see the [official installation options](https://github.com/hadolint/hadolint#install).
2. **Install [Safety](https://github.com/pyupio/safety)** (dependency vulnerability checking):
```console
# Requires a Python environment (e.g. via pyenv)
pip install safety
```
3. **Install [Hadolint](https://github.com/hadolint/hadolint#install)** (Dockerfile linting) — see the [official installation options](https://github.com/hadolint/hadolint#install).
## Prowler CLI
### Pip package
-60
View File
@@ -2,65 +2,6 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.25.3] (Prowler v5.24.3)
### 🚀 Added
- `/overviews/findings`, `/overviews/findings-severity` and `/overviews/services` now reflect newly-muted findings without waiting for the next scan. The post-mute `reaggregate-all-finding-group-summaries` task was extended to re-run the same per-scan pipeline that scan completion runs (`ScanSummary`, `DailySeveritySummary`, `FindingGroupDailySummary`) on the latest scan of every `(provider, day)` pair, keeping the pre-aggregated tables in sync with `Finding.muted` updates [(#10827)](https://github.com/prowler-cloud/prowler/pull/10827)
### 🐞 Fixed
- Finding groups aggregated `status` now treats muted findings as resolved: a group is `FAIL` only while at least one non-muted FAIL remains, otherwise it is `PASS` (including fully-muted groups). The `filter[status]` filter and the `sort=status` ordering share the same semantics, keeping `status` consistent with `fail_count` and the orthogonal `muted` flag [(#10825)](https://github.com/prowler-cloud/prowler/pull/10825)
- `aggregate_findings` is now idempotent: it deletes the scan's existing `ScanSummary` rows before `bulk_create`, so re-runs (such as the post-mute reaggregation pipeline) no longer violate the `unique_scan_summary` constraint and no longer abort the downstream `DailySeveritySummary` / `FindingGroupDailySummary` recomputation for the affected scan [(#10827)](https://github.com/prowler-cloud/prowler/pull/10827)
- Attack Paths: Findings on AWS were silently dropped during the Neo4j merge for resources whose Cartography node is keyed by a short identifier (e.g. EC2 instances) rather than the full ARN [(#10839)](https://github.com/prowler-cloud/prowler/pull/10839)
---
## [1.25.2] (Prowler v5.24.2)
### 🔄 Changed
- Finding groups `/resources` endpoints now materialize the filtered finding IDs into a Python list before filtering `ResourceFindingMapping`, so PostgreSQL switches from a Merge Semi Join that read hundreds of thousands of RFM index entries to a Nested Loop Index Scan over `finding_id`. The `has_mappings.exists()` pre-check is removed, and a request-scoped cache deduplicates the finding-id round-trip across the helpers that build different RFM querysets [(#10816)](https://github.com/prowler-cloud/prowler/pull/10816)
### 🐞 Fixed
- `/finding-groups/latest/<check_id>/resources` now selects the latest completed scan per provider by `-completed_at` (then `-inserted_at`) instead of `-inserted_at`, matching the `/finding-groups/latest` summary path and the daily-summary upsert so overlapping scans no longer produce diverging `delta`/`new_count` between the two endpoints [(#10802)](https://github.com/prowler-cloud/prowler/pull/10802)
---
## [1.25.1] (Prowler v5.24.1)
### 🔄 Changed
- Attack Paths: Restore `SYNC_BATCH_SIZE` and `FINDINGS_BATCH_SIZE` defaults to 1000, upgrade Cartography to 0.135.0, enable Celery queue priority for cleanup task, rewrite Finding insertion, remove AWS graph cleanup and add timing logs [(#10729)](https://github.com/prowler-cloud/prowler/pull/10729)
### 🐞 Fixed
- Finding group resources endpoints now include findings without associated resources (orphaned IaC findings) as simulated resource rows, and return one row per finding when multiple findings share a resource [(#10708)](https://github.com/prowler-cloud/prowler/pull/10708)
- Attack Paths: Missing `tenant_id` filter while getting related findings after scan completes [(#10722)](https://github.com/prowler-cloud/prowler/pull/10722)
- Finding group counters `pass_count`, `fail_count` and `manual_count` now exclude muted findings [(#10753)](https://github.com/prowler-cloud/prowler/pull/10753)
- Silent data loss in `ResourceFindingMapping` bulk insert that left findings orphaned when `INSERT ... ON CONFLICT DO NOTHING` dropped rows without raising; added explicit `unique_fields` [(#10724)](https://github.com/prowler-cloud/prowler/pull/10724)
---
## [1.25.0] (Prowler v5.24.0)
### 🔄 Changed
- Bump Poetry to `2.3.4` in Dockerfile and pre-commit hooks. Regenerate `api/poetry.lock` [(#10681)](https://github.com/prowler-cloud/prowler/pull/10681)
- Attack Paths: Remove dead `cleanup_findings` no-op and its supporting `prowler_finding_lastupdated` index [(#10684)](https://github.com/prowler-cloud/prowler/pull/10684)
### 🐞 Fixed
- Worker-beat race condition on cold start: replaced `sleep 15` with API service healthcheck dependency (Docker Compose) and init containers (Helm), aligned Gunicorn default port to `8080` [(#10603)](https://github.com/prowler-cloud/prowler/pull/10603)
- API container startup crash on Linux due to root-owned bind-mount preventing JWT key generation [(#10646)](https://github.com/prowler-cloud/prowler/pull/10646)
### 🔐 Security
- `pytest` from 8.2.2 to 9.0.3 to fix CVE-2025-71176 [(#10678)](https://github.com/prowler-cloud/prowler/pull/10678)
---
## [1.24.0] (Prowler v5.23.0)
### 🚀 Added
@@ -71,7 +12,6 @@ All notable changes to the **Prowler API** are documented in this file.
- Finding groups list and latest endpoints support `sort=delta`, ordering by `new_count` then `changed_count` so groups with the most new findings rank highest [(#10606)](https://github.com/prowler-cloud/prowler/pull/10606)
- Finding group resources endpoints (`/finding-groups/{check_id}/resources` and `/finding-groups/latest/{check_id}/resources`) now expose `finding_id` per row, pointing to the most recent matching Finding for each resource. UUIDv7 ordering guarantees `Max(finding__id)` resolves to the latest snapshot [(#10630)](https://github.com/prowler-cloud/prowler/pull/10630)
- Handle CIS and CISA SCuBA compliance framework from google workspace [(#10629)](https://github.com/prowler-cloud/prowler/pull/10629)
- Sort support for all finding group counter fields: `pass_muted_count`, `fail_muted_count`, `manual_muted_count`, and all `new_*`/`changed_*` status-mute breakdown counters [(#10655)](https://github.com/prowler-cloud/prowler/pull/10655)
### 🔄 Changed
+1 -1
View File
@@ -71,7 +71,7 @@ RUN mkdir -p /tmp/prowler_api_output
COPY pyproject.toml ./
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir poetry==2.3.4
pip install --no-cache-dir poetry
ENV PATH="/home/prowler/.local/bin:$PATH"
+1
View File
@@ -56,6 +56,7 @@ start_worker() {
start_worker_beat() {
echo "Starting the worker-beat..."
sleep 15
poetry run python -m celery -A config.celery beat -l "${DJANGO_LOGGING_LEVEL:-info}" --scheduler django_celery_beat.schedulers:DatabaseScheduler
}
+432 -115
View File
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -25,7 +25,7 @@ dependencies = [
"defusedxml==0.7.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.24",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.23",
"psycopg2-binary==2.9.9",
"pytest-celery[redis] (==1.3.0)",
"sentry-sdk[django] (==2.56.0)",
@@ -38,7 +38,7 @@ dependencies = [
"matplotlib (==3.10.8)",
"reportlab (==4.4.10)",
"neo4j (==6.1.0)",
"cartography (==0.135.0)",
"cartography (==0.132.0)",
"gevent (==25.9.1)",
"werkzeug (==3.1.7)",
"sqlparse (==0.5.5)",
@@ -50,7 +50,7 @@ name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
version = "1.25.3"
version = "1.24.0"
[project.scripts]
celery = "src.backend.config.settings.celery"
@@ -62,9 +62,10 @@ django-silk = "5.3.2"
docker = "7.1.0"
filelock = "3.20.3"
freezegun = "1.5.1"
marshmallow = "==3.26.2"
mypy = "1.10.1"
pylint = "3.2.5"
pytest = "9.0.3"
pytest = "8.2.2"
pytest-cov = "5.0.0"
pytest-django = "4.8.0"
pytest-env = "1.1.3"
@@ -74,4 +75,3 @@ ruff = "0.5.0"
safety = "3.7.0"
tqdm = "4.67.1"
vulture = "2.14"
prek = "0.3.9"
@@ -1,23 +0,0 @@
from django.db import migrations
TASK_NAME = "attack-paths-cleanup-stale-scans"
def set_cleanup_priority(apps, schema_editor):
PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")
PeriodicTask.objects.filter(name=TASK_NAME).update(priority=0)
def unset_cleanup_priority(apps, schema_editor):
PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")
PeriodicTask.objects.filter(name=TASK_NAME).update(priority=None)
class Migration(migrations.Migration):
dependencies = [
("api", "0089_backfill_finding_group_status_muted"),
]
operations = [
migrations.RunPython(set_cleanup_priority, unset_cleanup_priority),
]
+1 -1
View File
@@ -1,7 +1,7 @@
openapi: 3.0.3
info:
title: Prowler API
version: 1.25.3
version: 1.24.0
description: |-
Prowler API specification.
+8 -275
View File
@@ -57,7 +57,6 @@ from api.models import (
ProviderGroupMembership,
ProviderSecret,
Resource,
ResourceFindingMapping,
Role,
RoleProviderGroupRelationship,
SAMLConfiguration,
@@ -15446,15 +15445,15 @@ class TestFindingGroupViewSet:
# iam_password_policy has only PASS findings
assert data[0]["attributes"]["status"] == "PASS"
def test_finding_groups_fully_muted_group_is_pass(
def test_finding_groups_fully_muted_group_reflects_underlying_status(
self, authenticated_client, finding_groups_fixture
):
"""A fully-muted group reports status=PASS and muted=True.
"""A fully-muted group still surfaces its underlying status (no MUTED).
rds_encryption has 2 muted FAIL findings. Muted findings are treated
as resolved/accepted, so the group is no longer actionable and its
status must be PASS. The `muted` flag is True because every finding
in the group is muted.
rds_encryption has 2 muted FAIL findings, so the group must report
status=FAIL (the orthogonal `muted` boolean signals it isn't actionable).
The status×muted breakdown lets clients answer 'how many failing
findings are muted in this group'.
"""
response = authenticated_client.get(
reverse("finding-group-list"),
@@ -15464,9 +15463,9 @@ class TestFindingGroupViewSet:
data = response.json()["data"]
assert len(data) == 1
attrs = data[0]["attributes"]
assert attrs["status"] == "PASS"
assert attrs["status"] == "FAIL"
assert attrs["muted"] is True
assert attrs["fail_count"] == 0
assert attrs["fail_count"] == 2
assert attrs["fail_muted_count"] == 2
assert attrs["pass_muted_count"] == 0
assert attrs["manual_muted_count"] == 0
@@ -15479,83 +15478,6 @@ class TestFindingGroupViewSet:
== attrs["muted_count"]
)
def test_finding_groups_status_ignores_muted_failures(
self,
authenticated_client,
tenants_fixture,
scans_fixture,
resources_fixture,
):
"""Muted FAIL findings must not drive the aggregated status.
When a group mixes one non-muted PASS with one muted FAIL, the
actionable outcome is PASS: there are no unmuted failures left. The
aggregated `status` must reflect that (not FAIL), while `muted`
stays False because the group still has a non-muted finding.
"""
tenant = tenants_fixture[0]
scan1, *_ = scans_fixture
resource1, *_ = resources_fixture
pass_finding = Finding.objects.create(
tenant_id=tenant.id,
uid="fg_mixed_muted_pass",
scan=scan1,
delta=None,
status=Status.PASS,
severity=Severity.low,
impact=Severity.low,
check_id="mixed_muted_check",
check_metadata={
"CheckId": "mixed_muted_check",
"checktitle": "Mixed muted check",
"Description": "Fixture for muted status aggregation.",
},
first_seen_at="2024-01-11T00:00:00Z",
muted=False,
)
pass_finding.add_resources([resource1])
fail_muted_finding = Finding.objects.create(
tenant_id=tenant.id,
uid="fg_mixed_muted_fail",
scan=scan1,
delta=None,
status=Status.FAIL,
severity=Severity.high,
impact=Severity.high,
check_id="mixed_muted_check",
check_metadata={
"CheckId": "mixed_muted_check",
"checktitle": "Mixed muted check",
"Description": "Fixture for muted status aggregation.",
},
first_seen_at="2024-01-12T00:00:00Z",
muted=True,
)
fail_muted_finding.add_resources([resource1])
# filter[region] forces finding-level aggregation so we exercise the
# raw-findings path without touching the daily summary fixture.
response = authenticated_client.get(
reverse("finding-group-list"),
{
"filter[inserted_at]": TODAY,
"filter[check_id]": "mixed_muted_check",
"filter[region]": "us-east-1",
},
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert len(data) == 1
attrs = data[0]["attributes"]
assert attrs["status"] == "PASS"
assert attrs["muted"] is False
assert attrs["pass_count"] == 1
assert attrs["fail_count"] == 0
assert attrs["fail_muted_count"] == 1
assert attrs["muted_count"] == 1
def test_finding_groups_status_filter(
self, authenticated_client, finding_groups_fixture
):
@@ -16108,36 +16030,6 @@ class TestFindingGroupViewSet:
# s3_bucket_public_access has 2 findings with 2 different resources
assert len(data) == 2
def test_resources_id_matches_resource_id_for_mapped_findings(
self, authenticated_client, finding_groups_fixture
):
"""Findings with a resource expose the resource id as row id (hot path contract)."""
response = authenticated_client.get(
reverse(
"finding-group-resources", kwargs={"pk": "s3_bucket_public_access"}
),
{"filter[inserted_at]": TODAY},
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert data, "expected resources in response"
resource_ids = set(
ResourceFindingMapping.objects.filter(
finding__check_id="s3_bucket_public_access",
).values_list("resource_id", flat=True)
)
finding_ids = set(
Finding.objects.filter(
check_id="s3_bucket_public_access",
).values_list("id", flat=True)
)
returned_ids = {item["id"] for item in data}
assert returned_ids <= {str(rid) for rid in resource_ids}
assert returned_ids.isdisjoint({str(fid) for fid in finding_ids})
def test_resources_fields(self, authenticated_client, finding_groups_fixture):
"""Test resource fields (uid, name, service, region, type) have valid values."""
response = authenticated_client.get(
@@ -17154,57 +17046,6 @@ class TestFindingGroupViewSet:
# Descending boolean: True (1) before False (0)
assert muted_values == sorted(muted_values, reverse=True)
@pytest.mark.parametrize(
"endpoint_name", ["finding-group-list", "finding-group-latest"]
)
@pytest.mark.parametrize(
"sort_field",
[
"pass_muted_count",
"fail_muted_count",
"manual_muted_count",
"new_fail_count",
"new_fail_muted_count",
"new_pass_count",
"new_pass_muted_count",
"new_manual_count",
"new_manual_muted_count",
"changed_fail_count",
"changed_fail_muted_count",
"changed_pass_count",
"changed_pass_muted_count",
"changed_manual_count",
"changed_manual_muted_count",
],
)
def test_finding_groups_sort_by_counter_fields(
self,
authenticated_client,
finding_groups_fixture,
endpoint_name,
sort_field,
):
"""All counter fields are accepted as sort parameters (asc and desc)."""
params = {"sort": f"-{sort_field}"}
if endpoint_name == "finding-group-list":
params["filter[inserted_at]"] = TODAY
response = authenticated_client.get(reverse(endpoint_name), params)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert len(data) > 0
desc_values = [item["attributes"][sort_field] for item in data]
assert desc_values == sorted(desc_values, reverse=True)
params["sort"] = sort_field
response = authenticated_client.get(reverse(endpoint_name), params)
assert response.status_code == status.HTTP_200_OK
asc_values = [
item["attributes"][sort_field] for item in response.json()["data"]
]
assert asc_values == sorted(asc_values)
@pytest.mark.parametrize(
"endpoint_name", ["finding-group-list", "finding-group-latest"]
)
@@ -17348,111 +17189,3 @@ class TestFindingGroupViewSet:
attrs = item["attributes"]
assert "finding_id" in attrs
assert attrs["finding_id"] in rds_finding_ids
def test_latest_resources_picks_scan_by_completed_at_when_overlap(
self,
authenticated_client,
tenants_fixture,
providers_fixture,
resources_fixture,
):
"""Overlapping scans on the same provider must resolve to the scan
with the latest completed_at, matching the /latest summary path and
the daily-summary upsert (keyed on midnight(completed_at)). Picking
by inserted_at here produced /resources and /latest reading from
different scans and reporting diverging delta/new counts.
"""
tenant = tenants_fixture[0]
provider = providers_fixture[0]
resource = resources_fixture[0]
check_id = "overlap_regression_check"
t0 = datetime.now(timezone.utc) - timedelta(hours=5)
t1 = t0 + timedelta(hours=1)
t1_end = t1 + timedelta(minutes=30)
t2 = t0 + timedelta(hours=4)
scan_long = Scan.objects.create(
name="long overlap scan",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
tenant_id=tenant.id,
started_at=t0,
completed_at=t2,
)
scan_short = Scan.objects.create(
name="short overlap scan",
provider=provider,
trigger=Scan.TriggerChoices.MANUAL,
state=StateChoices.COMPLETED,
tenant_id=tenant.id,
started_at=t1,
completed_at=t1_end,
)
# inserted_at is auto_now_add so override with .update() to recreate
# the overlap shape: short scan inserted later but completed earlier.
Scan.all_objects.filter(pk=scan_long.pk).update(inserted_at=t0)
Scan.all_objects.filter(pk=scan_short.pk).update(inserted_at=t1)
scan_long.refresh_from_db()
scan_short.refresh_from_db()
assert scan_short.inserted_at > scan_long.inserted_at
assert scan_long.completed_at > scan_short.completed_at
long_finding = Finding.objects.create(
tenant_id=tenant.id,
uid=f"{check_id}_long",
scan=scan_long,
delta=None,
status=Status.FAIL,
status_extended="long scan finding",
impact=Severity.high,
impact_extended="high",
severity=Severity.high,
raw_result={"status": Status.FAIL, "severity": Severity.high},
check_id=check_id,
check_metadata={
"CheckId": check_id,
"checktitle": "Overlap regression",
"Description": "Overlapping scan regression.",
},
first_seen_at=t0,
muted=False,
)
long_finding.add_resources([resource])
short_finding = Finding.objects.create(
tenant_id=tenant.id,
uid=f"{check_id}_short",
scan=scan_short,
delta="new",
status=Status.FAIL,
status_extended="short scan finding",
impact=Severity.high,
impact_extended="high",
severity=Severity.high,
raw_result={"status": Status.FAIL, "severity": Severity.high},
check_id=check_id,
check_metadata={
"CheckId": check_id,
"checktitle": "Overlap regression",
"Description": "Overlapping scan regression.",
},
first_seen_at=t1,
muted=False,
)
short_finding.add_resources([resource])
response = authenticated_client.get(
reverse(
"finding-group-latest_resources",
kwargs={"check_id": check_id},
),
)
assert response.status_code == status.HTTP_200_OK
data = response.json()["data"]
assert len(data) == 1
attrs = data[0]["attributes"]
assert attrs["finding_id"] == str(long_finding.id)
assert attrs["delta"] is None
+2 -3
View File
@@ -4225,11 +4225,10 @@ class FindingGroupResourceSerializer(BaseSerializerV1):
Serializer for Finding Group Resources - resources within a finding group.
Returns individual resources with their current status, severity,
and timing information. Orphan findings (without any resource) expose the
finding id as `id` so the row stays identifiable in the UI.
and timing information.
"""
id = serializers.UUIDField(source="row_id")
id = serializers.UUIDField(source="resource_id")
resource = serializers.SerializerMethodField()
provider = serializers.SerializerMethodField()
finding_id = serializers.UUIDField()
+62 -309
View File
@@ -35,13 +35,11 @@ from django.db.models import (
CharField,
Count,
DecimalField,
Exists,
ExpressionWrapper,
F,
IntegerField,
Max,
Min,
OuterRef,
Prefetch,
Q,
QuerySet,
@@ -417,7 +415,7 @@ class SchemaView(SpectacularAPIView):
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.25.3"
spectacular_settings.VERSION = "1.24.0"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -7127,16 +7125,17 @@ class FindingGroupViewSet(BaseRLSViewSet):
output_field=IntegerField(),
)
# `pass_count`, `fail_count` and `manual_count` only count non-muted
# findings. Muted findings are tracked separately via the
# `*_muted_count` fields.
# `pass_count`, `fail_count` and `manual_count` count *every* finding
# for the check (muted or not) so the aggregated `status` reflects the
# underlying check outcome regardless of mute state. Whether the group
# is actionable is signalled by the orthogonal `muted` flag below.
return (
queryset.values("check_id")
.annotate(
severity_order=Max(severity_case),
pass_count=Count("id", filter=Q(status="PASS", muted=False)),
fail_count=Count("id", filter=Q(status="FAIL", muted=False)),
manual_count=Count("id", filter=Q(status="MANUAL", muted=False)),
pass_count=Count("id", filter=Q(status="PASS")),
fail_count=Count("id", filter=Q(status="FAIL")),
manual_count=Count("id", filter=Q(status="MANUAL")),
pass_muted_count=Count("id", filter=Q(status="PASS", muted=True)),
fail_muted_count=Count("id", filter=Q(status="FAIL", muted=True)),
manual_muted_count=Count("id", filter=Q(status="MANUAL", muted=True)),
@@ -7281,18 +7280,12 @@ class FindingGroupViewSet(BaseRLSViewSet):
# finding-level aggregation path.
row.pop("nonmuted_count", None)
# Muted findings are treated as resolved/accepted, so they do not
# contribute to a failing status. A group is FAIL only when there
# is at least one non-muted FAIL; otherwise any pass (muted or
# not) or any muted fail makes the group PASS. Only groups whose
# findings are exclusively MANUAL fall through to MANUAL.
# Compute aggregated status. Counts are inclusive of muted findings,
# so the underlying check outcome surfaces even when the group is
# fully muted.
if row.get("fail_count", 0) > 0:
row["status"] = "FAIL"
elif (
row.get("pass_count", 0) > 0
or row.get("pass_muted_count", 0) > 0
or row.get("fail_muted_count", 0) > 0
):
elif row.get("pass_count", 0) > 0:
row["status"] = "PASS"
else:
row["status"] = "MANUAL"
@@ -7318,23 +7311,8 @@ class FindingGroupViewSet(BaseRLSViewSet):
"pass_count": "pass_count",
"manual_count": "manual_count",
"muted_count": "muted_count",
"pass_muted_count": "pass_muted_count",
"fail_muted_count": "fail_muted_count",
"manual_muted_count": "manual_muted_count",
"new_count": "new_count",
"new_fail_count": "new_fail_count",
"new_fail_muted_count": "new_fail_muted_count",
"new_pass_count": "new_pass_count",
"new_pass_muted_count": "new_pass_muted_count",
"new_manual_count": "new_manual_count",
"new_manual_muted_count": "new_manual_muted_count",
"changed_count": "changed_count",
"changed_fail_count": "changed_fail_count",
"changed_fail_muted_count": "changed_fail_muted_count",
"changed_pass_count": "changed_pass_count",
"changed_pass_muted_count": "changed_pass_muted_count",
"changed_manual_count": "changed_manual_count",
"changed_manual_muted_count": "changed_manual_muted_count",
"resources_total": "resources_total",
"resources_fail": "resources_fail",
"first_seen_at": "agg_first_seen_at",
@@ -7395,8 +7373,6 @@ class FindingGroupViewSet(BaseRLSViewSet):
aggregated_status=Case(
When(fail_count__gt=0, then=Value("FAIL")),
When(pass_count__gt=0, then=Value("PASS")),
When(pass_muted_count__gt=0, then=Value("PASS")),
When(fail_muted_count__gt=0, then=Value("PASS")),
default=Value("MANUAL"),
output_field=CharField(),
)
@@ -7416,25 +7392,6 @@ class FindingGroupViewSet(BaseRLSViewSet):
return filterset.qs
def _resolve_finding_ids(self, filtered_queryset):
"""
Materialize and request-cache the finding_ids list used to anchor
RFM lookups.
Turning `finding_id__in=Subquery(findings_qs)` into `finding_id__in=
[uuid, ...]` nudges PostgreSQL out of a Merge Semi Join that ends up
reading hundreds of thousands of RFM index entries just to post-
filter tenant_id. Caching on the ViewSet instance (one instance per
request) avoids duplicating the findings round-trip when several
helpers build different RFM querysets from the same filtered set.
"""
cached = getattr(self, "_finding_ids_cache", None)
if cached is not None and cached[0] is filtered_queryset:
return cached[1]
finding_ids = list(filtered_queryset.order_by().values_list("id", flat=True))
self._finding_ids_cache = (filtered_queryset, finding_ids)
return finding_ids
def _build_resource_mapping_queryset(
self, filtered_queryset, resource_ids=None, tenant_id: str | None = None
):
@@ -7444,10 +7401,10 @@ class FindingGroupViewSet(BaseRLSViewSet):
Starting from ResourceFindingMapping avoids scanning all mappings
before applying check_id/date filters on findings.
"""
finding_ids = self._resolve_finding_ids(filtered_queryset)
finding_ids = filtered_queryset.order_by().values("id")
mapping_queryset = ResourceFindingMapping.objects.filter(
finding_id__in=finding_ids
finding_id__in=Subquery(finding_ids)
)
if tenant_id:
mapping_queryset = mapping_queryset.filter(tenant_id=tenant_id)
@@ -7606,53 +7563,6 @@ class FindingGroupViewSet(BaseRLSViewSet):
.order_by(*ordering)
)
def _orphan_findings_queryset(self, filtered_queryset, finding_ids=None):
"""Findings in the filtered set with no ResourceFindingMapping entries."""
orphan_qs = filtered_queryset.filter(
~Exists(ResourceFindingMapping.objects.filter(finding_id=OuterRef("pk")))
)
if finding_ids is not None:
orphan_qs = orphan_qs.filter(id__in=finding_ids)
return orphan_qs
def _has_orphan_findings(self, filtered_queryset) -> bool:
"""Return True if any finding in the filtered set has no resource mapping."""
return self._orphan_findings_queryset(filtered_queryset).exists()
def _orphan_aggregation_values(self, orphan_queryset):
"""Raw rows for orphan findings; resource payload synthesized from metadata.
check_metadata is stored with lowercase keys (see
`prowler.lib.outputs.finding.Finding.get_metadata`) and
`Finding.resource_groups` is already denormalized at ingest time.
"""
return orphan_queryset.annotate(
_provider_type=F("scan__provider__provider"),
_provider_uid=F("scan__provider__uid"),
_provider_alias=F("scan__provider__alias"),
_svc=KeyTextTransform("servicename", "check_metadata"),
_region=KeyTextTransform("region", "check_metadata"),
_rtype=KeyTextTransform("resourcetype", "check_metadata"),
_rgroup=F("resource_groups"),
).values(
"id",
"uid",
"status",
"severity",
"delta",
"muted",
"muted_reason",
"first_seen_at",
"inserted_at",
"_provider_type",
"_provider_uid",
"_provider_alias",
"_svc",
"_region",
"_rtype",
"_rgroup",
)
def _post_process_resources(self, resource_data):
"""Convert resource aggregation rows to API output."""
results = []
@@ -7674,13 +7584,9 @@ class FindingGroupViewSet(BaseRLSViewSet):
else:
delta = None
resource_id = row["resource_id"]
finding_id = str(row["finding_id"]) if row.get("finding_id") else None
results.append(
{
"row_id": resource_id,
"resource_id": resource_id,
"resource_id": row["resource_id"],
"resource_uid": row["resource_uid"],
"resource_name": row["resource_name"],
"resource_service": row["resource_service"],
@@ -7699,46 +7605,9 @@ class FindingGroupViewSet(BaseRLSViewSet):
"muted": bool(row.get("muted", False)),
"muted_reason": row.get("muted_reason"),
"resource_group": row.get("resource_group", ""),
"finding_id": finding_id,
}
)
return results
def _post_process_orphans(self, orphan_rows):
"""Convert orphan finding rows into the same API shape as mapping rows."""
results = []
for row in orphan_rows:
status_val = row["status"]
status = status_val if status_val in ("FAIL", "PASS") else "MANUAL"
muted = bool(row["muted"])
delta_val = row.get("delta")
delta = delta_val if delta_val in ("new", "changed") and not muted else None
finding_id = str(row["id"])
results.append(
{
"row_id": finding_id,
"resource_id": None,
"resource_uid": row["uid"],
"resource_name": row["uid"],
"resource_service": row["_svc"] or "",
"resource_region": row["_region"] or "",
"resource_type": row["_rtype"] or "",
"provider_type": row["_provider_type"],
"provider_uid": row["_provider_uid"],
"provider_alias": row["_provider_alias"],
"status": status,
"severity": row["severity"],
"delta": delta,
"first_seen_at": row["first_seen_at"],
"last_seen_at": row["inserted_at"],
"muted": muted,
"muted_reason": row.get("muted_reason"),
"resource_group": row["_rgroup"] or "",
"finding_id": finding_id,
"finding_id": (
str(row["finding_id"]) if row.get("finding_id") else None
),
}
)
@@ -7799,18 +7668,26 @@ class FindingGroupViewSet(BaseRLSViewSet):
sort_param, self._FINDING_GROUP_SORT_MAP
)
if ordering:
# status_order is annotated on demand so groups can be sorted by
# their aggregated status (FAIL > PASS > MANUAL), mirroring the
# priority used in _post_process_aggregation. Counts are
# inclusive of muted findings, so the underlying check outcome
# surfaces even for fully muted groups.
if any(field.lstrip("-") == "status_order" for field in ordering):
aggregated_queryset = aggregated_queryset.annotate(
status_order=Case(
When(fail_count__gt=0, then=Value(3)),
When(pass_count__gt=0, then=Value(2)),
When(pass_muted_count__gt=0, then=Value(2)),
When(fail_muted_count__gt=0, then=Value(2)),
default=Value(1),
output_field=IntegerField(),
)
)
# delta_order is a virtual sort field: expand it to a
# lexicographic ordering by (new_count, changed_count) so groups
# with more new findings rank higher, with changed_count as the
# tie-breaker (preserves the "new > changed" priority used by
# the resources endpoint, but driven by the actual counters).
expanded_ordering = []
for field in ordering:
if field.lstrip("-") == "delta_order":
@@ -7844,65 +7721,41 @@ class FindingGroupViewSet(BaseRLSViewSet):
def _paginated_resource_response(
self, request, filtered_queryset, resource_ids, tenant_id
):
"""Paginate and return resources, appending orphan findings when present.
"""Paginate and return resources.
Hot path (no orphans, or resource filter applied): resources come from
ResourceFindingMapping aggregation. Untouched pre-existing behaviour.
Orphan fallback: findings without a mapping (e.g. IaC) are appended
after mapping rows as synthesised resource-like rows so they remain
visible in the UI without paying the aggregation cost on the hot path.
Without sort: paginate lightweight resource IDs first, aggregate only the page.
With sort: build a lightweight ordering subquery (resource_id + sort keys),
paginate that, then aggregate full details only for the page.
"""
sort_param = request.query_params.get("sort")
ordering = None
if sort_param:
validated = self._validate_sort_fields(sort_param, self._RESOURCE_SORT_MAP)
ordering = validated if validated else None
ordering = self._validate_sort_fields(sort_param, self._RESOURCE_SORT_MAP)
if ordering:
if "resource_id" not in {field.lstrip("-") for field in ordering}:
ordering.append("resource_id")
# Resource filters can only match findings with resources; skip orphan
# detection entirely when they are present.
if resource_ids is not None:
return self._mapping_paginated_response(
request, filtered_queryset, resource_ids, tenant_id, ordering
)
# Phase 1: lightweight aggregation with only sort keys, paginate
ordering_qs = self._build_resource_ordering_queryset(
filtered_queryset,
resource_ids=resource_ids,
tenant_id=tenant_id,
ordering=ordering,
)
page = self.paginate_queryset(ordering_qs)
if page is not None:
page_ids = [row["resource_id"] for row in page]
resource_data = self._build_resource_aggregation(
filtered_queryset, resource_ids=page_ids, tenant_id=tenant_id
)
# Re-sort to match the page ordering
id_order = {rid: idx for idx, rid in enumerate(page_ids)}
results = self._post_process_resources(resource_data)
results.sort(key=lambda r: id_order.get(r["resource_id"], 0))
serializer = FindingGroupResourceSerializer(results, many=True)
return self.get_paginated_response(serializer.data)
# Serve the mapping response directly and piggyback on the paginator
# count to detect orphan-only groups, instead of paying a separate
# has_mappings.exists() semi-join over ResourceFindingMapping on
# every non-IaC request. TODO: once the ephemeral resources strategy
# is decided, mixed groups should route to _combined_paginated_response.
response = self._mapping_paginated_response(
request, filtered_queryset, resource_ids, tenant_id, ordering
)
page = getattr(self.paginator, "page", None)
mapping_total = page.paginator.count if page is not None else None
if mapping_total == 0:
# Pure orphan group (e.g. IaC): synthesize resource-like rows.
return self._combined_paginated_response(
request, filtered_queryset, tenant_id, ordering
)
return response
def _mapping_paginated_response(
self, request, filtered_queryset, resource_ids, tenant_id, ordering
):
"""Mapping-only paginated response (original fast path)."""
if ordering:
if "resource_id" not in {field.lstrip("-") for field in ordering}:
ordering.append("resource_id")
# Phase 1: lightweight aggregation with only sort keys, paginate
ordering_qs = self._build_resource_ordering_queryset(
filtered_queryset,
resource_ids=resource_ids,
tenant_id=tenant_id,
ordering=ordering,
)
page = self.paginate_queryset(ordering_qs)
if page is not None:
page_ids = [row["resource_id"] for row in page]
page_ids = [row["resource_id"] for row in ordering_qs]
resource_data = self._build_resource_aggregation(
filtered_queryset, resource_ids=page_ids, tenant_id=tenant_id
)
@@ -7910,18 +7763,10 @@ class FindingGroupViewSet(BaseRLSViewSet):
results = self._post_process_resources(resource_data)
results.sort(key=lambda r: id_order.get(r["resource_id"], 0))
serializer = FindingGroupResourceSerializer(results, many=True)
return self.get_paginated_response(serializer.data)
page_ids = [row["resource_id"] for row in ordering_qs]
resource_data = self._build_resource_aggregation(
filtered_queryset, resource_ids=page_ids, tenant_id=tenant_id
)
id_order = {rid: idx for idx, rid in enumerate(page_ids)}
results = self._post_process_resources(resource_data)
results.sort(key=lambda r: id_order.get(r["resource_id"], 0))
serializer = FindingGroupResourceSerializer(results, many=True)
return Response(serializer.data)
return Response(serializer.data)
# No sort (or only empty sort fragments): paginate lightweight resource IDs
# first, aggregate only the page.
mapping_qs = self._build_resource_mapping_queryset(
filtered_queryset, resource_ids=resource_ids, tenant_id=tenant_id
)
@@ -7949,95 +7794,6 @@ class FindingGroupViewSet(BaseRLSViewSet):
serializer = FindingGroupResourceSerializer(results, many=True)
return Response(serializer.data)
def _combined_paginated_response(
self, request, filtered_queryset, tenant_id, ordering
):
"""Mapping rows + orphan findings appended at end.
Orphans sit after mapping rows regardless of sort. This keeps the
mapping-only code path intact for checks that have no orphans (the
common case) and avoids paying UNION/coalesce costs there.
"""
mapping_qs = self._build_resource_mapping_queryset(
filtered_queryset, resource_ids=None, tenant_id=tenant_id
)
mapping_count = mapping_qs.values("resource_id").distinct().count()
orphan_ids = list(
self._orphan_findings_queryset(filtered_queryset)
.order_by("id")
.values_list("id", flat=True)
)
orphan_count = len(orphan_ids)
total = mapping_count + orphan_count
# Paginate a simple [0..total) index sequence so DRF produces proper
# links/meta; then slice mapping / orphan sources accordingly.
page = self.paginate_queryset(range(total))
page_indices = list(page) if page is not None else list(range(total))
mapping_indices = [i for i in page_indices if i < mapping_count]
orphan_positions = [
i - mapping_count for i in page_indices if i >= mapping_count
]
mapping_results = []
if mapping_indices:
start = mapping_indices[0]
stop = mapping_indices[-1] + 1
if ordering:
ordering_fields = list(ordering)
if "resource_id" not in {
field.lstrip("-") for field in ordering_fields
}:
ordering_fields.append("resource_id")
ordered_qs = self._build_resource_ordering_queryset(
filtered_queryset,
resource_ids=None,
tenant_id=tenant_id,
ordering=ordering_fields,
)
slice_rids = [row["resource_id"] for row in ordered_qs[start:stop]]
else:
slice_rids = list(
mapping_qs.values_list("resource_id", flat=True)
.distinct()
.order_by("resource_id")[start:stop]
)
if slice_rids:
resource_data = self._build_resource_aggregation(
filtered_queryset,
resource_ids=slice_rids,
tenant_id=tenant_id,
)
rows_by_rid = {row["resource_id"]: row for row in resource_data}
ordered_rows = [
rows_by_rid[rid] for rid in slice_rids if rid in rows_by_rid
]
mapping_results = self._post_process_resources(ordered_rows)
orphan_results = []
if orphan_positions:
slice_fids = [orphan_ids[pos] for pos in orphan_positions]
raw_rows = list(
self._orphan_aggregation_values(
self._orphan_findings_queryset(
filtered_queryset, finding_ids=slice_fids
)
)
)
rows_by_fid = {row["id"]: row for row in raw_rows}
ordered_rows = [
rows_by_fid[fid] for fid in slice_fids if fid in rows_by_fid
]
orphan_results = self._post_process_orphans(ordered_rows)
results = mapping_results + orphan_results
serializer = FindingGroupResourceSerializer(results, many=True)
if page is not None:
return self.get_paginated_response(serializer.data)
return Response(serializer.data)
def list(self, request, *args, **kwargs):
"""
List finding groups with aggregation and filtering.
@@ -8167,13 +7923,10 @@ class FindingGroupViewSet(BaseRLSViewSet):
tenant_id = request.tenant_id
queryset = self._get_finding_queryset()
# Order by -completed_at (matching the /latest summary path and the
# daily summary upsert keyed on midnight(completed_at)) so that
# overlapping scans do not make /resources and /latest read from
# different scans and report diverging counts.
# Get latest completed scan for each provider
latest_scan_ids = (
Scan.objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
.order_by("provider_id", "-completed_at", "-inserted_at")
.order_by("provider_id", "-inserted_at")
.distinct("provider_id")
.values_list("id", flat=True)
)
+1 -3
View File
@@ -17,10 +17,8 @@ celery_app.config_from_object("django.conf:settings", namespace="CELERY")
celery_app.conf.update(result_extended=True, result_expires=None)
celery_app.conf.broker_transport_options = {
"visibility_timeout": BROKER_VISIBILITY_TIMEOUT,
"queue_order_strategy": "priority",
"visibility_timeout": BROKER_VISIBILITY_TIMEOUT
}
celery_app.conf.task_default_priority = 6
celery_app.conf.result_backend_transport_options = {
"visibility_timeout": BROKER_VISIBILITY_TIMEOUT
}
+1 -1
View File
@@ -15,7 +15,7 @@ from config.django.production import LOGGING as DJANGO_LOGGERS, DEBUG # noqa: E
from config.custom_logging import BackendLogger # noqa: E402
BIND_ADDRESS = env("DJANGO_BIND_ADDRESS", default="127.0.0.1")
PORT = env("DJANGO_PORT", default=8080)
PORT = env("DJANGO_PORT", default=8000)
# Server settings
bind = f"{BIND_ADDRESS}:{PORT}"
+10 -59
View File
@@ -1,8 +1,6 @@
# Portions of this file are based on code from the Cartography project
# (https://github.com/cartography-cncf/cartography), which is licensed under the Apache 2.0 License.
import time
from typing import Any
import aioboto3
@@ -35,7 +33,7 @@ def start_aws_ingestion(
For the scan progress updates:
- The caller of this function (`tasks.jobs.attack_paths.scan.run`) has set it to 2.
- When the control returns to the caller, it will be set to 93.
- When the control returns to the caller, it will be set to 95.
"""
# Initialize variables common to all jobs
@@ -91,50 +89,34 @@ def start_aws_ingestion(
logger.info(
f"Syncing function permission_relationships for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.RESOURCE_FUNCTIONS["permission_relationships"](**sync_args)
logger.info(
f"Synced function permission_relationships for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 88)
if "resourcegroupstaggingapi" in requested_syncs:
logger.info(
f"Syncing function resourcegroupstaggingapi for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.RESOURCE_FUNCTIONS["resourcegroupstaggingapi"](**sync_args)
logger.info(
f"Synced function resourcegroupstaggingapi for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 89)
logger.info(
f"Syncing ec2_iaminstanceprofile scoped analysis for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.run_scoped_analysis_job(
"aws_ec2_iaminstanceprofile.json",
neo4j_session,
common_job_parameters,
)
logger.info(
f"Synced ec2_iaminstanceprofile scoped analysis for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 90)
logger.info(
f"Syncing lambda_ecr analysis for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.run_analysis_job(
"aws_lambda_ecr.json",
neo4j_session,
common_job_parameters,
)
logger.info(
f"Synced lambda_ecr analysis for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
if all(
s in requested_syncs
@@ -143,34 +125,25 @@ def start_aws_ingestion(
logger.info(
f"Syncing lb_container_exposure scoped analysis for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.run_scoped_analysis_job(
"aws_lb_container_exposure.json",
neo4j_session,
common_job_parameters,
)
logger.info(
f"Synced lb_container_exposure scoped analysis for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
if all(s in requested_syncs for s in ["ec2:network_acls", "ec2:load_balancer_v2"]):
logger.info(
f"Syncing lb_nacl_direct scoped analysis for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
cartography_aws.run_scoped_analysis_job(
"aws_lb_nacl_direct.json",
neo4j_session,
common_job_parameters,
)
logger.info(
f"Synced lb_nacl_direct scoped analysis for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 91)
logger.info(f"Syncing metadata for AWS account {prowler_api_provider.uid}")
t0 = time.perf_counter()
cartography_aws.merge_module_sync_metadata(
neo4j_session,
group_type="AWSAccount",
@@ -179,23 +152,24 @@ def start_aws_ingestion(
update_tag=cartography_config.update_tag,
stat_handler=cartography_aws.stat_handler,
)
logger.info(
f"Synced metadata for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 92)
# Removing the added extra field
del common_job_parameters["AWS_ID"]
logger.info(f"Syncing cleanup_job for AWS account {prowler_api_provider.uid}")
cartography_aws.run_cleanup_job(
"aws_post_ingestion_principals_cleanup.json",
neo4j_session,
common_job_parameters,
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 93)
logger.info(f"Syncing analysis for AWS account {prowler_api_provider.uid}")
t0 = time.perf_counter()
cartography_aws._perform_aws_analysis(
requested_syncs, neo4j_session, common_job_parameters
)
logger.info(
f"Synced analysis for AWS account {prowler_api_provider.uid} in {time.perf_counter() - t0:.3f}s"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 93)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 94)
return failed_syncs
@@ -260,8 +234,6 @@ def sync_aws_account(
)
try:
func_t0 = time.perf_counter()
# `ecr:image_layers` uses `aioboto3_session` instead of `boto3_session`
if func_name == "ecr:image_layers":
cartography_aws.RESOURCE_FUNCTIONS[func_name](
@@ -285,15 +257,7 @@ def sync_aws_account(
else:
cartography_aws.RESOURCE_FUNCTIONS[func_name](**sync_args)
logger.info(
f"Synced function {func_name} for AWS account {prowler_api_provider.uid} in {time.perf_counter() - func_t0:.3f}s"
)
except Exception as e:
logger.info(
f"Synced function {func_name} for AWS account {prowler_api_provider.uid} in {time.perf_counter() - func_t0:.3f}s (FAILED)"
)
exception_message = utils.stringify_exception(
e, f"Exception for AWS sync function: {func_name}"
)
@@ -313,16 +277,3 @@ def sync_aws_account(
)
return failed_syncs
def extract_short_uid(uid: str) -> str:
"""Return the short identifier from an AWS ARN or resource ID.
Supported inputs end in one of:
- `<type>/<id>` (e.g. `instance/i-xxx`)
- `<type>:<id>` (e.g. `function:name`)
- `<id>` (e.g. `bucket-name` or `i-xxx`)
If `uid` is already a short resource ID, it is returned unchanged.
"""
return uid.rsplit("/", 1)[-1].rsplit(":", 1)[-1]
@@ -8,9 +8,9 @@ from tasks.jobs.attack_paths import aws
# Batch size for Neo4j write operations (resource labeling, cleanup)
BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000)
# Batch size for Postgres findings fetch (keyset pagination page size)
FINDINGS_BATCH_SIZE = env.int("ATTACK_PATHS_FINDINGS_BATCH_SIZE", 1000)
FINDINGS_BATCH_SIZE = env.int("ATTACK_PATHS_FINDINGS_BATCH_SIZE", 500)
# Batch size for temp-to-tenant graph sync (nodes and relationships per cursor page)
SYNC_BATCH_SIZE = env.int("ATTACK_PATHS_SYNC_BATCH_SIZE", 1000)
SYNC_BATCH_SIZE = env.int("ATTACK_PATHS_SYNC_BATCH_SIZE", 250)
# Neo4j internal labels (Prowler-specific, not provider-specific)
# - `Internet`: Singleton node representing external internet access for exposed-resource queries
@@ -37,8 +37,6 @@ class ProviderConfig:
# Label for resources connected to the account node, enabling indexed finding lookups.
resource_label: str # e.g., "_AWSResource"
ingestion_function: Callable
# Maps a Postgres resource UID (e.g. full ARN) to the short-id form Cartography stores on some node types (e.g. `i-xxx` for EC2Instance).
short_uid_extractor: Callable[[str], str]
# Provider Configurations
@@ -50,7 +48,6 @@ AWS_CONFIG = ProviderConfig(
uid_field="arn",
resource_label="_AWSResource",
ingestion_function=aws.start_aws_ingestion,
short_uid_extractor=aws.extract_short_uid,
)
PROVIDER_CONFIGS: dict[str, ProviderConfig] = {
@@ -119,21 +116,6 @@ def get_provider_resource_label(provider_type: str) -> str:
return config.resource_label if config else "_UnknownProviderResource"
def _identity_short_uid(uid: str) -> str:
"""Fallback short-uid extractor for providers without a custom mapping."""
return uid
def get_short_uid_extractor(provider_type: str) -> Callable[[str], str]:
"""Get the short-uid extractor for a provider type.
Returns an identity function when the provider is unknown, so callers can
rely on a callable always being returned.
"""
config = PROVIDER_CONFIGS.get(provider_type)
return config.short_uid_extractor if config else _identity_short_uid
# Dynamic Isolation Label Helpers
# --------------------------------
@@ -5,14 +5,14 @@ This module handles:
- Adding resource labels to Cartography nodes for efficient lookups
- Loading Prowler findings into the graph
- Linking findings to resources
- Cleaning up stale findings
"""
from collections import defaultdict
from typing import Any, Callable, Generator
from typing import Any, Generator
from uuid import UUID
import neo4j
from cartography.config import Config as CartographyConfig
from celery.utils.log import get_task_logger
from tasks.jobs.attack_paths.config import (
@@ -21,10 +21,10 @@ from tasks.jobs.attack_paths.config import (
get_node_uid_field,
get_provider_resource_label,
get_root_node_label,
get_short_uid_extractor,
)
from tasks.jobs.attack_paths.queries import (
ADD_RESOURCE_LABEL_TEMPLATE,
CLEANUP_FINDINGS_TEMPLATE,
INSERT_FINDING_TEMPLATE,
render_cypher_template,
)
@@ -58,9 +58,7 @@ _DB_QUERY_FIELDS = [
]
def _to_neo4j_dict(
record: dict[str, Any], resource_uid: str, resource_short_uid: str
) -> dict[str, Any]:
def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]:
"""Transform a Django `.values()` record into a `dict` ready for Neo4j ingestion."""
return {
"id": str(record["id"]),
@@ -78,7 +76,6 @@ def _to_neo4j_dict(
"muted": record["muted"],
"muted_reason": record["muted_reason"],
"resource_uid": resource_uid,
"resource_short_uid": resource_short_uid,
}
@@ -91,21 +88,18 @@ def analysis(
prowler_api_provider: Provider,
scan_id: str,
config: CartographyConfig,
) -> tuple[int, int]:
) -> None:
"""
Main entry point for Prowler findings analysis.
Adds resource labels and loads findings.
Returns (labeled_nodes, findings_loaded).
Adds resource labels, loads findings, and cleans up stale data.
"""
total_labeled = add_resource_label(
add_resource_label(
neo4j_session, prowler_api_provider.provider, str(prowler_api_provider.uid)
)
findings_data = stream_findings_with_resources(prowler_api_provider, scan_id)
total_loaded = load_findings(
neo4j_session, findings_data, prowler_api_provider, config
)
return total_labeled, total_loaded
load_findings(neo4j_session, findings_data, prowler_api_provider, config)
cleanup_findings(neo4j_session, prowler_api_provider, config)
def add_resource_label(
@@ -155,11 +149,12 @@ def load_findings(
findings_batches: Generator[list[dict[str, Any]], None, None],
prowler_api_provider: Provider,
config: CartographyConfig,
) -> int:
) -> None:
"""Load Prowler findings into the graph, linking them to resources."""
query = render_cypher_template(
INSERT_FINDING_TEMPLATE,
{
"__ROOT_NODE_LABEL__": get_root_node_label(prowler_api_provider.provider),
"__NODE_UID_FIELD__": get_node_uid_field(prowler_api_provider.provider),
"__RESOURCE_LABEL__": get_provider_resource_label(
prowler_api_provider.provider
@@ -168,14 +163,13 @@ def load_findings(
)
parameters = {
"provider_uid": str(prowler_api_provider.uid),
"last_updated": config.update_tag,
"prowler_version": ProwlerConfig.prowler_version,
}
batch_num = 0
total_records = 0
edges_merged = 0
edges_dropped = 0
for batch in findings_batches:
batch_num += 1
batch_size = len(batch)
@@ -184,16 +178,31 @@ def load_findings(
parameters["findings_data"] = batch
logger.info(f"Loading findings batch {batch_num} ({batch_size} records)")
summary = neo4j_session.run(query, parameters).single()
if summary is not None:
edges_merged += summary.get("merged_count", 0)
edges_dropped += summary.get("dropped_count", 0)
neo4j_session.run(query, parameters)
logger.info(
f"Finished loading {total_records} records in {batch_num} batches "
f"(edges_merged={edges_merged}, edges_dropped={edges_dropped})"
)
return total_records
logger.info(f"Finished loading {total_records} records in {batch_num} batches")
def cleanup_findings(
neo4j_session: neo4j.Session,
prowler_api_provider: Provider,
config: CartographyConfig,
) -> None:
"""Remove stale findings (classic Cartography behaviour)."""
parameters = {
"last_updated": config.update_tag,
"batch_size": BATCH_SIZE,
}
batch = 1
deleted_count = 1
while deleted_count > 0:
logger.info(f"Cleaning findings batch {batch}")
result = neo4j_session.run(CLEANUP_FINDINGS_TEMPLATE, parameters)
deleted_count = result.single().get("deleted_findings_count", 0)
batch += 1
# Findings Streaming (Generator-based)
@@ -217,9 +226,8 @@ def stream_findings_with_resources(
)
tenant_id = prowler_api_provider.tenant_id
short_uid_extractor = get_short_uid_extractor(prowler_api_provider.provider)
for batch in _paginate_findings(tenant_id, scan_id):
enriched = _enrich_batch_with_resources(batch, tenant_id, short_uid_extractor)
enriched = _enrich_batch_with_resources(batch, tenant_id)
if enriched:
yield enriched
@@ -265,9 +273,7 @@ def _fetch_findings_batch(
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
# Use `all_objects` to get `Findings` even on soft-deleted `Providers`
# But even the provider is already validated as active in this context
qs = FindingModel.all_objects.filter(
tenant_id=tenant_id, scan_id=scan_id
).order_by("id")
qs = FindingModel.all_objects.filter(scan_id=scan_id).order_by("id")
if after_id is not None:
qs = qs.filter(id__gt=after_id)
@@ -282,7 +288,6 @@ def _fetch_findings_batch(
def _enrich_batch_with_resources(
findings_batch: list[dict[str, Any]],
tenant_id: str,
short_uid_extractor: Callable[[str], str],
) -> list[dict[str, Any]]:
"""
Enrich findings with their resource UIDs.
@@ -294,7 +299,7 @@ def _enrich_batch_with_resources(
resource_map = _build_finding_resource_map(finding_ids, tenant_id)
return [
_to_neo4j_dict(finding, resource_uid, short_uid_extractor(resource_uid))
_to_neo4j_dict(finding, resource_uid)
for finding in findings_batch
for resource_uid in resource_map.get(finding["id"], [])
]
@@ -13,13 +13,14 @@ from tasks.jobs.attack_paths.config import (
logger = get_task_logger(__name__)
# Indexes for Prowler Findings and resource lookups
# Indexes for Prowler findings and resource lookups
FINDINGS_INDEX_STATEMENTS = [
# Resource indexes for Prowler Finding lookups
"CREATE INDEX aws_resource_arn IF NOT EXISTS FOR (n:_AWSResource) ON (n.arn);",
"CREATE INDEX aws_resource_id IF NOT EXISTS FOR (n:_AWSResource) ON (n.id);",
# Prowler Finding indexes
f"CREATE INDEX prowler_finding_id IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.id);",
f"CREATE INDEX prowler_finding_lastupdated IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.lastupdated);",
f"CREATE INDEX prowler_finding_status IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.status);",
# Internet node index for MERGE lookups
f"CREATE INDEX internet_id IF NOT EXISTS FOR (n:{INTERNET_NODE_LABEL}) ON (n.id);",
@@ -32,59 +32,63 @@ ADD_RESOURCE_LABEL_TEMPLATE = """
"""
INSERT_FINDING_TEMPLATE = f"""
MATCH (account:__ROOT_NODE_LABEL__ {{id: $provider_uid}})
UNWIND $findings_data AS finding_data
OPTIONAL MATCH (resource_by_uid:__RESOURCE_LABEL__ {{__NODE_UID_FIELD__: finding_data.resource_uid}})
OPTIONAL MATCH (resource_by_id:__RESOURCE_LABEL__ {{id: finding_data.resource_uid}})
OPTIONAL MATCH (account)-->(resource_by_uid:__RESOURCE_LABEL__)
WHERE resource_by_uid.__NODE_UID_FIELD__ = finding_data.resource_uid
WITH account, finding_data, resource_by_uid
OPTIONAL MATCH (account)-->(resource_by_id:__RESOURCE_LABEL__)
WHERE resource_by_uid IS NULL
OPTIONAL MATCH (resource_by_short:__RESOURCE_LABEL__ {{id: finding_data.resource_short_uid}})
WHERE resource_by_uid IS NULL AND resource_by_id IS NULL
WITH finding_data,
resource_by_uid,
resource_by_id,
head(collect(resource_by_short)) AS resource_by_short
WITH finding_data,
COALESCE(resource_by_uid, resource_by_id, resource_by_short) AS resource
AND resource_by_id.id = finding_data.resource_uid
WITH account, finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource
WHERE resource IS NOT NULL
FOREACH (_ IN CASE WHEN resource IS NOT NULL THEN [1] ELSE [] END |
MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}})
ON CREATE SET
finding.id = finding_data.id,
finding.uid = finding_data.uid,
finding.inserted_at = finding_data.inserted_at,
finding.updated_at = finding_data.updated_at,
finding.first_seen_at = finding_data.first_seen_at,
finding.scan_id = finding_data.scan_id,
finding.delta = finding_data.delta,
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.severity = finding_data.severity,
finding.check_id = finding_data.check_id,
finding.check_title = finding_data.check_title,
finding.muted = finding_data.muted,
finding.muted_reason = finding_data.muted_reason,
finding.firstseen = timestamp(),
finding.lastupdated = $last_updated,
finding._module_name = 'cartography:prowler',
finding._module_version = $prowler_version
ON MATCH SET
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.lastupdated = $last_updated
MERGE (resource)-[rel:HAS_FINDING]->(finding)
ON CREATE SET
rel.firstseen = timestamp(),
rel.lastupdated = $last_updated,
rel._module_name = 'cartography:prowler',
rel._module_version = $prowler_version
ON MATCH SET
rel.lastupdated = $last_updated
)
MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}})
ON CREATE SET
finding.id = finding_data.id,
finding.uid = finding_data.uid,
finding.inserted_at = finding_data.inserted_at,
finding.updated_at = finding_data.updated_at,
finding.first_seen_at = finding_data.first_seen_at,
finding.scan_id = finding_data.scan_id,
finding.delta = finding_data.delta,
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.severity = finding_data.severity,
finding.check_id = finding_data.check_id,
finding.check_title = finding_data.check_title,
finding.muted = finding_data.muted,
finding.muted_reason = finding_data.muted_reason,
finding.firstseen = timestamp(),
finding.lastupdated = $last_updated,
finding._module_name = 'cartography:prowler',
finding._module_version = $prowler_version
ON MATCH SET
finding.status = finding_data.status,
finding.status_extended = finding_data.status_extended,
finding.lastupdated = $last_updated
WITH sum(CASE WHEN resource IS NOT NULL THEN 1 ELSE 0 END) AS merged_count,
sum(CASE WHEN resource IS NULL THEN 1 ELSE 0 END) AS dropped_count
MERGE (resource)-[rel:HAS_FINDING]->(finding)
ON CREATE SET
rel.firstseen = timestamp(),
rel.lastupdated = $last_updated,
rel._module_name = 'cartography:prowler',
rel._module_version = $prowler_version
ON MATCH SET
rel.lastupdated = $last_updated
"""
RETURN merged_count, dropped_count
CLEANUP_FINDINGS_TEMPLATE = f"""
MATCH (finding:{PROWLER_FINDING_LABEL})
WHERE finding.lastupdated < $last_updated
WITH finding LIMIT $batch_size
DETACH DELETE finding
RETURN COUNT(finding) AS deleted_findings_count
"""
# Internet queries (used by internet.py)
+10 -38
View File
@@ -55,7 +55,6 @@ exception propagates to Celery.
import logging
import time
from typing import Any
from cartography.config import Config as CartographyConfig
@@ -145,12 +144,6 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
attack_paths_scan, task_id, tenant_cartography_config
)
scan_t0 = time.perf_counter()
logger.info(
f"Starting Attack Paths scan ({attack_paths_scan.id}) for "
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
subgraph_dropped = False
sync_completed = False
provider_gated = False
@@ -176,7 +169,6 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 2)
# The real scan, where iterates over cloud services
t0 = time.perf_counter()
ingestion_exceptions = utils.call_within_event_loop(
cartography_ingestion_function,
tmp_neo4j_session,
@@ -185,23 +177,19 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
prowler_sdk_provider,
attack_paths_scan,
)
logger.info(
f"Cartography ingestion completed in {time.perf_counter() - t0:.3f}s "
f"(failed_syncs={len(ingestion_exceptions)})"
)
# Post-processing: Just keeping it to be more Cartography compliant
logger.info(
f"Syncing Cartography ontology for AWS account {prowler_api_provider.uid}"
)
cartography_ontology.run(tmp_neo4j_session, tmp_cartography_config)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 94)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 95)
logger.info(
f"Syncing Cartography analysis for AWS account {prowler_api_provider.uid}"
)
cartography_analysis.run(tmp_neo4j_session, tmp_cartography_config)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 95)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96)
# Creating Internet node and CAN_ACCESS relationships
logger.info(
@@ -210,20 +198,14 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
internet.analysis(
tmp_neo4j_session, prowler_api_provider, tmp_cartography_config
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 96)
# Adding Prowler Finding nodes and relationships
logger.info(
f"Syncing Prowler analysis for AWS account {prowler_api_provider.uid}"
)
t0 = time.perf_counter()
labeled_nodes, findings_loaded = findings.analysis(
findings.analysis(
tmp_neo4j_session, prowler_api_provider, scan_id, tmp_cartography_config
)
logger.info(
f"Prowler analysis completed in {time.perf_counter() - t0:.3f}s "
f"(findings={findings_loaded}, labeled_nodes={labeled_nodes})"
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 97)
logger.info(
@@ -245,33 +227,22 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
logger.info(f"Deleting existing provider graph in {tenant_database_name}")
db_utils.set_provider_graph_data_ready(attack_paths_scan, False)
provider_gated = True
t0 = time.perf_counter()
deleted_nodes = graph_database.drop_subgraph(
graph_database.drop_subgraph(
database=tenant_database_name,
provider_id=str(prowler_api_provider.id),
)
logger.info(
f"Deleted existing provider graph in {time.perf_counter() - t0:.3f}s "
f"(deleted_nodes={deleted_nodes})"
)
subgraph_dropped = True
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 98)
logger.info(
f"Syncing graph from {tmp_database_name} into {tenant_database_name}"
)
t0 = time.perf_counter()
sync_result = sync.sync_graph(
sync.sync_graph(
source_database=tmp_database_name,
target_database=tenant_database_name,
tenant_id=str(prowler_api_provider.tenant_id),
provider_id=str(prowler_api_provider.id),
)
logger.info(
f"Synced graph in {time.perf_counter() - t0:.3f}s "
f"(nodes={sync_result['nodes']}, relationships={sync_result['relationships']})"
)
sync_completed = True
db_utils.set_graph_data_ready(attack_paths_scan, True)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 99)
@@ -279,16 +250,17 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
logger.info(f"Clearing Neo4j cache for database {tenant_database_name}")
graph_database.clear_cache(tenant_database_name)
logger.info(
f"Completed Cartography ({attack_paths_scan.id}) for "
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
logger.info(f"Dropping temporary Neo4j database {tmp_database_name}")
graph_database.drop_database(tmp_database_name)
db_utils.finish_attack_paths_scan(
attack_paths_scan, StateChoices.COMPLETED, ingestion_exceptions
)
logger.info(
f"Attack Paths scan completed in {time.perf_counter() - scan_t0:.3f}s "
f"(state=completed, failed_syncs={len(ingestion_exceptions)})"
)
return ingestion_exceptions
except Exception as e:
@@ -5,8 +5,6 @@ This module handles syncing graph data from temporary scan databases
to the tenant database, adding provider isolation labels and properties.
"""
import time
from collections import defaultdict
from typing import Any
@@ -83,7 +81,6 @@ def sync_nodes(
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
"""
t0 = time.perf_counter()
last_id = -1
total_synced = 0
@@ -120,7 +117,7 @@ def sync_nodes(
total_synced += batch_count
logger.info(
f"Synced {total_synced} nodes from {source_database} to {target_database} in {time.perf_counter() - t0:.3f}s"
f"Synced {total_synced} nodes from {source_database} to {target_database}"
)
return total_synced
@@ -139,7 +136,6 @@ def sync_relationships(
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
"""
t0 = time.perf_counter()
last_id = -1
total_synced = 0
@@ -170,7 +166,7 @@ def sync_relationships(
total_synced += batch_count
logger.info(
f"Synced {total_synced} relationships from {source_database} to {target_database} in {time.perf_counter() - t0:.3f}s"
f"Synced {total_synced} relationships from {source_database} to {target_database}"
)
return total_synced
+9 -18
View File
@@ -752,19 +752,11 @@ def _process_finding_micro_batch(
)
if mappings_to_create:
created_mappings = ResourceFindingMapping.objects.bulk_create(
ResourceFindingMapping.objects.bulk_create(
mappings_to_create,
batch_size=SCAN_DB_BATCH_SIZE,
ignore_conflicts=True,
unique_fields=["tenant_id", "resource_id", "finding_id"],
)
inserted = sum(1 for m in created_mappings if m.pk)
if inserted != len(mappings_to_create):
logger.error(
f"scan {scan_instance.id}: expected "
f"{len(mappings_to_create)} ResourceFindingMapping rows, "
f"inserted {inserted}. Rolling back micro-batch."
)
# Update finding denormalized arrays
findings_to_update = []
@@ -1198,9 +1190,6 @@ def aggregate_findings(tenant_id: str, scan_id: str):
)
for agg in aggregation
}
# Delete first so re-runs (e.g. post-mute reaggregation) don't hit
# the `unique_scan_summary` constraint.
ScanSummary.objects.filter(tenant_id=tenant_id, scan_id=scan_id).delete()
ScanSummary.objects.bulk_create(scan_aggregations, batch_size=3000)
@@ -1815,9 +1804,11 @@ def aggregate_finding_group_summaries(tenant_id: str, scan_id: str):
)
# Aggregate findings by check_id for this scan.
# `pass_count`, `fail_count` and `manual_count` only count non-muted
# findings. Muted findings are tracked separately via the
# `*_muted_count` fields.
# `pass_count`, `fail_count` and `manual_count` count *every* finding
# in this group, regardless of mute state, so the aggregated `status`
# always reflects the underlying check outcome (FAIL > PASS > MANUAL)
# even when the group is fully muted. The orthogonal `muted` flag is
# what tells whether the group has any actionable (non-muted) findings.
aggregated = (
Finding.objects.filter(
tenant_id=tenant_id,
@@ -1826,9 +1817,9 @@ def aggregate_finding_group_summaries(tenant_id: str, scan_id: str):
.values("check_id")
.annotate(
severity_order=Max(severity_case),
pass_count=Count("id", filter=Q(status="PASS", muted=False)),
fail_count=Count("id", filter=Q(status="FAIL", muted=False)),
manual_count=Count("id", filter=Q(status="MANUAL", muted=False)),
pass_count=Count("id", filter=Q(status="PASS")),
fail_count=Count("id", filter=Q(status="FAIL")),
manual_count=Count("id", filter=Q(status="MANUAL")),
pass_muted_count=Count("id", filter=Q(status="PASS", muted=True)),
fail_muted_count=Count("id", filter=Q(status="FAIL", muted=True)),
manual_muted_count=Count("id", filter=Q(status="MANUAL", muted=True)),
+9 -27
View File
@@ -771,22 +771,15 @@ def aggregate_finding_group_summaries_task(tenant_id: str, scan_id: str):
)
@set_tenant(keep_tenant=True)
def reaggregate_all_finding_group_summaries_task(tenant_id: str):
"""Reaggregate every pre-aggregated summary table for this tenant.
"""Reaggregate finding group summaries for every (provider, day) combination.
Mirrors the unbounded scope of `mute_historical_findings_task`: that task
rewrites every Finding row whose UID matches a mute rule, with no time
limit. To keep the pre-aggregated tables consistent with that update,
this task re-runs the same per-scan aggregation pipeline that scan
completion runs on the latest completed scan of every (provider, day)
pair, rebuilding the three tables that power the read endpoints:
- `ScanSummary` and `DailySeveritySummary` -> `/overviews/findings`,
`/overviews/findings-severity`, `/overviews/services`.
- `FindingGroupDailySummary` -> `/finding-groups` and
`/finding-groups/latest`.
Per-scan pipelines are dispatched in parallel via a Celery group so
wallclock scales with the worker pool.
limit. To keep the daily summaries consistent with that update, this task
re-runs the aggregator on the latest completed scan of every (provider,
day) pair that exists in the database. Tasks are dispatched in parallel
via a Celery group so the wallclock scales with the worker pool, not with
the number of pairs.
"""
completed_scans = list(
Scan.objects.filter(
@@ -811,23 +804,12 @@ def reaggregate_all_finding_group_summaries_task(tenant_id: str):
scan_ids = list(latest_scans.values())
if scan_ids:
logger.info(
"Reaggregating overview/finding summaries for %d scans (provider x day)",
"Reaggregating finding group summaries for %d scans (provider x day)",
len(scan_ids),
)
# DailySeveritySummary reads from ScanSummary, so ScanSummary must be
# recomputed first; FindingGroupDailySummary reads from Finding
# directly and can run in parallel with the severity step.
group(
chain(
perform_scan_summary_task.si(tenant_id=tenant_id, scan_id=scan_id),
group(
aggregate_daily_severity_task.si(
tenant_id=tenant_id, scan_id=scan_id
),
aggregate_finding_group_summaries_task.si(
tenant_id=tenant_id, scan_id=scan_id
),
),
aggregate_finding_group_summaries_task.si(
tenant_id=tenant_id, scan_id=scan_id
)
for scan_id in scan_ids
).apply_async()
@@ -38,14 +38,11 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch(
"tasks.jobs.attack_paths.scan.sync.sync_graph",
return_value={"nodes": 0, "relationships": 0},
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph", return_value=0)
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph")
@patch("tasks.jobs.attack_paths.scan.indexes.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -191,7 +188,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -290,7 +287,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -393,7 +390,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -492,17 +489,14 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch(
"tasks.jobs.attack_paths.scan.sync.sync_graph",
return_value={"nodes": 0, "relationships": 0},
)
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.drop_subgraph",
side_effect=RuntimeError("drop failed"),
)
@patch("tasks.jobs.attack_paths.scan.indexes.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -615,7 +609,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph")
@patch("tasks.jobs.attack_paths.scan.indexes.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -724,14 +718,11 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch(
"tasks.jobs.attack_paths.scan.sync.sync_graph",
return_value={"nodes": 0, "relationships": 0},
)
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_subgraph")
@patch("tasks.jobs.attack_paths.scan.indexes.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -842,17 +833,14 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.update_attack_paths_scan_progress")
@patch("tasks.jobs.attack_paths.scan.db_utils.starting_attack_paths_scan")
@patch(
"tasks.jobs.attack_paths.scan.sync.sync_graph",
return_value={"nodes": 0, "relationships": 0},
)
@patch("tasks.jobs.attack_paths.scan.sync.sync_graph")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.drop_subgraph",
side_effect=RuntimeError("drop failed"),
)
@patch("tasks.jobs.attack_paths.scan.indexes.create_sync_indexes")
@patch("tasks.jobs.attack_paths.scan.internet.analysis")
@patch("tasks.jobs.attack_paths.scan.findings.analysis", return_value=(0, 0))
@patch("tasks.jobs.attack_paths.scan.findings.analysis")
@patch("tasks.jobs.attack_paths.scan.indexes.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_ontology.run")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@@ -1285,13 +1273,11 @@ class TestAttackPathsFindingsHelpers:
config = SimpleNamespace(update_tag=12345)
mock_session = MagicMock()
first_result = MagicMock()
first_result.single.return_value = {"merged_count": 1, "dropped_count": 0}
second_result = MagicMock()
second_result.single.return_value = {"merged_count": 0, "dropped_count": 1}
mock_session.run.side_effect = [first_result, second_result]
with (
patch(
"tasks.jobs.attack_paths.findings.get_root_node_label",
return_value="AWSAccount",
),
patch(
"tasks.jobs.attack_paths.findings.get_node_uid_field",
return_value="arn",
@@ -1300,7 +1286,6 @@ class TestAttackPathsFindingsHelpers:
"tasks.jobs.attack_paths.findings.get_provider_resource_label",
return_value="_AWSResource",
),
patch("tasks.jobs.attack_paths.findings.logger") as mock_logger,
):
findings_module.load_findings(
mock_session, findings_generator(), provider, config
@@ -1309,16 +1294,26 @@ class TestAttackPathsFindingsHelpers:
assert mock_session.run.call_count == 2
for call_args in mock_session.run.call_args_list:
params = call_args.args[1]
assert params["provider_uid"] == str(provider.uid)
assert params["last_updated"] == config.update_tag
assert "findings_data" in params
summary_log = next(
call_args.args[0]
for call_args in mock_logger.info.call_args_list
if call_args.args and "Finished loading" in call_args.args[0]
)
assert "edges_merged=1" in summary_log
assert "edges_dropped=1" in summary_log
def test_cleanup_findings_runs_batches(self, providers_fixture):
provider = providers_fixture[0]
config = SimpleNamespace(update_tag=1024)
mock_session = MagicMock()
first_batch = MagicMock()
first_batch.single.return_value = {"deleted_findings_count": 3}
second_batch = MagicMock()
second_batch.single.return_value = {"deleted_findings_count": 0}
mock_session.run.side_effect = [first_batch, second_batch]
findings_module.cleanup_findings(mock_session, provider, config)
assert mock_session.run.call_count == 2
params = mock_session.run.call_args.args[1]
assert params["last_updated"] == config.update_tag
def test_stream_findings_with_resources_returns_latest_scan_data(
self,
@@ -1499,12 +1494,11 @@ class TestAttackPathsFindingsHelpers:
"default",
):
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id), lambda uid: f"short:{uid}"
[finding_dict], str(tenant.id)
)
assert len(result) == 1
assert result[0]["resource_uid"] == resource.uid
assert result[0]["resource_short_uid"] == f"short:{resource.uid}"
assert result[0]["id"] == str(finding.id)
assert result[0]["status"] == "FAIL"
@@ -1588,7 +1582,7 @@ class TestAttackPathsFindingsHelpers:
"default",
):
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id), lambda uid: uid
[finding_dict], str(tenant.id)
)
assert len(result) == 3
@@ -1662,7 +1656,7 @@ class TestAttackPathsFindingsHelpers:
patch("tasks.jobs.attack_paths.findings.logger") as mock_logger,
):
result = findings_module._enrich_batch_with_resources(
[finding_dict], str(tenant.id), lambda uid: uid
[finding_dict], str(tenant.id)
)
assert len(result) == 0
@@ -1696,6 +1690,10 @@ class TestAttackPathsFindingsHelpers:
yield # Make it a generator
with (
patch(
"tasks.jobs.attack_paths.findings.get_root_node_label",
return_value="AWSAccount",
),
patch(
"tasks.jobs.attack_paths.findings.get_node_uid_field",
return_value="arn",
@@ -1709,63 +1707,6 @@ class TestAttackPathsFindingsHelpers:
mock_session.run.assert_not_called()
@pytest.mark.parametrize(
"uid, expected",
[
(
"arn:aws:ec2:us-east-1:552455647653:instance/i-05075b63eb51baacb",
"i-05075b63eb51baacb",
),
(
"arn:aws:ec2:us-east-1:123456789012:volume/vol-0abcd1234ef567890",
"vol-0abcd1234ef567890",
),
(
"arn:aws:ec2:us-east-1:123456789012:security-group/sg-0123abcd",
"sg-0123abcd",
),
("arn:aws:s3:::my-bucket-name", "my-bucket-name"),
("arn:aws:iam::123456789012:role/MyRole", "MyRole"),
(
"arn:aws:lambda:us-east-1:123456789012:function:my-function",
"my-function",
),
("i-05075b63eb51baacb", "i-05075b63eb51baacb"),
],
)
def test_extract_short_uid_aws_variants(self, uid, expected):
from tasks.jobs.attack_paths.aws import extract_short_uid
assert extract_short_uid(uid) == expected
def test_insert_finding_template_has_short_id_fallback(self):
from tasks.jobs.attack_paths.queries import (
INSERT_FINDING_TEMPLATE,
render_cypher_template,
)
rendered = render_cypher_template(
INSERT_FINDING_TEMPLATE,
{
"__NODE_UID_FIELD__": "arn",
"__RESOURCE_LABEL__": "_AWSResource",
},
)
assert (
"resource_by_uid:_AWSResource {arn: finding_data.resource_uid}" in rendered
)
assert "resource_by_id:_AWSResource {id: finding_data.resource_uid}" in rendered
assert (
"resource_by_short:_AWSResource {id: finding_data.resource_short_uid}"
in rendered
)
assert "head(collect(resource_by_short)) AS resource_by_short" in rendered
assert (
"COALESCE(resource_by_uid, resource_by_id, resource_by_short)" in rendered
)
assert "RETURN merged_count, dropped_count" in rendered
class TestAddResourceLabel:
def test_add_resource_label_applies_private_label(self):
-59
View File
@@ -36,7 +36,6 @@ from api.models import (
Provider,
Resource,
Scan,
ScanSummary,
StateChoices,
StatusChoices,
)
@@ -3359,64 +3358,6 @@ class TestAggregateFindings:
regions = {s.region for s in summaries}
assert regions == {"us-east-1", "us-west-2"}
def test_aggregate_findings_is_idempotent_on_rerun(
self,
tenants_fixture,
scans_fixture,
findings_fixture,
):
"""Re-running `aggregate_findings` for the same scan must not violate
the `unique_scan_summary` constraint, and the resulting row set for
the scan must match the single-run output. This is exercised by the
post-mute reaggregation pipeline, which re-dispatches
`perform_scan_summary_task` against scans whose summaries already
exist."""
tenant = tenants_fixture[0]
scan = scans_fixture[0]
aggregate_findings(str(tenant.id), str(scan.id))
first_run_ids = set(
ScanSummary.all_objects.filter(
tenant_id=tenant.id, scan_id=scan.id
).values_list("id", flat=True)
)
first_run_rows = list(
ScanSummary.all_objects.filter(tenant_id=tenant.id, scan_id=scan.id).values(
"check_id",
"service",
"severity",
"region",
"fail",
"_pass",
"muted",
"total",
)
)
# Second invocation must not raise and must replace the rows without
# leaving duplicates behind.
aggregate_findings(str(tenant.id), str(scan.id))
second_run_ids = set(
ScanSummary.all_objects.filter(
tenant_id=tenant.id, scan_id=scan.id
).values_list("id", flat=True)
)
second_run_rows = list(
ScanSummary.all_objects.filter(tenant_id=tenant.id, scan_id=scan.id).values(
"check_id",
"service",
"severity",
"region",
"fail",
"_pass",
"muted",
"total",
)
)
assert second_run_rows == first_run_rows
assert first_run_ids.isdisjoint(second_run_ids)
@pytest.mark.django_db
class TestAggregateFindingsByRegion:
+22 -66
View File
@@ -2359,20 +2359,11 @@ class TestReaggregateAllFindingGroupSummaries:
def setup_method(self):
self.tenant_id = str(uuid.uuid4())
@patch("tasks.tasks.chain")
@patch("tasks.tasks.group")
@patch("tasks.tasks.aggregate_finding_group_summaries_task")
@patch("tasks.tasks.aggregate_daily_severity_task")
@patch("tasks.tasks.perform_scan_summary_task")
@patch("tasks.tasks.Scan.objects.filter")
def test_dispatches_subtasks_for_each_provider_per_day(
self,
mock_scan_filter,
mock_scan_summary_task,
mock_daily_severity_task,
mock_finding_group_task,
mock_group,
mock_chain,
self, mock_scan_filter, mock_agg_task, mock_group
):
provider_id_1 = uuid.uuid4()
provider_id_2 = uuid.uuid4()
@@ -2382,13 +2373,8 @@ class TestReaggregateAllFindingGroupSummaries:
today = datetime.now(tz=timezone.utc)
yesterday = today - timedelta(days=1)
mock_outer_group_result = MagicMock()
# The first `group()` call wraps the inner (severity, finding-group)
# parallel step; subsequent calls wrap the outer per-scan generator.
mock_group.side_effect = lambda *args, **kwargs: (
list(args[0]) if args and hasattr(args[0], "__iter__") else None,
mock_outer_group_result,
)[1]
mock_group_result = MagicMock()
mock_group.side_effect = lambda gen: (list(gen), mock_group_result)[1]
mock_scan_filter.return_value.order_by.return_value.values.return_value = [
{
@@ -2411,40 +2397,23 @@ class TestReaggregateAllFindingGroupSummaries:
result = reaggregate_all_finding_group_summaries_task(tenant_id=self.tenant_id)
assert result == {"scans_reaggregated": 3}
expected_scan_ids = {
str(scan_id_today_p1),
str(scan_id_today_p2),
str(scan_id_yesterday_p1),
}
for task_mock in (
mock_scan_summary_task,
mock_daily_severity_task,
mock_finding_group_task,
):
assert task_mock.si.call_count == 3
dispatched = {
call.kwargs["scan_id"] for call in task_mock.si.call_args_list
}
assert dispatched == expected_scan_ids
for call in task_mock.si.call_args_list:
assert call.kwargs["tenant_id"] == self.tenant_id
assert mock_chain.call_count == 3
mock_outer_group_result.apply_async.assert_called_once()
assert mock_agg_task.si.call_count == 3
mock_agg_task.si.assert_any_call(
tenant_id=self.tenant_id, scan_id=str(scan_id_today_p1)
)
mock_agg_task.si.assert_any_call(
tenant_id=self.tenant_id, scan_id=str(scan_id_today_p2)
)
mock_agg_task.si.assert_any_call(
tenant_id=self.tenant_id, scan_id=str(scan_id_yesterday_p1)
)
mock_group_result.apply_async.assert_called_once()
@patch("tasks.tasks.chain")
@patch("tasks.tasks.group")
@patch("tasks.tasks.aggregate_finding_group_summaries_task")
@patch("tasks.tasks.aggregate_daily_severity_task")
@patch("tasks.tasks.perform_scan_summary_task")
@patch("tasks.tasks.Scan.objects.filter")
def test_dedupes_scans_to_latest_per_provider_per_day(
self,
mock_scan_filter,
mock_scan_summary_task,
mock_daily_severity_task,
mock_finding_group_task,
mock_group,
mock_chain,
self, mock_scan_filter, mock_agg_task, mock_group
):
"""When several scans run on the same day for the same provider, only
the latest one is dispatched (matching the daily summary unique key)."""
@@ -2454,11 +2423,8 @@ class TestReaggregateAllFindingGroupSummaries:
today_late = datetime.now(tz=timezone.utc)
today_early = today_late - timedelta(hours=4)
mock_outer_group_result = MagicMock()
mock_group.side_effect = lambda *args, **kwargs: (
list(args[0]) if args and hasattr(args[0], "__iter__") else None,
mock_outer_group_result,
)[1]
mock_group_result = MagicMock()
mock_group.side_effect = lambda gen: (list(gen), mock_group_result)[1]
# Returned ordered by `-completed_at`, so the most recent comes first.
mock_scan_filter.return_value.order_by.return_value.values.return_value = [
@@ -2477,27 +2443,17 @@ class TestReaggregateAllFindingGroupSummaries:
result = reaggregate_all_finding_group_summaries_task(tenant_id=self.tenant_id)
assert result == {"scans_reaggregated": 1}
for task_mock in (
mock_scan_summary_task,
mock_daily_severity_task,
mock_finding_group_task,
):
task_mock.si.assert_called_once_with(
tenant_id=self.tenant_id, scan_id=str(latest_scan_today)
)
mock_chain.assert_called_once()
mock_outer_group_result.apply_async.assert_called_once()
mock_agg_task.si.assert_called_once_with(
tenant_id=self.tenant_id, scan_id=str(latest_scan_today)
)
mock_group_result.apply_async.assert_called_once()
@patch("tasks.tasks.chain")
@patch("tasks.tasks.group")
@patch("tasks.tasks.Scan.objects.filter")
def test_no_completed_scans_skips_dispatch(
self, mock_scan_filter, mock_group, mock_chain
):
def test_no_completed_scans_skips_dispatch(self, mock_scan_filter, mock_group):
mock_scan_filter.return_value.order_by.return_value.values.return_value = []
result = reaggregate_all_finding_group_summaries_task(tenant_id=self.tenant_id)
assert result == {"scans_reaggregated": 0}
mock_group.assert_not_called()
mock_chain.assert_not_called()
@@ -34,10 +34,6 @@ spec:
securityContext:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker.initContainers }}
initContainers:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: worker
{{- with .Values.worker.securityContext }}
@@ -32,10 +32,6 @@ spec:
securityContext:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker_beat.initContainers }}
initContainers:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: worker-beat
{{- with .Values.worker_beat.securityContext }}
+1 -1
View File
@@ -220,7 +220,7 @@ def _send_prowler_results(prowler_results, _prowler_version, options):
try:
_debug("RESULT MSG --- {0}".format(_check_result), 2)
_check_result = json.loads(TEMPLATE_CHECK.format(_check_result))
except Exception:
except:
_debug(
"INVALID JSON --- {0}".format(TEMPLATE_CHECK.format(_check_result)), 1
)
+10 -17
View File
@@ -1,11 +1,4 @@
services:
api-dev-init:
image: busybox:1.37.0
volumes:
- ./_data/api:/data
command: ["sh", "-c", "chown -R 1000:1000 /data"]
restart: "no"
api-dev:
hostname: "prowler-api"
image: prowler-api-dev
@@ -28,20 +21,12 @@ services:
- ./_data/api:/home/prowler/.config/prowler-api
- outputs:/tmp/prowler_api_output
depends_on:
api-dev-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
valkey:
condition: service_healthy
neo4j:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "wget -q -O /dev/null http://127.0.0.1:${DJANGO_PORT:-8080}/api/v1/ || exit 1"]
interval: 10s
timeout: 5s
retries: 12
start_period: 60s
entrypoint:
- "/home/prowler/docker-entrypoint.sh"
- "dev"
@@ -154,7 +139,11 @@ services:
- ./api/docker-entrypoint.sh:/home/prowler/docker-entrypoint.sh
- outputs:/tmp/prowler_api_output
depends_on:
api-dev:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
neo4j:
condition: service_healthy
ulimits:
nofile:
@@ -176,7 +165,11 @@ services:
- path: ./.env
required: false
depends_on:
api-dev:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
neo4j:
condition: service_healthy
ulimits:
nofile:
+6 -17
View File
@@ -5,13 +5,6 @@
# docker compose -f docker-compose-dev.yml up
#
services:
api-init:
image: busybox:1.37.0
volumes:
- ./_data/api:/data
command: ["sh", "-c", "chown -R 1000:1000 /data"]
restart: "no"
api:
hostname: "prowler-api"
image: prowlercloud/prowler-api:${PROWLER_API_VERSION:-stable}
@@ -24,20 +17,12 @@ services:
- ./_data/api:/home/prowler/.config/prowler-api
- output:/tmp/prowler_api_output
depends_on:
api-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
valkey:
condition: service_healthy
neo4j:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "wget -q -O /dev/null http://127.0.0.1:${DJANGO_PORT:-8080}/api/v1/ || exit 1"]
interval: 10s
timeout: 5s
retries: 12
start_period: 60s
entrypoint:
- "/home/prowler/docker-entrypoint.sh"
- "prod"
@@ -129,7 +114,9 @@ services:
volumes:
- "output:/tmp/prowler_api_output"
depends_on:
api:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
ulimits:
nofile:
@@ -145,7 +132,9 @@ services:
- path: ./.env
required: false
depends_on:
api:
valkey:
condition: service_healthy
postgres:
condition: service_healthy
ulimits:
nofile:
+3 -7
View File
@@ -118,22 +118,18 @@ In case you have any doubts, consult the [Poetry environment activation guide](h
### Pre-Commit Hooks
This repository uses Git pre-commit hooks managed by the [prek](https://prek.j178.dev/) tool, it is installed with `poetry install --with dev`. Next, run the following command in the root of this repository:
This repository uses Git pre-commit hooks managed by the [pre-commit](https://pre-commit.com/) tool, it is installed with `poetry install --with dev`. Next, run the following command in the root of this repository:
```shell
prek install
pre-commit install
```
Successful installation should produce the following output:
```shell
prek installed at `.git/hooks/pre-commit`
pre-commit installed at .git/hooks/pre-commit
```
<Warning>
If pre-commit hooks were previously installed, run `prek install --overwrite` to replace the existing hook. Otherwise, both tools will run on each commit.
</Warning>
### Code Quality and Security Checks
Before merging pull requests, several automated checks and utilities ensure code security and updated dependencies:
@@ -15,7 +15,8 @@ This document describes the internal architecture of Prowler Lighthouse AI, enab
Lighthouse AI operates as a Langchain-based agent that connects Large Language Models (LLMs) with Prowler security data through the Model Context Protocol (MCP).
![Prowler Lighthouse Architecture](/images/lighthouse-architecture.png)
<img className="block dark:hidden" src="/images/lighthouse-architecture-light.png" alt="Prowler Lighthouse Architecture" />
<img className="hidden dark:block" src="/images/lighthouse-architecture-dark.png" alt="Prowler Lighthouse Architecture" />
### Three-Tier Architecture
-19
View File
@@ -12,24 +12,6 @@
"dark": "/images/prowler-logo-white.png",
"light": "/images/prowler-logo-black.png"
},
"contextual": {
"options": [
"copy",
"view",
{
"title": "Request a feature",
"description": "Open a feature request on GitHub",
"icon": "plus",
"href": "https://github.com/prowler-cloud/prowler/issues/new?template=feature-request.yml"
},
{
"title": "Report an issue",
"description": "Open a bug report on GitHub",
"icon": "bug",
"href": "https://github.com/prowler-cloud/prowler/issues/new?template=bug_report.yml"
}
]
},
"navigation": {
"tabs": [
{
@@ -151,7 +133,6 @@
]
},
"user-guide/tutorials/prowler-app-attack-paths",
"user-guide/tutorials/prowler-app-finding-groups",
"user-guide/tutorials/prowler-cloud-public-ips",
{
"group": "Tutorials",
@@ -121,8 +121,8 @@ To update the environment file:
Edit the `.env` file and change version values:
```env
PROWLER_UI_VERSION="5.24.2"
PROWLER_API_VERSION="5.24.2"
PROWLER_UI_VERSION="5.22.0"
PROWLER_API_VERSION="5.22.0"
```
<Note>
@@ -59,10 +59,6 @@ Prowler Lighthouse AI is powerful, but there are limitations:
- **NextJS session dependence**: If your Prowler application session expires or logs out, Lighthouse AI will error out. Refresh and log back in to continue.
- **Response quality**: The response quality depends on the selected LLM provider and model. Choose models with strong tool-calling capabilities for best results. We recommend `gpt-5` model from OpenAI.
## Architecture
![Prowler Lighthouse Architecture](/images/lighthouse-architecture.png)
## Extending Lighthouse AI
Lighthouse AI retrieves data through Prowler MCP. To add new capabilities, extend the Prowler MCP Server with additional tools and Lighthouse AI discovers them automatically.
@@ -46,7 +46,8 @@ Search and retrieve official Prowler documentation:
The following diagram illustrates the Prowler MCP Server architecture and its integration points:
![Prowler MCP Server Schema](/images/prowler_mcp_schema.png)
<img className="block dark:hidden" src="/images/prowler_mcp_schema_light.png" alt="Prowler MCP Server Schema" />
<img className="hidden dark:block" src="/images/prowler_mcp_schema_dark.png" alt="Prowler MCP Server Schema" />
The architecture shows how AI assistants connect through the MCP protocol to access Prowler's three main components:
- Prowler Cloud/App for security operations
Binary file not shown.

Before

Width:  |  Height:  |  Size: 755 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 340 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 410 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 267 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 265 KiB

-37
View File
@@ -1,37 +0,0 @@
flowchart TB
browser([Browser])
subgraph NEXTJS["Next.js Server"]
route["API Route<br/>(auth + context assembly)"]
agent["LangChain Agent"]
subgraph TOOLS["Agent Tools"]
metatools["Meta-tools<br/>describe_tool / execute_tool / load_skill"]
end
mcpclient["MCP Client<br/>(HTTP transport)"]
end
llm["LLM Provider<br/>(OpenAI / Bedrock / OpenAI-compatible)"]
subgraph MCP["Prowler MCP Server"]
app_tools["prowler_app_* tools<br/>(auth required)"]
hub_tools["prowler_hub_* tools<br/>(no auth)"]
docs_tools["prowler_docs_* tools<br/>(no auth)"]
end
api["Prowler API"]
hub["hub.prowler.com"]
docs["docs.prowler.com<br/>(Mintlify)"]
browser <-->|SSE stream| route
route --> agent
agent <-->|LLM API| llm
agent --> metatools
metatools --> mcpclient
mcpclient -->|MCP HTTP · Bearer token<br/>for prowler_app_* only| app_tools
mcpclient -->|MCP HTTP| hub_tools
mcpclient -->|MCP HTTP| docs_tools
app_tools -->|REST| api
hub_tools -->|REST| hub
docs_tools -->|REST| docs
Binary file not shown.

Before

Width:  |  Height:  |  Size: 286 KiB

@@ -23,8 +23,6 @@ flowchart TB
user --> ui
user --> cli
ui -->|REST| api
ui -->|MCP HTTP| mcp
mcp -->|REST| api
api --> pg
api --> valkey
beat -->|enqueue jobs| valkey
@@ -33,5 +31,7 @@ flowchart TB
worker -->|Attack Paths| neo4j
worker -->|invokes| sdk
cli --> sdk
api -. AI tools .-> mcp
mcp -. context .-> api
sdk --> providers
Binary file not shown.

Before

Width:  |  Height:  |  Size: 348 KiB

After

Width:  |  Height:  |  Size: 268 KiB

-29
View File
@@ -1,29 +0,0 @@
flowchart LR
subgraph HOSTS["MCP Hosts"]
chat["Chat Interfaces<br/>(Claude Desktop, LobeChat)"]
ide["IDEs and Code Editors<br/>(Claude Code, Cursor)"]
apps["Other AI Applications<br/>(5ire, custom agents)"]
end
subgraph MCP["Prowler MCP Server"]
app_tools["prowler_app_* tools<br/>(JWT or API key auth)<br/>Findings · Providers · Scans<br/>Resources · Muting · Compliance<br/>Attack Paths"]
hub_tools["prowler_hub_* tools<br/>(no auth)<br/>Checks Catalog · Check Code<br/>Fixers · Compliance Frameworks"]
docs_tools["prowler_docs_* tools<br/>(no auth)<br/>Search · Document Retrieval"]
end
api["Prowler API<br/>(REST)"]
hub["hub.prowler.com<br/>(REST)"]
docs["docs.prowler.com<br/>(Mintlify)"]
chat -->|STDIO or HTTP| app_tools
chat -->|STDIO or HTTP| hub_tools
chat -->|STDIO or HTTP| docs_tools
ide -->|STDIO or HTTP| app_tools
ide -->|STDIO or HTTP| hub_tools
ide -->|STDIO or HTTP| docs_tools
apps -->|STDIO or HTTP| app_tools
apps -->|STDIO or HTTP| hub_tools
apps -->|STDIO or HTTP| docs_tools
app_tools -->|REST| api
hub_tools -->|REST| hub
docs_tools -->|REST| docs
Binary file not shown.

Before

Width:  |  Height:  |  Size: 371 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 328 KiB

Some files were not shown because too many files have changed in this diff Show More