Compare commits

...

7 Commits

Author SHA1 Message Date
Pablo F.G
f276c8e530 docs: add Hadolint installation step to pre-commit hooks setup
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 15:08:15 +01:00
Pablo Fernandez
21f36ed92d docs: simplify TruffleHog section to link official install guide
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 15:46:01 +01:00
Pablo Fernandez
9bd2f8f9ba docs: add pre-commit hooks setup for TruffleHog and Safety
Document the missing onboarding step for installing TruffleHog
and Safety, which are required by pre-commit hooks but were not
listed in the developer setup instructions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 15:27:55 +01:00
Adrián Peña
ad6368a446 chore: add defusedxml as api dependency (#10401) 2026-03-19 18:26:55 +01:00
Adrián Peña
3361393b7d chore: update changelog (#10400) 2026-03-19 17:55:18 +01:00
Sandiyo Christan
0b7a21a70c fix(api): [security] use defusedxml to prevent XML bomb DoS in SAML metadata parsing (#10165)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Adrián Peña <adrianjpr@gmail.com>
2026-03-19 17:44:52 +01:00
Josema Camacho
872e6e239c perf(api): replace JOINs with pre-check in threat score aggregation query (#10394) 2026-03-19 17:30:06 +01:00
8 changed files with 106 additions and 32 deletions

View File

@@ -239,6 +239,21 @@ pnpm start
> Once configured, access the Prowler App at http://localhost:3000. Sign up using your email and password to get started.
**Pre-commit Hooks Setup**
Some pre-commit hooks require tools installed on your system:
1. **Install [TruffleHog](https://github.com/trufflesecurity/trufflehog#install)** (secret scanning) — see the [official installation options](https://github.com/trufflesecurity/trufflehog#install).
2. **Install [Safety](https://github.com/pyupio/safety)** (dependency vulnerability checking):
```console
# Requires a Python environment (e.g. via pyenv)
pip install safety
```
3. **Install [Hadolint](https://github.com/hadolint/hadolint#install)** (Dockerfile linting) — see the [official installation options](https://github.com/hadolint/hadolint#install).
## Prowler CLI
### Pip package
Prowler CLI is available as a project in [PyPI](https://pypi.org/project/prowler-cloud/). Consequently, it can be installed using pip with Python >3.9.1, <3.13:

View File

@@ -2,6 +2,22 @@
All notable changes to the **Prowler API** are documented in this file.
## [1.23.0] (Prowler UNRELEASED)
### 🔐 Security
- Replace stdlib XML parser with `defusedxml` in SAML metadata parsing to prevent XML bomb (billion laughs) DoS attacks [(#10165)](https://github.com/prowler-cloud/prowler/pull/10165)
---
## [1.22.1] (Prowler v5.21.1)
### 🐞 Fixed
- Threat score aggregation query to eliminate unnecessary JOINs and `COUNT(DISTINCT)` overhead [(#10394)](https://github.com/prowler-cloud/prowler/pull/10394)
---
## [1.22.0] (Prowler v5.21.0)
### 🚀 Added
@@ -22,6 +38,7 @@ All notable changes to the **Prowler API** are documented in this file.
### 🔐 Security
- Use `psycopg2.sql` to safely compose DDL in `PostgresEnumMigration`, preventing SQL injection via f-string interpolation [(#10166)](https://github.com/prowler-cloud/prowler/pull/10166)
- Replace stdlib XML parser with `defusedxml` in SAML metadata parsing to prevent XML bomb (billion laughs) DoS attacks [(#10165)](https://github.com/prowler-cloud/prowler/pull/10165)
---

30
api/poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -2699,6 +2699,18 @@ files = [
{file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
]
[[package]]
name = "defusedxml"
version = "0.7.1"
description = "XML bomb protection for Python stdlib modules"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
files = [
{file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
]
[[package]]
name = "deprecated"
version = "1.3.1"
@@ -2971,7 +2983,7 @@ files = [
[package.dependencies]
autopep8 = "*"
Django = ">=4.2"
gprof2dot = ">=2017.09.19"
gprof2dot = ">=2017.9.19"
sqlparse = "*"
[[package]]
@@ -4579,7 +4591,7 @@ files = [
[package.dependencies]
attrs = ">=22.2.0"
jsonschema-specifications = ">=2023.03.6"
jsonschema-specifications = ">=2023.3.6"
referencing = ">=0.28.4"
rpds-py = ">=0.7.1"
@@ -4787,7 +4799,7 @@ librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""]
mongodb = ["pymongo (==4.15.3)"]
msgpack = ["msgpack (==1.1.2)"]
pyro = ["pyro4 (==4.82)"]
qpid = ["qpid-python (==1.36.0-1)", "qpid-tools (==1.36.0-1)"]
qpid = ["qpid-python (==1.36.0.post1)", "qpid-tools (==1.36.0.post1)"]
redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2,<6.5)"]
slmq = ["softlayer_messaging (>=1.0.3)"]
sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"]
@@ -4808,7 +4820,7 @@ files = [
]
[package.dependencies]
certifi = ">=14.05.14"
certifi = ">=14.5.14"
durationpy = ">=0.7"
google-auth = ">=1.0.1"
oauthlib = ">=3.2.2"
@@ -7170,7 +7182,7 @@ files = [
]
[package.dependencies]
astroid = ">=3.2.2,<=3.3.0-dev0"
astroid = ">=3.2.2,<=3.3.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.3.7", markers = "python_version >= \"3.12\""},
@@ -8184,10 +8196,10 @@ files = [
]
[package.dependencies]
botocore = ">=1.37.4,<2.0a.0"
botocore = ">=1.37.4,<2.0a0"
[package.extras]
crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
crt = ["botocore[crt] (>=1.37.4,<2.0a0)"]
[[package]]
name = "safety"
@@ -9382,4 +9394,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.13"
content-hash = "6e38c38b1f8dc05b881f49703fa445eec299527e6697992b18e4613534fbcdb6"
content-hash = "2ed5b4e47d81da81963814f21702220ac5619f50cd605fd779be53c8c46ffca5"

View File

@@ -22,6 +22,7 @@ dependencies = [
"drf-nested-routers (>=0.94.1,<1.0.0)",
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"defusedxml==0.7.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",

View File

@@ -1,7 +1,6 @@
import json
import logging
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone
from uuid import UUID, uuid4
@@ -9,6 +8,8 @@ from allauth.socialaccount.models import SocialApp
from config.custom_logging import BackendLogger
from config.settings.social_login import SOCIALACCOUNT_PROVIDERS
from cryptography.fernet import Fernet, InvalidToken
import defusedxml
from defusedxml import ElementTree as ET
from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.postgres.fields import ArrayField
@@ -2067,6 +2068,8 @@ class SAMLConfiguration(RowLevelSecurityProtectedModel):
root = ET.fromstring(self.metadata_xml)
except ET.ParseError as e:
raise ValidationError({"metadata_xml": f"Invalid XML: {e}"})
except defusedxml.DefusedXmlException as e:
raise ValidationError({"metadata_xml": f"Unsafe XML content rejected: {e}"})
# Entity ID
entity_id = root.attrib.get("entityID")

View File

@@ -243,6 +243,39 @@ class TestSAMLConfigurationModel:
assert "Invalid XML" in errors["metadata_xml"][0]
assert "not well-formed" in errors["metadata_xml"][0]
def test_xml_bomb_rejected(self, tenants_fixture):
    """
    Regression test: a 'billion laughs' XML bomb in the SAML metadata field
    must be rejected and not allowed to exhaust server memory / CPU.

    Before the fix, xml.etree.ElementTree was used directly, which does not
    protect against entity-expansion attacks. The fix switches to defusedxml
    which raises an exception for any XML containing entity definitions.

    The error message is checked as well as the error key: malformed XML is
    also reported under "metadata_xml" (as "Invalid XML: ..."), so the key
    alone would not prove that the entity-expansion guard is what rejected
    the payload.
    """
    tenant = tenants_fixture[0]
    # Deliberately small payload (three nesting levels): it is well-formed
    # XML, so only the entity-definition guard can reject it.
    xml_bomb = (
        "<?xml version='1.0'?>"
        "<!DOCTYPE bomb ["
        "  <!ENTITY a 'aaaaaaaaaa'>"
        "  <!ENTITY b '&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;'>"
        "  <!ENTITY c '&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;'>"
        "  <!ENTITY d '&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;'>"
        "]>"
        "<md:EntityDescriptor entityID='&d;' "
        "xmlns:md='urn:oasis:names:tc:SAML:2.0:metadata'/>"
    )
    config = SAMLConfiguration(
        email_domain="xmlbomb.com",
        metadata_xml=xml_bomb,
        tenant=tenant,
    )

    with pytest.raises(ValidationError) as exc_info:
        config._parse_metadata()

    errors = exc_info.value.message_dict
    assert "metadata_xml" in errors
    # Pin the defusedxml rejection path rather than the generic parse error.
    assert "Unsafe XML content rejected" in errors["metadata_xml"][0]
def test_metadata_missing_sso_fails(self, tenants_fixture):
tenant = tenants_fixture[0]
xml = """<md:EntityDescriptor entityID="x" xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata">

View File

@@ -4,7 +4,7 @@ from django.db.models import Count, Q
from api.db_router import READ_REPLICA_ALIAS
from api.db_utils import rls_transaction
from api.models import Finding, StatusChoices
from api.models import Finding, Scan, StatusChoices
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
@@ -35,25 +35,26 @@ def _aggregate_requirement_statistics_from_database(
}
"""
requirement_statistics_by_check_id = {}
# TODO: take into account that now the relation is 1 finding == 1 resource, review this when the logic changes
# TODO: review when finding-resource relation changes from 1:1
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
# Pre-check: skip if the scan's provider is deleted (avoids JOINs in the main query)
if Scan.all_objects.filter(id=scan_id, provider__is_deleted=True).exists():
return requirement_statistics_by_check_id
aggregated_statistics_queryset = (
Finding.all_objects.filter(
tenant_id=tenant_id,
scan_id=scan_id,
muted=False,
resources__provider__is_deleted=False,
)
.values("check_id")
.annotate(
total_findings=Count(
"id",
distinct=True,
filter=Q(status__in=[StatusChoices.PASS, StatusChoices.FAIL]),
),
passed_findings=Count(
"id",
distinct=True,
filter=Q(status=StatusChoices.PASS),
),
)

View File

@@ -169,35 +169,27 @@ class TestAggregateRequirementStatistics:
assert result["check_1"]["passed"] == 1
assert result["check_1"]["total"] == 1
def test_excludes_findings_without_resources(self, tenants_fixture, scans_fixture):
"""Verify findings without resources are excluded from aggregation."""
def test_skips_aggregation_for_deleted_provider(
self, tenants_fixture, scans_fixture
):
"""Verify aggregation returns empty when the scan's provider is soft-deleted."""
tenant = tenants_fixture[0]
scan = scans_fixture[0]
# Finding WITH resource → should be counted
self._create_finding_with_resource(
tenant, scan, "finding-1", "check_1", StatusChoices.PASS
)
# Finding WITHOUT resource → should be EXCLUDED
Finding.objects.create(
tenant_id=tenant.id,
scan=scan,
uid="finding-2",
check_id="check_1",
status=StatusChoices.FAIL,
severity=Severity.high,
impact=Severity.high,
check_metadata={},
raw_result={},
)
# Soft-delete the provider
provider = scan.provider
provider.is_deleted = True
provider.save(update_fields=["is_deleted"])
result = _aggregate_requirement_statistics_from_database(
str(tenant.id), str(scan.id)
)
assert result["check_1"]["passed"] == 1
assert result["check_1"]["total"] == 1
assert result == {}
def test_multiple_resources_no_double_count(self, tenants_fixture, scans_fixture):
"""Verify a finding with multiple resources is only counted once."""