mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-05-06 08:47:18 +00:00
fix(api): redirect scan report and compliance downloads to presigned S3 URLs (#10927)
This commit is contained in:
+2
-1
@@ -7,6 +7,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
### 🐞 Fixed
|
||||
|
||||
- Attack Paths: AWS scans no longer fail when enabled regions cannot be retrieved, and scans stuck in `scheduled` state are now cleaned up after the stale threshold [(#10917)](https://github.com/prowler-cloud/prowler/pull/10917)
|
||||
- Scan report and compliance downloads now redirect to a presigned S3 URL instead of streaming through the API worker, preventing gunicorn timeouts on large files [(#10927)](https://github.com/prowler-cloud/prowler/pull/10927)
|
||||
|
||||
---
|
||||
|
||||
@@ -20,7 +21,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
### 🔄 Changed
|
||||
|
||||
- Allows tenant owners to expel users from their organizations [(#10787)](https://github.com/prowler-cloud/prowler/pull/10787)
|
||||
- Allows tenant owners to expel users from their organizations [(#10787)](https://github.com/prowler-cloud/prowler/pull/10787)
|
||||
- `aggregate_findings`, `aggregate_attack_surface`, `aggregate_scan_resource_group_summaries` and `aggregate_scan_category_summaries` now upsert via `bulk_create(update_conflicts=True, ...)` instead of the prior `ignore_conflicts=True` / plain INSERT / `already backfilled` short-circuit. Re-runs triggered by the post-mute reaggregation pipeline no longer trip the `unique_*_per_scan` constraints nor silently drop updates, and are race-safe under concurrent writers (e.g. scan completion overlapping with a fresh mute rule) [(#10843)](https://github.com/prowler-cloud/prowler/pull/10843)
|
||||
- Rename the scan-category and scan-resource-group summary aggregators from `backfill_*` to `aggregate_*` [(#10843)](https://github.com/prowler-cloud/prowler/pull/10843)
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ class ApiConfig(AppConfig):
|
||||
"check_and_fix_socialaccount_sites_migration",
|
||||
]
|
||||
|
||||
# Skip Neo4j initialization during tests, some Django commands, and Celery
|
||||
# Skip eager Neo4j init for tests, some Django commands, and Celery (prefork pool: driver must stay lazy, no post_fork hook)
|
||||
if getattr(settings, "TESTING", False) or (
|
||||
len(sys.argv) > 1
|
||||
and (
|
||||
@@ -64,7 +64,7 @@ class ApiConfig(AppConfig):
|
||||
)
|
||||
):
|
||||
logger.info(
|
||||
"Skipping Neo4j initialization because tests, some Django commands or Celery"
|
||||
"Skipping eager Neo4j init: tests, some Django commands, or Celery prefork pool (driver stays lazy)"
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
+1619
-57
File diff suppressed because it is too large
Load Diff
@@ -3841,9 +3841,14 @@ class TestScanViewSet:
|
||||
"prowler-output-123_threatscore_report.pdf",
|
||||
)
|
||||
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/"
|
||||
"tenant-id/scan-id/threatscore/prowler-output-123_threatscore_report.pdf"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
mock_s3_client = Mock()
|
||||
mock_s3_client.list_objects_v2.return_value = {"Contents": [{"Key": pdf_key}]}
|
||||
mock_s3_client.get_object.return_value = {"Body": io.BytesIO(b"pdf-bytes")}
|
||||
mock_s3_client.generate_presigned_url.return_value = presigned_url
|
||||
|
||||
mock_env_str.return_value = bucket
|
||||
mock_get_s3_client.return_value = mock_s3_client
|
||||
@@ -3852,19 +3857,26 @@ class TestScanViewSet:
|
||||
url = reverse("scan-threatscore", kwargs={"pk": scan.id})
|
||||
response = authenticated_client.get(url)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response["Content-Type"] == "application/pdf"
|
||||
assert response["Content-Disposition"].endswith(
|
||||
'"prowler-output-123_threatscore_report.pdf"'
|
||||
)
|
||||
assert response.content == b"pdf-bytes"
|
||||
assert response.status_code == status.HTTP_302_FOUND
|
||||
assert response["Location"] == presigned_url
|
||||
mock_s3_client.list_objects_v2.assert_called_once()
|
||||
mock_s3_client.get_object.assert_called_once_with(Bucket=bucket, Key=pdf_key)
|
||||
mock_s3_client.generate_presigned_url.assert_called_once_with(
|
||||
"get_object",
|
||||
Params={
|
||||
"Bucket": bucket,
|
||||
"Key": pdf_key,
|
||||
"ResponseContentDisposition": (
|
||||
'attachment; filename="prowler-output-123_threatscore_report.pdf"'
|
||||
),
|
||||
"ResponseContentType": "application/pdf",
|
||||
},
|
||||
ExpiresIn=300,
|
||||
)
|
||||
|
||||
def test_report_s3_success(self, authenticated_client, scans_fixture, monkeypatch):
|
||||
"""
|
||||
When output_location is an S3 URL and the S3 client returns the file successfully,
|
||||
the view should return the ZIP file with HTTP 200 and proper headers.
|
||||
When output_location is an S3 URL and the object exists,
|
||||
the view should return a 302 redirect to a presigned S3 URL.
|
||||
"""
|
||||
scan = scans_fixture[0]
|
||||
bucket = "test-bucket"
|
||||
@@ -3878,22 +3890,33 @@ class TestScanViewSet:
|
||||
type("env", (), {"str": lambda self, *args, **kwargs: "test-bucket"})(),
|
||||
)
|
||||
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/report.zip"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
|
||||
class FakeS3Client:
|
||||
def get_object(self, Bucket, Key):
|
||||
def head_object(self, Bucket, Key):
|
||||
assert Bucket == bucket
|
||||
assert Key == key
|
||||
return {"Body": io.BytesIO(b"s3 zip content")}
|
||||
return {}
|
||||
|
||||
def generate_presigned_url(self, ClientMethod, Params, ExpiresIn):
|
||||
assert ClientMethod == "get_object"
|
||||
assert Params["Bucket"] == bucket
|
||||
assert Params["Key"] == key
|
||||
assert Params["ResponseContentDisposition"] == (
|
||||
'attachment; filename="report.zip"'
|
||||
)
|
||||
assert ExpiresIn == 300
|
||||
return presigned_url
|
||||
|
||||
monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())
|
||||
|
||||
url = reverse("scan-report", kwargs={"pk": scan.id})
|
||||
response = authenticated_client.get(url)
|
||||
assert response.status_code == 200
|
||||
expected_filename = os.path.basename("report.zip")
|
||||
content_disposition = response.get("Content-Disposition")
|
||||
assert content_disposition.startswith('attachment; filename="')
|
||||
assert f'filename="{expected_filename}"' in content_disposition
|
||||
assert response.content == b"s3 zip content"
|
||||
assert response.status_code == status.HTTP_302_FOUND
|
||||
assert response["Location"] == presigned_url
|
||||
|
||||
def test_report_s3_success_no_local_files(
|
||||
self, authenticated_client, scans_fixture, monkeypatch
|
||||
@@ -4032,23 +4055,31 @@ class TestScanViewSet:
|
||||
)
|
||||
|
||||
match_key = "path/compliance/mitre_attack_aws.csv"
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/path/compliance/mitre_attack_aws.csv"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
|
||||
class FakeS3Client:
|
||||
def list_objects_v2(self, Bucket, Prefix):
|
||||
return {"Contents": [{"Key": match_key}]}
|
||||
|
||||
def get_object(self, Bucket, Key):
|
||||
return {"Body": io.BytesIO(b"ignored")}
|
||||
def generate_presigned_url(self, ClientMethod, Params, ExpiresIn):
|
||||
assert ClientMethod == "get_object"
|
||||
assert Params["Key"] == match_key
|
||||
assert Params["ResponseContentDisposition"] == (
|
||||
'attachment; filename="mitre_attack_aws.csv"'
|
||||
)
|
||||
assert ExpiresIn == 300
|
||||
return presigned_url
|
||||
|
||||
monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())
|
||||
|
||||
framework = match_key.split("/")[-1].split(".")[0]
|
||||
url = reverse("scan-compliance", kwargs={"pk": scan.id, "name": framework})
|
||||
resp = authenticated_client.get(url)
|
||||
assert resp.status_code == status.HTTP_200_OK
|
||||
cd = resp["Content-Disposition"]
|
||||
assert cd.startswith('attachment; filename="')
|
||||
assert cd.endswith('filename="mitre_attack_aws.csv"')
|
||||
assert resp.status_code == status.HTTP_302_FOUND
|
||||
assert resp["Location"] == presigned_url
|
||||
|
||||
def test_compliance_s3_not_found(
|
||||
self, authenticated_client, scans_fixture, monkeypatch
|
||||
@@ -4251,8 +4282,8 @@ class TestScanViewSet:
|
||||
scan.save()
|
||||
|
||||
fake_client = MagicMock()
|
||||
fake_client.get_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "NoSuchKey"}}, "GetObject"
|
||||
fake_client.head_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "NoSuchKey"}}, "HeadObject"
|
||||
)
|
||||
mock_get_s3_client.return_value = fake_client
|
||||
|
||||
@@ -4275,8 +4306,8 @@ class TestScanViewSet:
|
||||
scan.save()
|
||||
|
||||
fake_client = MagicMock()
|
||||
fake_client.get_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "AccessDenied"}}, "GetObject"
|
||||
fake_client.head_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "AccessDenied"}}, "HeadObject"
|
||||
)
|
||||
mock_get_s3_client.return_value = fake_client
|
||||
|
||||
|
||||
+111
-37
@@ -53,7 +53,7 @@ from django.db.models import (
|
||||
)
|
||||
from django.db.models.fields.json import KeyTextTransform
|
||||
from django.db.models.functions import Cast, Coalesce, RowNumber
|
||||
from django.http import HttpResponse, QueryDict
|
||||
from django.http import HttpResponse, HttpResponseBase, HttpResponseRedirect, QueryDict
|
||||
from django.shortcuts import redirect
|
||||
from django.urls import reverse
|
||||
from django.utils.dateparse import parse_date
|
||||
@@ -2080,24 +2080,38 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
},
|
||||
)
|
||||
|
||||
def _load_file(self, path_pattern, s3=False, bucket=None, list_objects=False):
|
||||
def _load_file(
|
||||
self,
|
||||
path_pattern,
|
||||
s3=False,
|
||||
bucket=None,
|
||||
list_objects=False,
|
||||
content_type=None,
|
||||
):
|
||||
"""
|
||||
Loads a binary file (e.g., ZIP or CSV) and returns its content and filename.
|
||||
Resolve a report file location and return the bytes (filesystem) or a redirect (S3).
|
||||
|
||||
Depending on the input parameters, this method supports loading:
|
||||
- From S3 using a direct key.
|
||||
- From S3 by listing objects under a prefix and matching suffix.
|
||||
- From the local filesystem using glob pattern matching.
|
||||
- From S3 using a direct key, returns a 302 to a short-lived presigned URL.
|
||||
- From S3 by listing objects under a prefix and matching suffix, returns a 302 to a short-lived presigned URL.
|
||||
- From the local filesystem using glob pattern matching, returns the file bytes.
|
||||
|
||||
The S3 branch never streams bytes through the worker; this prevents gunicorn
|
||||
worker timeouts on large reports.
|
||||
|
||||
Args:
|
||||
path_pattern (str): The key or glob pattern representing the file location.
|
||||
s3 (bool, optional): Whether the file is stored in S3. Defaults to False.
|
||||
bucket (str, optional): The name of the S3 bucket, required if `s3=True`. Defaults to None.
|
||||
list_objects (bool, optional): If True and `s3=True`, list objects by prefix to find the file. Defaults to False.
|
||||
content_type (str, optional): On the S3 branch, forwarded as `ResponseContentType`
|
||||
so the presigned download advertises the same Content-Type the API used to send.
|
||||
Ignored on the filesystem branch.
|
||||
|
||||
Returns:
|
||||
tuple[bytes, str]: A tuple containing the file content as bytes and the filename if successful.
|
||||
Response: A DRF `Response` object with an appropriate status and error detail if an error occurs.
|
||||
tuple[bytes, str]: For the filesystem branch, the file content and filename.
|
||||
HttpResponseRedirect: For the S3 branch on success, a 302 redirect to a presigned `GetObject` URL.
|
||||
Response: For any error path, a DRF `Response` with an appropriate status and detail.
|
||||
"""
|
||||
if s3:
|
||||
try:
|
||||
@@ -2144,25 +2158,45 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
# path_pattern here is prefix, but in compliance we build correct suffix check before
|
||||
key = keys[0]
|
||||
else:
|
||||
# path_pattern is exact key
|
||||
# path_pattern is exact key; HEAD before presigning to preserve the 404 contract.
|
||||
key = path_pattern
|
||||
try:
|
||||
s3_obj = client.get_object(Bucket=bucket, Key=key)
|
||||
except ClientError as e:
|
||||
code = e.response.get("Error", {}).get("Code")
|
||||
if code == "NoSuchKey":
|
||||
try:
|
||||
client.head_object(Bucket=bucket, Key=key)
|
||||
except ClientError as e:
|
||||
code = e.response.get("Error", {}).get("Code")
|
||||
if code in ("NoSuchKey", "404"):
|
||||
return Response(
|
||||
{
|
||||
"detail": "The scan has no reports, or the report generation task has not started yet."
|
||||
},
|
||||
status=status.HTTP_404_NOT_FOUND,
|
||||
)
|
||||
return Response(
|
||||
{
|
||||
"detail": "The scan has no reports, or the report generation task has not started yet."
|
||||
},
|
||||
status=status.HTTP_404_NOT_FOUND,
|
||||
{"detail": "There is a problem with credentials."},
|
||||
status=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
return Response(
|
||||
{"detail": "There is a problem with credentials."},
|
||||
status=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
content = s3_obj["Body"].read()
|
||||
|
||||
filename = os.path.basename(key)
|
||||
# escape quotes and strip CR/LF so a malformed key cannot break out of the header
|
||||
safe_filename = (
|
||||
filename.replace("\\", "\\\\")
|
||||
.replace('"', '\\"')
|
||||
.replace("\r", "")
|
||||
.replace("\n", "")
|
||||
)
|
||||
params = {
|
||||
"Bucket": bucket,
|
||||
"Key": key,
|
||||
"ResponseContentDisposition": f'attachment; filename="{safe_filename}"',
|
||||
}
|
||||
if content_type:
|
||||
params["ResponseContentType"] = content_type
|
||||
url = client.generate_presigned_url(
|
||||
"get_object",
|
||||
Params=params,
|
||||
ExpiresIn=300,
|
||||
)
|
||||
return HttpResponseRedirect(url)
|
||||
else:
|
||||
files = glob.glob(path_pattern)
|
||||
if not files:
|
||||
@@ -2205,12 +2239,16 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
|
||||
key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/")
|
||||
loader = self._load_file(
|
||||
key_prefix, s3=True, bucket=bucket, list_objects=False
|
||||
key_prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=False,
|
||||
content_type="application/x-zip-compressed",
|
||||
)
|
||||
else:
|
||||
loader = self._load_file(scan.output_location, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2248,13 +2286,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
prefix = os.path.join(
|
||||
os.path.dirname(key_prefix), "compliance", f"{name}.csv"
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="text/csv",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "compliance", f"*_{name}.csv")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2287,13 +2331,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"cis",
|
||||
"*_cis_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "cis", "*_cis_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2327,13 +2377,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"threatscore",
|
||||
"*_threatscore_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "threatscore", "*_threatscore_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2367,13 +2423,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"ens",
|
||||
"*_ens_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "ens", "*_ens_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2406,13 +2468,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"nis2",
|
||||
"*_nis2_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "nis2", "*_nis2_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2445,13 +2513,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"csa",
|
||||
"*_csa_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "csa", "*_csa_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
|
||||
Reference in New Issue
Block a user