fix(s3): file uploading for threatscore (#8993)

This commit is contained in:
Víctor Fernández Poyatos
2025-10-23 12:22:06 +02:00
committed by GitHub
parent b436cc1cac
commit ba8dbb0d28
8 changed files with 142 additions and 39 deletions

View File

@@ -24,7 +24,7 @@ class Migration(migrations.Migration):
(
"name",
models.CharField(
max_length=255,
max_length=100,
validators=[django.core.validators.MinLengthValidator(3)],
),
),

View File

@@ -2689,6 +2689,55 @@ class TestScanViewSet:
== "There is a problem with credentials."
)
@patch("api.v1.views.ScanViewSet._get_task_status")
@patch("api.v1.views.get_s3_client")
@patch("api.v1.views.env.str")
def test_threatscore_s3_wildcard(
    self,
    mock_env_str,
    mock_get_s3_client,
    mock_get_task_status,
    authenticated_client,
    scans_fixture,
):
    """
    Threatscore download when the scan output lives in S3.

    The scan's output_location points at a ZIP object in S3. The stubbed
    S3 client lists a single PDF under the sibling "threatscore/" prefix,
    and the endpoint is expected to answer HTTP 200 with that PDF's bytes,
    a PDF content type and a Content-Disposition ending in the PDF name.
    """
    # Arrange: object layout in the fake bucket.
    bucket = "test-bucket"
    zip_key = "tenant-id/scan-id/prowler-output-foo.zip"
    pdf_key = os.path.join(
        os.path.dirname(zip_key),
        "threatscore",
        "prowler-output-123_threatscore_report.pdf",
    )

    # Arrange: a completed scan whose outputs were stored in S3.
    scan = scans_fixture[0]
    scan.state = StateChoices.COMPLETED
    scan.output_location = f"s3://{bucket}/{zip_key}"
    scan.save()

    # Arrange: S3 stub that lists exactly one key and serves its body.
    s3_stub = Mock()
    s3_stub.list_objects_v2.return_value = {"Contents": [{"Key": pdf_key}]}
    s3_stub.get_object.return_value = {"Body": io.BytesIO(b"pdf-bytes")}

    mock_get_s3_client.return_value = s3_stub
    mock_env_str.return_value = bucket
    mock_get_task_status.return_value = None

    # Act
    response = authenticated_client.get(
        reverse("scan-threatscore", kwargs={"pk": scan.id})
    )

    # Assert: response status, headers and payload.
    assert response.status_code == status.HTTP_200_OK
    assert response["Content-Type"] == "application/pdf"
    assert response["Content-Disposition"].endswith(
        '"prowler-output-123_threatscore_report.pdf"'
    )
    assert response.content == b"pdf-bytes"

    # Assert: one listing call, then one fetch of the listed key.
    s3_stub.list_objects_v2.assert_called_once()
    s3_stub.get_object.assert_called_once_with(Bucket=bucket, Key=pdf_key)
def test_report_s3_success(self, authenticated_client, scans_fixture, monkeypatch):
"""
When output_location is an S3 URL and the S3 client returns the file successfully,

View File

@@ -1,3 +1,4 @@
import fnmatch
import glob
import logging
import os
@@ -1775,7 +1776,18 @@ class ScanViewSet(BaseRLSViewSet):
status=status.HTTP_502_BAD_GATEWAY,
)
contents = resp.get("Contents", [])
keys = [obj["Key"] for obj in contents if obj["Key"].endswith(suffix)]
keys = []
for obj in contents:
key = obj["Key"]
key_basename = os.path.basename(key)
if any(ch in suffix for ch in ("*", "?", "[")):
if fnmatch.fnmatch(key_basename, suffix):
keys.append(key)
elif key_basename == suffix:
keys.append(key)
elif key.endswith(suffix):
# Backward compatibility if suffix already includes directories
keys.append(key)
if not keys:
return Response(
{

View File

@@ -20,10 +20,10 @@ from prowler.lib.outputs.asff.asff import ASFF
from prowler.lib.outputs.compliance.aws_well_architected.aws_well_architected import (
AWSWellArchitected,
)
from prowler.lib.outputs.compliance.c5.c5_aws import AWSC5
from prowler.lib.outputs.compliance.ccc.ccc_aws import CCC_AWS
from prowler.lib.outputs.compliance.ccc.ccc_azure import CCC_Azure
from prowler.lib.outputs.compliance.ccc.ccc_gcp import CCC_GCP
from prowler.lib.outputs.compliance.c5.c5_aws import AWSC5
from prowler.lib.outputs.compliance.cis.cis_aws import AWSCIS
from prowler.lib.outputs.compliance.cis.cis_azure import AzureCIS
from prowler.lib.outputs.compliance.cis.cis_gcp import GCPCIS
@@ -183,18 +183,21 @@ def get_s3_client():
return s3_client
def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str | None:
def _upload_to_s3(
tenant_id: str, scan_id: str, local_path: str, relative_key: str
) -> str | None:
"""
Upload the specified ZIP file to an S3 bucket.
If the S3 bucket environment variables are not configured,
the function returns None without performing an upload.
Upload a local artifact to an S3 bucket under the tenant/scan prefix.
Args:
tenant_id (str): The tenant identifier, used as part of the S3 key prefix.
zip_path (str): The local file system path to the ZIP file to be uploaded.
scan_id (str): The scan identifier, used as part of the S3 key prefix.
tenant_id (str): The tenant identifier used as the first segment of the S3 key.
scan_id (str): The scan identifier used as the second segment of the S3 key.
local_path (str): Filesystem path to the artifact to upload.
relative_key (str): Object key relative to `<tenant_id>/<scan_id>/`.
Returns:
str: The S3 URI of the uploaded file (e.g., "s3://<bucket>/<key>") if successful.
None: If the required environment variables for the S3 bucket are not set.
str | None: S3 URI of the uploaded artifact, or None if the upload is skipped.
Raises:
botocore.exceptions.ClientError: If the upload attempt to S3 fails for any reason.
"""
@@ -202,27 +205,19 @@ def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str | None:
if not bucket:
return
if not relative_key:
return
if not os.path.isfile(local_path):
return
try:
s3 = get_s3_client()
# Upload the ZIP file (outputs) to the S3 bucket
zip_key = f"{tenant_id}/{scan_id}/{os.path.basename(zip_path)}"
s3.upload_file(
Filename=zip_path,
Bucket=bucket,
Key=zip_key,
)
s3_key = f"{tenant_id}/{scan_id}/{relative_key}"
s3.upload_file(Filename=local_path, Bucket=bucket, Key=s3_key)
# Upload the compliance directory to the S3 bucket
compliance_dir = os.path.join(os.path.dirname(zip_path), "compliance")
for filename in os.listdir(compliance_dir):
local_path = os.path.join(compliance_dir, filename)
if not os.path.isfile(local_path):
continue
file_key = f"{tenant_id}/{scan_id}/compliance/{filename}"
s3.upload_file(Filename=local_path, Bucket=bucket, Key=file_key)
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{zip_key}"
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{s3_key}"
except (ClientError, NoCredentialsError, ParamValidationError, ValueError) as e:
logger.error(f"S3 upload failed: {str(e)}")

View File

@@ -1317,7 +1317,12 @@ def generate_threatscore_report_job(
min_risk_level=4,
)
upload_uri = _upload_to_s3(tenant_id, pdf_path, scan_id)
upload_uri = _upload_to_s3(
tenant_id,
scan_id,
pdf_path,
f"threatscore/{Path(pdf_path).name}",
)
if upload_uri:
try:
rmtree(Path(pdf_path).parent, ignore_errors=True)

View File

@@ -1,3 +1,4 @@
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
from shutil import rmtree
@@ -413,7 +414,24 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
writer._data.clear()
compressed = _compress_output_files(out_dir)
upload_uri = _upload_to_s3(tenant_id, compressed, scan_id)
upload_uri = _upload_to_s3(
tenant_id,
scan_id,
compressed,
os.path.basename(compressed),
)
compliance_dir_path = Path(comp_dir).parent
if compliance_dir_path.exists():
for artifact_path in sorted(compliance_dir_path.iterdir()):
if artifact_path.is_file():
_upload_to_s3(
tenant_id,
scan_id,
str(artifact_path),
f"compliance/{artifact_path.name}",
)
# S3 integrations (need output_directory)
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):

View File

@@ -72,17 +72,26 @@ class TestOutputs:
client_mock = MagicMock()
mock_get_client.return_value = client_mock
result = _upload_to_s3("tenant-id", str(zip_path), "scan-id")
result = _upload_to_s3(
"tenant-id",
"scan-id",
str(zip_path),
"outputs.zip",
)
expected_uri = "s3://test-bucket/tenant-id/scan-id/outputs.zip"
assert result == expected_uri
assert client_mock.upload_file.call_count == 2
client_mock.upload_file.assert_called_once_with(
Filename=str(zip_path),
Bucket="test-bucket",
Key="tenant-id/scan-id/outputs.zip",
)
@patch("tasks.jobs.export.get_s3_client")
@patch("tasks.jobs.export.base")
def test_upload_to_s3_missing_bucket(self, mock_base, mock_get_client):
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = ""
result = _upload_to_s3("tenant", "/tmp/fake.zip", "scan")
result = _upload_to_s3("tenant", "scan", "/tmp/fake.zip", "fake.zip")
assert result is None
@patch("tasks.jobs.export.get_s3_client")
@@ -101,11 +110,15 @@ class TestOutputs:
client_mock = MagicMock()
mock_get_client.return_value = client_mock
result = _upload_to_s3("tenant", str(zip_path), "scan")
result = _upload_to_s3(
"tenant",
"scan",
str(compliance_dir / "subdir"),
"compliance/subdir",
)
expected_uri = "s3://test-bucket/tenant/scan/results.zip"
assert result == expected_uri
client_mock.upload_file.assert_called_once()
assert result is None
client_mock.upload_file.assert_not_called()
@patch(
"tasks.jobs.export.get_s3_client",
@@ -126,7 +139,12 @@ class TestOutputs:
compliance_dir.mkdir()
(compliance_dir / "report.csv").write_text("csv")
_upload_to_s3("tenant", str(zip_path), "scan")
_upload_to_s3(
"tenant",
"scan",
str(zip_path),
"zipfile.zip",
)
mock_logger.assert_called()
@patch("tasks.jobs.export.rls_transaction")

View File

@@ -85,6 +85,12 @@ class TestGenerateThreatscoreReport:
only_failed=True,
min_risk_level=4,
)
mock_upload.assert_called_once_with(
self.tenant_id,
self.scan_id,
"/tmp/threatscore_path_threatscore_report.pdf",
"threatscore/threatscore_path_threatscore_report.pdf",
)
mock_rmtree.assert_called_once_with(
Path("/tmp/threatscore_path_threatscore_report.pdf").parent,
ignore_errors=True,