mirror of
https://github.com/prowler-cloud/prowler.git
synced 2025-12-19 05:17:47 +00:00
fix(s3): file uploading for threatscore (#8993)
This commit is contained in:
committed by
GitHub
parent
b436cc1cac
commit
ba8dbb0d28
@@ -24,7 +24,7 @@ class Migration(migrations.Migration):
|
||||
(
|
||||
"name",
|
||||
models.CharField(
|
||||
max_length=255,
|
||||
max_length=100,
|
||||
validators=[django.core.validators.MinLengthValidator(3)],
|
||||
),
|
||||
),
|
||||
|
||||
@@ -2689,6 +2689,55 @@ class TestScanViewSet:
|
||||
== "There is a problem with credentials."
|
||||
)
|
||||
|
||||
@patch("api.v1.views.ScanViewSet._get_task_status")
|
||||
@patch("api.v1.views.get_s3_client")
|
||||
@patch("api.v1.views.env.str")
|
||||
def test_threatscore_s3_wildcard(
|
||||
self,
|
||||
mock_env_str,
|
||||
mock_get_s3_client,
|
||||
mock_get_task_status,
|
||||
authenticated_client,
|
||||
scans_fixture,
|
||||
):
|
||||
"""
|
||||
When the threatscore endpoint is called with an S3 output_location,
|
||||
the view should list objects in S3 using wildcard pattern matching,
|
||||
retrieve the matching PDF file, and return it with HTTP 200 and proper headers.
|
||||
"""
|
||||
scan = scans_fixture[0]
|
||||
scan.state = StateChoices.COMPLETED
|
||||
bucket = "test-bucket"
|
||||
zip_key = "tenant-id/scan-id/prowler-output-foo.zip"
|
||||
scan.output_location = f"s3://{bucket}/{zip_key}"
|
||||
scan.save()
|
||||
|
||||
pdf_key = os.path.join(
|
||||
os.path.dirname(zip_key),
|
||||
"threatscore",
|
||||
"prowler-output-123_threatscore_report.pdf",
|
||||
)
|
||||
|
||||
mock_s3_client = Mock()
|
||||
mock_s3_client.list_objects_v2.return_value = {"Contents": [{"Key": pdf_key}]}
|
||||
mock_s3_client.get_object.return_value = {"Body": io.BytesIO(b"pdf-bytes")}
|
||||
|
||||
mock_env_str.return_value = bucket
|
||||
mock_get_s3_client.return_value = mock_s3_client
|
||||
mock_get_task_status.return_value = None
|
||||
|
||||
url = reverse("scan-threatscore", kwargs={"pk": scan.id})
|
||||
response = authenticated_client.get(url)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response["Content-Type"] == "application/pdf"
|
||||
assert response["Content-Disposition"].endswith(
|
||||
'"prowler-output-123_threatscore_report.pdf"'
|
||||
)
|
||||
assert response.content == b"pdf-bytes"
|
||||
mock_s3_client.list_objects_v2.assert_called_once()
|
||||
mock_s3_client.get_object.assert_called_once_with(Bucket=bucket, Key=pdf_key)
|
||||
|
||||
def test_report_s3_success(self, authenticated_client, scans_fixture, monkeypatch):
|
||||
"""
|
||||
When output_location is an S3 URL and the S3 client returns the file successfully,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import fnmatch
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
@@ -1775,7 +1776,18 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
status=status.HTTP_502_BAD_GATEWAY,
|
||||
)
|
||||
contents = resp.get("Contents", [])
|
||||
keys = [obj["Key"] for obj in contents if obj["Key"].endswith(suffix)]
|
||||
keys = []
|
||||
for obj in contents:
|
||||
key = obj["Key"]
|
||||
key_basename = os.path.basename(key)
|
||||
if any(ch in suffix for ch in ("*", "?", "[")):
|
||||
if fnmatch.fnmatch(key_basename, suffix):
|
||||
keys.append(key)
|
||||
elif key_basename == suffix:
|
||||
keys.append(key)
|
||||
elif key.endswith(suffix):
|
||||
# Backward compatibility if suffix already includes directories
|
||||
keys.append(key)
|
||||
if not keys:
|
||||
return Response(
|
||||
{
|
||||
|
||||
@@ -20,10 +20,10 @@ from prowler.lib.outputs.asff.asff import ASFF
|
||||
from prowler.lib.outputs.compliance.aws_well_architected.aws_well_architected import (
|
||||
AWSWellArchitected,
|
||||
)
|
||||
from prowler.lib.outputs.compliance.c5.c5_aws import AWSC5
|
||||
from prowler.lib.outputs.compliance.ccc.ccc_aws import CCC_AWS
|
||||
from prowler.lib.outputs.compliance.ccc.ccc_azure import CCC_Azure
|
||||
from prowler.lib.outputs.compliance.ccc.ccc_gcp import CCC_GCP
|
||||
from prowler.lib.outputs.compliance.c5.c5_aws import AWSC5
|
||||
from prowler.lib.outputs.compliance.cis.cis_aws import AWSCIS
|
||||
from prowler.lib.outputs.compliance.cis.cis_azure import AzureCIS
|
||||
from prowler.lib.outputs.compliance.cis.cis_gcp import GCPCIS
|
||||
@@ -183,18 +183,21 @@ def get_s3_client():
|
||||
return s3_client
|
||||
|
||||
|
||||
def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str | None:
|
||||
def _upload_to_s3(
|
||||
tenant_id: str, scan_id: str, local_path: str, relative_key: str
|
||||
) -> str | None:
|
||||
"""
|
||||
Upload the specified ZIP file to an S3 bucket.
|
||||
If the S3 bucket environment variables are not configured,
|
||||
the function returns None without performing an upload.
|
||||
Upload a local artifact to an S3 bucket under the tenant/scan prefix.
|
||||
|
||||
Args:
|
||||
tenant_id (str): The tenant identifier, used as part of the S3 key prefix.
|
||||
zip_path (str): The local file system path to the ZIP file to be uploaded.
|
||||
scan_id (str): The scan identifier, used as part of the S3 key prefix.
|
||||
tenant_id (str): The tenant identifier used as the first segment of the S3 key.
|
||||
scan_id (str): The scan identifier used as the second segment of the S3 key.
|
||||
local_path (str): Filesystem path to the artifact to upload.
|
||||
relative_key (str): Object key relative to `<tenant_id>/<scan_id>/`.
|
||||
|
||||
Returns:
|
||||
str: The S3 URI of the uploaded file (e.g., "s3://<bucket>/<key>") if successful.
|
||||
None: If the required environment variables for the S3 bucket are not set.
|
||||
str | None: S3 URI of the uploaded artifact, or None if the upload is skipped.
|
||||
|
||||
Raises:
|
||||
botocore.exceptions.ClientError: If the upload attempt to S3 fails for any reason.
|
||||
"""
|
||||
@@ -202,27 +205,19 @@ def _upload_to_s3(tenant_id: str, zip_path: str, scan_id: str) -> str | None:
|
||||
if not bucket:
|
||||
return
|
||||
|
||||
if not relative_key:
|
||||
return
|
||||
|
||||
if not os.path.isfile(local_path):
|
||||
return
|
||||
|
||||
try:
|
||||
s3 = get_s3_client()
|
||||
|
||||
# Upload the ZIP file (outputs) to the S3 bucket
|
||||
zip_key = f"{tenant_id}/{scan_id}/{os.path.basename(zip_path)}"
|
||||
s3.upload_file(
|
||||
Filename=zip_path,
|
||||
Bucket=bucket,
|
||||
Key=zip_key,
|
||||
)
|
||||
s3_key = f"{tenant_id}/{scan_id}/{relative_key}"
|
||||
s3.upload_file(Filename=local_path, Bucket=bucket, Key=s3_key)
|
||||
|
||||
# Upload the compliance directory to the S3 bucket
|
||||
compliance_dir = os.path.join(os.path.dirname(zip_path), "compliance")
|
||||
for filename in os.listdir(compliance_dir):
|
||||
local_path = os.path.join(compliance_dir, filename)
|
||||
if not os.path.isfile(local_path):
|
||||
continue
|
||||
file_key = f"{tenant_id}/{scan_id}/compliance/{filename}"
|
||||
s3.upload_file(Filename=local_path, Bucket=bucket, Key=file_key)
|
||||
|
||||
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{zip_key}"
|
||||
return f"s3://{base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET}/{s3_key}"
|
||||
except (ClientError, NoCredentialsError, ParamValidationError, ValueError) as e:
|
||||
logger.error(f"S3 upload failed: {str(e)}")
|
||||
|
||||
|
||||
@@ -1317,7 +1317,12 @@ def generate_threatscore_report_job(
|
||||
min_risk_level=4,
|
||||
)
|
||||
|
||||
upload_uri = _upload_to_s3(tenant_id, pdf_path, scan_id)
|
||||
upload_uri = _upload_to_s3(
|
||||
tenant_id,
|
||||
scan_id,
|
||||
pdf_path,
|
||||
f"threatscore/{Path(pdf_path).name}",
|
||||
)
|
||||
if upload_uri:
|
||||
try:
|
||||
rmtree(Path(pdf_path).parent, ignore_errors=True)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from shutil import rmtree
|
||||
@@ -413,7 +414,24 @@ def generate_outputs_task(scan_id: str, provider_id: str, tenant_id: str):
|
||||
writer._data.clear()
|
||||
|
||||
compressed = _compress_output_files(out_dir)
|
||||
upload_uri = _upload_to_s3(tenant_id, compressed, scan_id)
|
||||
|
||||
upload_uri = _upload_to_s3(
|
||||
tenant_id,
|
||||
scan_id,
|
||||
compressed,
|
||||
os.path.basename(compressed),
|
||||
)
|
||||
|
||||
compliance_dir_path = Path(comp_dir).parent
|
||||
if compliance_dir_path.exists():
|
||||
for artifact_path in sorted(compliance_dir_path.iterdir()):
|
||||
if artifact_path.is_file():
|
||||
_upload_to_s3(
|
||||
tenant_id,
|
||||
scan_id,
|
||||
str(artifact_path),
|
||||
f"compliance/{artifact_path.name}",
|
||||
)
|
||||
|
||||
# S3 integrations (need output_directory)
|
||||
with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS):
|
||||
|
||||
@@ -72,17 +72,26 @@ class TestOutputs:
|
||||
client_mock = MagicMock()
|
||||
mock_get_client.return_value = client_mock
|
||||
|
||||
result = _upload_to_s3("tenant-id", str(zip_path), "scan-id")
|
||||
result = _upload_to_s3(
|
||||
"tenant-id",
|
||||
"scan-id",
|
||||
str(zip_path),
|
||||
"outputs.zip",
|
||||
)
|
||||
|
||||
expected_uri = "s3://test-bucket/tenant-id/scan-id/outputs.zip"
|
||||
assert result == expected_uri
|
||||
assert client_mock.upload_file.call_count == 2
|
||||
client_mock.upload_file.assert_called_once_with(
|
||||
Filename=str(zip_path),
|
||||
Bucket="test-bucket",
|
||||
Key="tenant-id/scan-id/outputs.zip",
|
||||
)
|
||||
|
||||
@patch("tasks.jobs.export.get_s3_client")
|
||||
@patch("tasks.jobs.export.base")
|
||||
def test_upload_to_s3_missing_bucket(self, mock_base, mock_get_client):
|
||||
mock_base.DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET = ""
|
||||
result = _upload_to_s3("tenant", "/tmp/fake.zip", "scan")
|
||||
result = _upload_to_s3("tenant", "scan", "/tmp/fake.zip", "fake.zip")
|
||||
assert result is None
|
||||
|
||||
@patch("tasks.jobs.export.get_s3_client")
|
||||
@@ -101,11 +110,15 @@ class TestOutputs:
|
||||
client_mock = MagicMock()
|
||||
mock_get_client.return_value = client_mock
|
||||
|
||||
result = _upload_to_s3("tenant", str(zip_path), "scan")
|
||||
result = _upload_to_s3(
|
||||
"tenant",
|
||||
"scan",
|
||||
str(compliance_dir / "subdir"),
|
||||
"compliance/subdir",
|
||||
)
|
||||
|
||||
expected_uri = "s3://test-bucket/tenant/scan/results.zip"
|
||||
assert result == expected_uri
|
||||
client_mock.upload_file.assert_called_once()
|
||||
assert result is None
|
||||
client_mock.upload_file.assert_not_called()
|
||||
|
||||
@patch(
|
||||
"tasks.jobs.export.get_s3_client",
|
||||
@@ -126,7 +139,12 @@ class TestOutputs:
|
||||
compliance_dir.mkdir()
|
||||
(compliance_dir / "report.csv").write_text("csv")
|
||||
|
||||
_upload_to_s3("tenant", str(zip_path), "scan")
|
||||
_upload_to_s3(
|
||||
"tenant",
|
||||
"scan",
|
||||
str(zip_path),
|
||||
"zipfile.zip",
|
||||
)
|
||||
mock_logger.assert_called()
|
||||
|
||||
@patch("tasks.jobs.export.rls_transaction")
|
||||
|
||||
@@ -85,6 +85,12 @@ class TestGenerateThreatscoreReport:
|
||||
only_failed=True,
|
||||
min_risk_level=4,
|
||||
)
|
||||
mock_upload.assert_called_once_with(
|
||||
self.tenant_id,
|
||||
self.scan_id,
|
||||
"/tmp/threatscore_path_threatscore_report.pdf",
|
||||
"threatscore/threatscore_path_threatscore_report.pdf",
|
||||
)
|
||||
mock_rmtree.assert_called_once_with(
|
||||
Path("/tmp/threatscore_path_threatscore_report.pdf").parent,
|
||||
ignore_errors=True,
|
||||
|
||||
Reference in New Issue
Block a user