mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-05-09 00:47:04 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bfdacf3f25 | |||
| adc1dbfe7c | |||
| 1b8b5cd18c | |||
| be94b97e49 | |||
| 9840fa640b | |||
| 0aa7b84be3 | |||
| bfa8e811d1 | |||
| 1c29521ebd | |||
| b5abea3e45 | |||
| 68eb946326 |
@@ -145,7 +145,7 @@ SENTRY_RELEASE=local
|
||||
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT}
|
||||
|
||||
#### Prowler release version ####
|
||||
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.25.0
|
||||
NEXT_PUBLIC_PROWLER_RELEASE_VERSION=v5.25.1
|
||||
|
||||
# Social login credentials
|
||||
SOCIAL_GOOGLE_OAUTH_CALLBACK_URL="${AUTH_URL}/api/auth/callback/google"
|
||||
|
||||
+10
-1
@@ -2,6 +2,15 @@
|
||||
|
||||
All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
## [1.26.1] (Prowler v5.25.1)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- Attack Paths: AWS scans no longer fail when enabled regions cannot be retrieved, and scans stuck in `scheduled` state are now cleaned up after the stale threshold [(#10917)](https://github.com/prowler-cloud/prowler/pull/10917)
|
||||
- Scan report and compliance downloads now redirect to a presigned S3 URL instead of streaming through the API worker, preventing gunicorn timeouts on large files [(#10927)](https://github.com/prowler-cloud/prowler/pull/10927)
|
||||
|
||||
---
|
||||
|
||||
## [1.26.0] (Prowler v5.25.0)
|
||||
|
||||
### 🚀 Added
|
||||
@@ -12,7 +21,7 @@ All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
### 🔄 Changed
|
||||
|
||||
- Allows tenant owners to expel users from their organizations [(#10787)](https://github.com/prowler-cloud/prowler/pull/10787)
|
||||
- Allows tenant owners to expel users from their organizations [(#10787)](https://github.com/prowler-cloud/prowler/pull/10787)
|
||||
- `aggregate_findings`, `aggregate_attack_surface`, `aggregate_scan_resource_group_summaries` and `aggregate_scan_category_summaries` now upsert via `bulk_create(update_conflicts=True, ...)` instead of the prior `ignore_conflicts=True` / plain INSERT / `already backfilled` short-circuit. Re-runs triggered by the post-mute reaggregation pipeline no longer trip the `unique_*_per_scan` constraints nor silently drop updates, and are race-safe under concurrent writers (e.g. scan completion overlapping with a fresh mute rule) [(#10843)](https://github.com/prowler-cloud/prowler/pull/10843)
|
||||
- Rename the scan-category and scan-resource-group summary aggregators from `backfill_*` to `aggregate_*` [(#10843)](https://github.com/prowler-cloud/prowler/pull/10843)
|
||||
|
||||
|
||||
Generated
+3
-3
@@ -6754,8 +6754,8 @@ uuid6 = "2024.7.10"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/prowler-cloud/prowler.git"
|
||||
reference = "master"
|
||||
resolved_reference = "ca29e354b622198ff6a70e2ea5eb04e4a44a0903"
|
||||
reference = "v5.25"
|
||||
resolved_reference = "e252058af491b41608dbaaba2975acd7c1728174"
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
@@ -9424,4 +9424,4 @@ files = [
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.11,<3.13"
|
||||
content-hash = "a3ab982d11a87d951ff15694d2ca7fd51f1f51a451abb0baa067ccf6966367a8"
|
||||
content-hash = "7446e89a46709f976a572231862072de86e7bf01ed90a72bea526b9ab05a82b3"
|
||||
|
||||
+2
-2
@@ -25,7 +25,7 @@ dependencies = [
|
||||
"defusedxml==0.7.1",
|
||||
"gunicorn==23.0.0",
|
||||
"lxml==5.3.2",
|
||||
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
|
||||
"prowler @ git+https://github.com/prowler-cloud/prowler.git@v5.25",
|
||||
"psycopg2-binary==2.9.9",
|
||||
"pytest-celery[redis] (==1.3.0)",
|
||||
"sentry-sdk[django] (==2.56.0)",
|
||||
@@ -50,7 +50,7 @@ name = "prowler-api"
|
||||
package-mode = false
|
||||
# Needed for the SDK compatibility
|
||||
requires-python = ">=3.11,<3.13"
|
||||
version = "1.26.0"
|
||||
version = "1.26.1"
|
||||
|
||||
[project.scripts]
|
||||
celery = "src.backend.config.settings.celery"
|
||||
|
||||
@@ -52,7 +52,7 @@ class ApiConfig(AppConfig):
|
||||
"check_and_fix_socialaccount_sites_migration",
|
||||
]
|
||||
|
||||
# Skip Neo4j initialization during tests, some Django commands, and Celery
|
||||
# Skip eager Neo4j init for tests, some Django commands, and Celery (prefork pool: driver must stay lazy, no post_fork hook)
|
||||
if getattr(settings, "TESTING", False) or (
|
||||
len(sys.argv) > 1
|
||||
and (
|
||||
@@ -64,7 +64,7 @@ class ApiConfig(AppConfig):
|
||||
)
|
||||
):
|
||||
logger.info(
|
||||
"Skipping Neo4j initialization because tests, some Django commands or Celery"
|
||||
"Skipping eager Neo4j init: tests, some Django commands, or Celery prefork pool (driver stays lazy)"
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
+1620
-58
File diff suppressed because it is too large
Load Diff
@@ -3841,9 +3841,14 @@ class TestScanViewSet:
|
||||
"prowler-output-123_threatscore_report.pdf",
|
||||
)
|
||||
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/"
|
||||
"tenant-id/scan-id/threatscore/prowler-output-123_threatscore_report.pdf"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
mock_s3_client = Mock()
|
||||
mock_s3_client.list_objects_v2.return_value = {"Contents": [{"Key": pdf_key}]}
|
||||
mock_s3_client.get_object.return_value = {"Body": io.BytesIO(b"pdf-bytes")}
|
||||
mock_s3_client.generate_presigned_url.return_value = presigned_url
|
||||
|
||||
mock_env_str.return_value = bucket
|
||||
mock_get_s3_client.return_value = mock_s3_client
|
||||
@@ -3852,19 +3857,26 @@ class TestScanViewSet:
|
||||
url = reverse("scan-threatscore", kwargs={"pk": scan.id})
|
||||
response = authenticated_client.get(url)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response["Content-Type"] == "application/pdf"
|
||||
assert response["Content-Disposition"].endswith(
|
||||
'"prowler-output-123_threatscore_report.pdf"'
|
||||
)
|
||||
assert response.content == b"pdf-bytes"
|
||||
assert response.status_code == status.HTTP_302_FOUND
|
||||
assert response["Location"] == presigned_url
|
||||
mock_s3_client.list_objects_v2.assert_called_once()
|
||||
mock_s3_client.get_object.assert_called_once_with(Bucket=bucket, Key=pdf_key)
|
||||
mock_s3_client.generate_presigned_url.assert_called_once_with(
|
||||
"get_object",
|
||||
Params={
|
||||
"Bucket": bucket,
|
||||
"Key": pdf_key,
|
||||
"ResponseContentDisposition": (
|
||||
'attachment; filename="prowler-output-123_threatscore_report.pdf"'
|
||||
),
|
||||
"ResponseContentType": "application/pdf",
|
||||
},
|
||||
ExpiresIn=300,
|
||||
)
|
||||
|
||||
def test_report_s3_success(self, authenticated_client, scans_fixture, monkeypatch):
|
||||
"""
|
||||
When output_location is an S3 URL and the S3 client returns the file successfully,
|
||||
the view should return the ZIP file with HTTP 200 and proper headers.
|
||||
When output_location is an S3 URL and the object exists,
|
||||
the view should return a 302 redirect to a presigned S3 URL.
|
||||
"""
|
||||
scan = scans_fixture[0]
|
||||
bucket = "test-bucket"
|
||||
@@ -3878,22 +3890,33 @@ class TestScanViewSet:
|
||||
type("env", (), {"str": lambda self, *args, **kwargs: "test-bucket"})(),
|
||||
)
|
||||
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/report.zip"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
|
||||
class FakeS3Client:
|
||||
def get_object(self, Bucket, Key):
|
||||
def head_object(self, Bucket, Key):
|
||||
assert Bucket == bucket
|
||||
assert Key == key
|
||||
return {"Body": io.BytesIO(b"s3 zip content")}
|
||||
return {}
|
||||
|
||||
def generate_presigned_url(self, ClientMethod, Params, ExpiresIn):
|
||||
assert ClientMethod == "get_object"
|
||||
assert Params["Bucket"] == bucket
|
||||
assert Params["Key"] == key
|
||||
assert Params["ResponseContentDisposition"] == (
|
||||
'attachment; filename="report.zip"'
|
||||
)
|
||||
assert ExpiresIn == 300
|
||||
return presigned_url
|
||||
|
||||
monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())
|
||||
|
||||
url = reverse("scan-report", kwargs={"pk": scan.id})
|
||||
response = authenticated_client.get(url)
|
||||
assert response.status_code == 200
|
||||
expected_filename = os.path.basename("report.zip")
|
||||
content_disposition = response.get("Content-Disposition")
|
||||
assert content_disposition.startswith('attachment; filename="')
|
||||
assert f'filename="{expected_filename}"' in content_disposition
|
||||
assert response.content == b"s3 zip content"
|
||||
assert response.status_code == status.HTTP_302_FOUND
|
||||
assert response["Location"] == presigned_url
|
||||
|
||||
def test_report_s3_success_no_local_files(
|
||||
self, authenticated_client, scans_fixture, monkeypatch
|
||||
@@ -4032,23 +4055,31 @@ class TestScanViewSet:
|
||||
)
|
||||
|
||||
match_key = "path/compliance/mitre_attack_aws.csv"
|
||||
presigned_url = (
|
||||
"https://test-bucket.s3.amazonaws.com/path/compliance/mitre_attack_aws.csv"
|
||||
"?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Expires=300"
|
||||
)
|
||||
|
||||
class FakeS3Client:
|
||||
def list_objects_v2(self, Bucket, Prefix):
|
||||
return {"Contents": [{"Key": match_key}]}
|
||||
|
||||
def get_object(self, Bucket, Key):
|
||||
return {"Body": io.BytesIO(b"ignored")}
|
||||
def generate_presigned_url(self, ClientMethod, Params, ExpiresIn):
|
||||
assert ClientMethod == "get_object"
|
||||
assert Params["Key"] == match_key
|
||||
assert Params["ResponseContentDisposition"] == (
|
||||
'attachment; filename="mitre_attack_aws.csv"'
|
||||
)
|
||||
assert ExpiresIn == 300
|
||||
return presigned_url
|
||||
|
||||
monkeypatch.setattr("api.v1.views.get_s3_client", lambda: FakeS3Client())
|
||||
|
||||
framework = match_key.split("/")[-1].split(".")[0]
|
||||
url = reverse("scan-compliance", kwargs={"pk": scan.id, "name": framework})
|
||||
resp = authenticated_client.get(url)
|
||||
assert resp.status_code == status.HTTP_200_OK
|
||||
cd = resp["Content-Disposition"]
|
||||
assert cd.startswith('attachment; filename="')
|
||||
assert cd.endswith('filename="mitre_attack_aws.csv"')
|
||||
assert resp.status_code == status.HTTP_302_FOUND
|
||||
assert resp["Location"] == presigned_url
|
||||
|
||||
def test_compliance_s3_not_found(
|
||||
self, authenticated_client, scans_fixture, monkeypatch
|
||||
@@ -4251,8 +4282,8 @@ class TestScanViewSet:
|
||||
scan.save()
|
||||
|
||||
fake_client = MagicMock()
|
||||
fake_client.get_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "NoSuchKey"}}, "GetObject"
|
||||
fake_client.head_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "NoSuchKey"}}, "HeadObject"
|
||||
)
|
||||
mock_get_s3_client.return_value = fake_client
|
||||
|
||||
@@ -4275,8 +4306,8 @@ class TestScanViewSet:
|
||||
scan.save()
|
||||
|
||||
fake_client = MagicMock()
|
||||
fake_client.get_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "AccessDenied"}}, "GetObject"
|
||||
fake_client.head_object.side_effect = ClientError(
|
||||
{"Error": {"Code": "AccessDenied"}}, "HeadObject"
|
||||
)
|
||||
mock_get_s3_client.return_value = fake_client
|
||||
|
||||
|
||||
+112
-38
@@ -53,7 +53,7 @@ from django.db.models import (
|
||||
)
|
||||
from django.db.models.fields.json import KeyTextTransform
|
||||
from django.db.models.functions import Cast, Coalesce, RowNumber
|
||||
from django.http import HttpResponse, QueryDict
|
||||
from django.http import HttpResponse, HttpResponseBase, HttpResponseRedirect, QueryDict
|
||||
from django.shortcuts import redirect
|
||||
from django.urls import reverse
|
||||
from django.utils.dateparse import parse_date
|
||||
@@ -422,7 +422,7 @@ class SchemaView(SpectacularAPIView):
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
spectacular_settings.TITLE = "Prowler API"
|
||||
spectacular_settings.VERSION = "1.26.0"
|
||||
spectacular_settings.VERSION = "1.26.1"
|
||||
spectacular_settings.DESCRIPTION = (
|
||||
"Prowler API specification.\n\nThis file is auto-generated."
|
||||
)
|
||||
@@ -2080,24 +2080,38 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
},
|
||||
)
|
||||
|
||||
def _load_file(self, path_pattern, s3=False, bucket=None, list_objects=False):
|
||||
def _load_file(
|
||||
self,
|
||||
path_pattern,
|
||||
s3=False,
|
||||
bucket=None,
|
||||
list_objects=False,
|
||||
content_type=None,
|
||||
):
|
||||
"""
|
||||
Loads a binary file (e.g., ZIP or CSV) and returns its content and filename.
|
||||
Resolve a report file location and return the bytes (filesystem) or a redirect (S3).
|
||||
|
||||
Depending on the input parameters, this method supports loading:
|
||||
- From S3 using a direct key.
|
||||
- From S3 by listing objects under a prefix and matching suffix.
|
||||
- From the local filesystem using glob pattern matching.
|
||||
- From S3 using a direct key, returns a 302 to a short-lived presigned URL.
|
||||
- From S3 by listing objects under a prefix and matching suffix, returns a 302 to a short-lived presigned URL.
|
||||
- From the local filesystem using glob pattern matching, returns the file bytes.
|
||||
|
||||
The S3 branch never streams bytes through the worker; this prevents gunicorn
|
||||
worker timeouts on large reports.
|
||||
|
||||
Args:
|
||||
path_pattern (str): The key or glob pattern representing the file location.
|
||||
s3 (bool, optional): Whether the file is stored in S3. Defaults to False.
|
||||
bucket (str, optional): The name of the S3 bucket, required if `s3=True`. Defaults to None.
|
||||
list_objects (bool, optional): If True and `s3=True`, list objects by prefix to find the file. Defaults to False.
|
||||
content_type (str, optional): On the S3 branch, forwarded as `ResponseContentType`
|
||||
so the presigned download advertises the same Content-Type the API used to send.
|
||||
Ignored on the filesystem branch.
|
||||
|
||||
Returns:
|
||||
tuple[bytes, str]: A tuple containing the file content as bytes and the filename if successful.
|
||||
Response: A DRF `Response` object with an appropriate status and error detail if an error occurs.
|
||||
tuple[bytes, str]: For the filesystem branch, the file content and filename.
|
||||
HttpResponseRedirect: For the S3 branch on success, a 302 redirect to a presigned `GetObject` URL.
|
||||
Response: For any error path, a DRF `Response` with an appropriate status and detail.
|
||||
"""
|
||||
if s3:
|
||||
try:
|
||||
@@ -2144,25 +2158,45 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
# path_pattern here is prefix, but in compliance we build correct suffix check before
|
||||
key = keys[0]
|
||||
else:
|
||||
# path_pattern is exact key
|
||||
# path_pattern is exact key; HEAD before presigning to preserve the 404 contract.
|
||||
key = path_pattern
|
||||
try:
|
||||
s3_obj = client.get_object(Bucket=bucket, Key=key)
|
||||
except ClientError as e:
|
||||
code = e.response.get("Error", {}).get("Code")
|
||||
if code == "NoSuchKey":
|
||||
try:
|
||||
client.head_object(Bucket=bucket, Key=key)
|
||||
except ClientError as e:
|
||||
code = e.response.get("Error", {}).get("Code")
|
||||
if code in ("NoSuchKey", "404"):
|
||||
return Response(
|
||||
{
|
||||
"detail": "The scan has no reports, or the report generation task has not started yet."
|
||||
},
|
||||
status=status.HTTP_404_NOT_FOUND,
|
||||
)
|
||||
return Response(
|
||||
{
|
||||
"detail": "The scan has no reports, or the report generation task has not started yet."
|
||||
},
|
||||
status=status.HTTP_404_NOT_FOUND,
|
||||
{"detail": "There is a problem with credentials."},
|
||||
status=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
return Response(
|
||||
{"detail": "There is a problem with credentials."},
|
||||
status=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
content = s3_obj["Body"].read()
|
||||
|
||||
filename = os.path.basename(key)
|
||||
# escape quotes and strip CR/LF so a malformed key cannot break out of the header
|
||||
safe_filename = (
|
||||
filename.replace("\\", "\\\\")
|
||||
.replace('"', '\\"')
|
||||
.replace("\r", "")
|
||||
.replace("\n", "")
|
||||
)
|
||||
params = {
|
||||
"Bucket": bucket,
|
||||
"Key": key,
|
||||
"ResponseContentDisposition": f'attachment; filename="{safe_filename}"',
|
||||
}
|
||||
if content_type:
|
||||
params["ResponseContentType"] = content_type
|
||||
url = client.generate_presigned_url(
|
||||
"get_object",
|
||||
Params=params,
|
||||
ExpiresIn=300,
|
||||
)
|
||||
return HttpResponseRedirect(url)
|
||||
else:
|
||||
files = glob.glob(path_pattern)
|
||||
if not files:
|
||||
@@ -2205,12 +2239,16 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "")
|
||||
key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/")
|
||||
loader = self._load_file(
|
||||
key_prefix, s3=True, bucket=bucket, list_objects=False
|
||||
key_prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=False,
|
||||
content_type="application/x-zip-compressed",
|
||||
)
|
||||
else:
|
||||
loader = self._load_file(scan.output_location, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2248,13 +2286,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
prefix = os.path.join(
|
||||
os.path.dirname(key_prefix), "compliance", f"{name}.csv"
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="text/csv",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "compliance", f"*_{name}.csv")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2287,13 +2331,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"cis",
|
||||
"*_cis_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "cis", "*_cis_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2327,13 +2377,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"threatscore",
|
||||
"*_threatscore_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "threatscore", "*_threatscore_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2367,13 +2423,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"ens",
|
||||
"*_ens_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "ens", "*_ens_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2406,13 +2468,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"nis2",
|
||||
"*_nis2_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "nis2", "*_nis2_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
@@ -2445,13 +2513,19 @@ class ScanViewSet(BaseRLSViewSet):
|
||||
"csa",
|
||||
"*_csa_report.pdf",
|
||||
)
|
||||
loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True)
|
||||
loader = self._load_file(
|
||||
prefix,
|
||||
s3=True,
|
||||
bucket=bucket,
|
||||
list_objects=True,
|
||||
content_type="application/pdf",
|
||||
)
|
||||
else:
|
||||
base = os.path.dirname(scan.output_location)
|
||||
pattern = os.path.join(base, "csa", "*_csa_report.pdf")
|
||||
loader = self._load_file(pattern, s3=False)
|
||||
|
||||
if isinstance(loader, Response):
|
||||
if isinstance(loader, HttpResponseBase):
|
||||
return loader
|
||||
|
||||
content, filename = loader
|
||||
|
||||
@@ -49,7 +49,7 @@ def start_aws_ingestion(
|
||||
}
|
||||
|
||||
boto3_session = get_boto3_session(prowler_api_provider, prowler_sdk_provider)
|
||||
regions: list[str] = list(prowler_sdk_provider._enabled_regions)
|
||||
regions: list[str] = resolve_aws_regions(prowler_api_provider, prowler_sdk_provider)
|
||||
requested_syncs = list(cartography_aws.RESOURCE_FUNCTIONS.keys())
|
||||
|
||||
sync_args = cartography_aws._build_aws_sync_kwargs(
|
||||
@@ -226,6 +226,48 @@ def get_boto3_session(
|
||||
return boto3_session
|
||||
|
||||
|
||||
def resolve_aws_regions(
|
||||
prowler_api_provider: ProwlerAPIProvider,
|
||||
prowler_sdk_provider: ProwlerSDKProvider,
|
||||
) -> list[str]:
|
||||
"""Resolve the regions to scan, falling back when `_enabled_regions` is `None`.
|
||||
|
||||
The SDK silently sets `_enabled_regions` to `None` when `ec2:DescribeRegions`
|
||||
fails (missing IAM permission, transient error). Without a fallback the
|
||||
Cartography ingestion crashes with a non-actionable `TypeError`. Try the
|
||||
user's `audited_regions` next, then the partition's static region list.
|
||||
Excluded regions are honored on every branch.
|
||||
"""
|
||||
if prowler_sdk_provider._enabled_regions is not None:
|
||||
regions = set(prowler_sdk_provider._enabled_regions)
|
||||
|
||||
elif prowler_sdk_provider.identity.audited_regions:
|
||||
regions = set(prowler_sdk_provider.identity.audited_regions)
|
||||
|
||||
else:
|
||||
partition = prowler_sdk_provider.identity.partition
|
||||
try:
|
||||
regions = prowler_sdk_provider.get_available_aws_service_regions(
|
||||
"ec2", partition
|
||||
)
|
||||
|
||||
except KeyError:
|
||||
raise RuntimeError(
|
||||
f"No region data available for partition {partition!r}; "
|
||||
f"cannot determine regions to scan for "
|
||||
f"{prowler_api_provider.uid}"
|
||||
)
|
||||
|
||||
logger.warning(
|
||||
f"Could not enumerate enabled regions for AWS account "
|
||||
f"{prowler_api_provider.uid}; falling back to all regions in "
|
||||
f"partition {partition!r}"
|
||||
)
|
||||
|
||||
excluded = set(getattr(prowler_sdk_provider, "_excluded_regions", None) or ())
|
||||
return sorted(regions - excluded)
|
||||
|
||||
|
||||
def get_aioboto3_session(boto3_session: boto3.Session) -> aioboto3.Session:
|
||||
return aioboto3.Session(botocore_session=boto3_session._session)
|
||||
|
||||
|
||||
@@ -18,28 +18,45 @@ logger = get_task_logger(__name__)
|
||||
|
||||
def cleanup_stale_attack_paths_scans() -> dict:
|
||||
"""
|
||||
Find `EXECUTING` `AttackPathsScan` scans whose workers are dead or that have
|
||||
exceeded the stale threshold, and mark them as `FAILED`.
|
||||
Mark stale `AttackPathsScan` rows as `FAILED`.
|
||||
|
||||
Two-pass detection:
|
||||
Covers two stuck-state scenarios:
|
||||
1. `EXECUTING` scans whose workers are dead, or that have exceeded the
|
||||
stale threshold while alive.
|
||||
2. `SCHEDULED` scans that never made it to a worker — parent scan
|
||||
crashed before dispatch, broker lost the message, etc. Detected by
|
||||
age plus the parent `Scan` no longer being in flight.
|
||||
"""
|
||||
threshold = timedelta(minutes=ATTACK_PATHS_SCAN_STALE_THRESHOLD_MINUTES)
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
cutoff = now - threshold
|
||||
|
||||
cleaned_up: list[str] = []
|
||||
cleaned_up.extend(_cleanup_stale_executing_scans(cutoff))
|
||||
cleaned_up.extend(_cleanup_stale_scheduled_scans(cutoff))
|
||||
|
||||
logger.info(
|
||||
f"Stale `AttackPathsScan` cleanup: {len(cleaned_up)} scan(s) cleaned up"
|
||||
)
|
||||
return {"cleaned_up_count": len(cleaned_up), "scan_ids": cleaned_up}
|
||||
|
||||
|
||||
def _cleanup_stale_executing_scans(cutoff: datetime) -> list[str]:
|
||||
"""
|
||||
Two-pass detection for `EXECUTING` scans:
|
||||
1. If `TaskResult.worker` exists, ping the worker.
|
||||
- Dead worker: cleanup immediately (any age).
|
||||
- Alive + past threshold: revoke the task, then cleanup.
|
||||
- Alive + within threshold: skip.
|
||||
2. If no worker field: fall back to time-based heuristic only.
|
||||
"""
|
||||
threshold = timedelta(minutes=ATTACK_PATHS_SCAN_STALE_THRESHOLD_MINUTES)
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
cutoff = now - threshold
|
||||
|
||||
executing_scans = (
|
||||
executing_scans = list(
|
||||
AttackPathsScan.all_objects.using(MainRouter.admin_db)
|
||||
.filter(state=StateChoices.EXECUTING)
|
||||
.select_related("task__task_runner_task")
|
||||
)
|
||||
|
||||
# Cache worker liveness so each worker is pinged at most once
|
||||
executing_scans = list(executing_scans)
|
||||
workers = {
|
||||
tr.worker
|
||||
for scan in executing_scans
|
||||
@@ -48,7 +65,7 @@ def cleanup_stale_attack_paths_scans() -> dict:
|
||||
}
|
||||
worker_alive = {w: _is_worker_alive(w) for w in workers}
|
||||
|
||||
cleaned_up = []
|
||||
cleaned_up: list[str] = []
|
||||
|
||||
for scan in executing_scans:
|
||||
task_result = (
|
||||
@@ -65,9 +82,7 @@ def cleanup_stale_attack_paths_scans() -> dict:
|
||||
|
||||
# Alive but stale — revoke before cleanup
|
||||
_revoke_task(task_result)
|
||||
reason = (
|
||||
"Scan exceeded stale threshold — " "cleaned up by periodic task"
|
||||
)
|
||||
reason = "Scan exceeded stale threshold — cleaned up by periodic task"
|
||||
else:
|
||||
reason = "Worker dead — cleaned up by periodic task"
|
||||
else:
|
||||
@@ -82,10 +97,57 @@ def cleanup_stale_attack_paths_scans() -> dict:
|
||||
if _cleanup_scan(scan, task_result, reason):
|
||||
cleaned_up.append(str(scan.id))
|
||||
|
||||
logger.info(
|
||||
f"Stale `AttackPathsScan` cleanup: {len(cleaned_up)} scan(s) cleaned up"
|
||||
return cleaned_up
|
||||
|
||||
|
||||
def _cleanup_stale_scheduled_scans(cutoff: datetime) -> list[str]:
|
||||
"""
|
||||
Cleanup `SCHEDULED` scans that never reached a worker.
|
||||
|
||||
Detection:
|
||||
- `state == SCHEDULED`
|
||||
- `started_at < cutoff`
|
||||
- parent `Scan` is no longer in flight (terminal state or missing). This
|
||||
avoids cleaning up rows whose parent Prowler scan is legitimately still
|
||||
running.
|
||||
|
||||
For each match: revoke the queued task (best-effort; harmless if already
|
||||
consumed), atomically flip to `FAILED`, and mark the `TaskResult`. The
|
||||
temp Neo4j database is never created while `SCHEDULED`, so no drop is
|
||||
needed.
|
||||
"""
|
||||
scheduled_scans = list(
|
||||
AttackPathsScan.all_objects.using(MainRouter.admin_db)
|
||||
.filter(
|
||||
state=StateChoices.SCHEDULED,
|
||||
started_at__lt=cutoff,
|
||||
)
|
||||
.select_related("task__task_runner_task", "scan")
|
||||
)
|
||||
return {"cleaned_up_count": len(cleaned_up), "scan_ids": cleaned_up}
|
||||
|
||||
cleaned_up: list[str] = []
|
||||
parent_terminal = (
|
||||
StateChoices.COMPLETED,
|
||||
StateChoices.FAILED,
|
||||
StateChoices.CANCELLED,
|
||||
)
|
||||
|
||||
for scan in scheduled_scans:
|
||||
parent_scan = scan.scan
|
||||
if parent_scan is not None and parent_scan.state not in parent_terminal:
|
||||
continue
|
||||
|
||||
task_result = (
|
||||
getattr(scan.task, "task_runner_task", None) if scan.task else None
|
||||
)
|
||||
if task_result:
|
||||
_revoke_task(task_result, terminate=False)
|
||||
|
||||
reason = "Scan never started — cleaned up by periodic task"
|
||||
if _cleanup_scheduled_scan(scan, task_result, reason):
|
||||
cleaned_up.append(str(scan.id))
|
||||
|
||||
return cleaned_up
|
||||
|
||||
|
||||
def _is_worker_alive(worker: str) -> bool:
|
||||
@@ -98,12 +160,17 @@ def _is_worker_alive(worker: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _revoke_task(task_result) -> None:
|
||||
"""Send `SIGTERM` to a hung Celery task. Non-fatal on failure."""
|
||||
def _revoke_task(task_result, terminate: bool = True) -> None:
|
||||
"""Revoke a Celery task. Non-fatal on failure.
|
||||
|
||||
`terminate=True` SIGTERMs the worker if the task is mid-execution; use
|
||||
for EXECUTING cleanup. `terminate=False` only marks the task id revoked
|
||||
across workers, so any worker pulling the queued message discards it;
|
||||
use for SCHEDULED cleanup where the task hasn't run yet.
|
||||
"""
|
||||
try:
|
||||
current_app.control.revoke(
|
||||
task_result.task_id, terminate=True, signal="SIGTERM"
|
||||
)
|
||||
kwargs = {"terminate": True, "signal": "SIGTERM"} if terminate else {}
|
||||
current_app.control.revoke(task_result.task_id, **kwargs)
|
||||
logger.info(f"Revoked task {task_result.task_id}")
|
||||
except Exception:
|
||||
logger.exception(f"Failed to revoke task {task_result.task_id}")
|
||||
@@ -125,28 +192,64 @@ def _cleanup_scan(scan, task_result, reason: str) -> bool:
|
||||
except Exception:
|
||||
logger.exception(f"Failed to drop temp database {tmp_db_name}")
|
||||
|
||||
# 2. Lock row, verify still EXECUTING, mark FAILED — all atomic
|
||||
with rls_transaction(str(scan.tenant_id)):
|
||||
try:
|
||||
fresh_scan = AttackPathsScan.objects.select_for_update().get(id=scan.id)
|
||||
except AttackPathsScan.DoesNotExist:
|
||||
logger.warning(f"Scan {scan_id_str} no longer exists, skipping")
|
||||
return False
|
||||
fresh_scan = _finalize_failed_scan(scan, StateChoices.EXECUTING, reason)
|
||||
if fresh_scan is None:
|
||||
return False
|
||||
|
||||
if fresh_scan.state != StateChoices.EXECUTING:
|
||||
logger.info(f"Scan {scan_id_str} is now {fresh_scan.state}, skipping")
|
||||
return False
|
||||
|
||||
_mark_scan_finished(fresh_scan, StateChoices.FAILED, {"global_error": reason})
|
||||
|
||||
# 3. Mark `TaskResult` as `FAILURE` (not RLS-protected, outside lock)
|
||||
# Mark `TaskResult` as `FAILURE` (not RLS-protected, outside lock)
|
||||
if task_result:
|
||||
task_result.status = states.FAILURE
|
||||
task_result.date_done = datetime.now(tz=timezone.utc)
|
||||
task_result.save(update_fields=["status", "date_done"])
|
||||
|
||||
# 4. Recover graph_data_ready if provider data still exists
|
||||
recover_graph_data_ready(fresh_scan)
|
||||
|
||||
logger.info(f"Cleaned up stale scan {scan_id_str}: {reason}")
|
||||
return True
|
||||
|
||||
|
||||
def _cleanup_scheduled_scan(scan, task_result, reason: str) -> bool:
|
||||
"""
|
||||
Clean up a `SCHEDULED` scan that never reached a worker.
|
||||
|
||||
Skips the temp Neo4j drop — the database is only created once the worker
|
||||
enters `EXECUTING`, so dropping it here just produces noisy log output.
|
||||
|
||||
Returns `True` if the scan was actually cleaned up, `False` if skipped.
|
||||
"""
|
||||
scan_id_str = str(scan.id)
|
||||
|
||||
fresh_scan = _finalize_failed_scan(scan, StateChoices.SCHEDULED, reason)
|
||||
if fresh_scan is None:
|
||||
return False
|
||||
|
||||
if task_result:
|
||||
task_result.status = states.FAILURE
|
||||
task_result.date_done = datetime.now(tz=timezone.utc)
|
||||
task_result.save(update_fields=["status", "date_done"])
|
||||
|
||||
logger.info(f"Cleaned up scheduled scan {scan_id_str}: {reason}")
|
||||
return True
|
||||
|
||||
|
||||
def _finalize_failed_scan(scan, expected_state: str, reason: str):
|
||||
"""
|
||||
Atomically lock the row, verify it's still in `expected_state`, and
|
||||
mark it `FAILED`. Returns the locked row on success, `None` if the
|
||||
row is gone or has already moved on.
|
||||
"""
|
||||
scan_id_str = str(scan.id)
|
||||
with rls_transaction(str(scan.tenant_id)):
|
||||
try:
|
||||
fresh_scan = AttackPathsScan.objects.select_for_update().get(id=scan.id)
|
||||
except AttackPathsScan.DoesNotExist:
|
||||
logger.warning(f"Scan {scan_id_str} no longer exists, skipping")
|
||||
return None
|
||||
|
||||
if fresh_scan.state != expected_state:
|
||||
logger.info(f"Scan {scan_id_str} is now {fresh_scan.state}, skipping")
|
||||
return None
|
||||
|
||||
_mark_scan_finished(fresh_scan, StateChoices.FAILED, {"global_error": reason})
|
||||
|
||||
return fresh_scan
|
||||
|
||||
@@ -67,25 +67,52 @@ def retrieve_attack_paths_scan(
|
||||
return None
|
||||
|
||||
|
||||
def set_attack_paths_scan_task_id(
|
||||
tenant_id: str,
|
||||
scan_pk: str,
|
||||
task_id: str,
|
||||
) -> None:
|
||||
"""Persist the Celery `task_id` on the `AttackPathsScan` row.
|
||||
|
||||
Called at dispatch time (when `apply_async` returns) so the row carries
|
||||
the task id even while still `SCHEDULED`. This lets the periodic
|
||||
cleanup revoke queued messages for scans that never reached a worker.
|
||||
"""
|
||||
with rls_transaction(tenant_id):
|
||||
ProwlerAPIAttackPathsScan.objects.filter(id=scan_pk).update(task_id=task_id)
|
||||
|
||||
|
||||
def starting_attack_paths_scan(
|
||||
attack_paths_scan: ProwlerAPIAttackPathsScan,
|
||||
task_id: str,
|
||||
cartography_config: CartographyConfig,
|
||||
) -> None:
|
||||
with rls_transaction(attack_paths_scan.tenant_id):
|
||||
attack_paths_scan.task_id = task_id
|
||||
attack_paths_scan.state = StateChoices.EXECUTING
|
||||
attack_paths_scan.started_at = datetime.now(tz=timezone.utc)
|
||||
attack_paths_scan.update_tag = cartography_config.update_tag
|
||||
) -> bool:
|
||||
"""Flip the row from `SCHEDULED` to `EXECUTING` atomically.
|
||||
|
||||
attack_paths_scan.save(
|
||||
update_fields=[
|
||||
"task_id",
|
||||
"state",
|
||||
"started_at",
|
||||
"update_tag",
|
||||
]
|
||||
)
|
||||
Returns `False` if the row is gone or has already moved past
|
||||
`SCHEDULED` (e.g., periodic cleanup raced ahead and marked it
|
||||
`FAILED` while the worker message was still in flight).
|
||||
"""
|
||||
with rls_transaction(attack_paths_scan.tenant_id):
|
||||
try:
|
||||
locked = ProwlerAPIAttackPathsScan.objects.select_for_update().get(
|
||||
id=attack_paths_scan.id
|
||||
)
|
||||
except ProwlerAPIAttackPathsScan.DoesNotExist:
|
||||
return False
|
||||
|
||||
if locked.state != StateChoices.SCHEDULED:
|
||||
return False
|
||||
|
||||
locked.state = StateChoices.EXECUTING
|
||||
locked.started_at = datetime.now(tz=timezone.utc)
|
||||
locked.update_tag = cartography_config.update_tag
|
||||
locked.save(update_fields=["state", "started_at", "update_tag"])
|
||||
|
||||
# Keep the in-memory object the caller is holding in sync.
|
||||
attack_paths_scan.state = locked.state
|
||||
attack_paths_scan.started_at = locked.started_at
|
||||
attack_paths_scan.update_tag = locked.update_tag
|
||||
return True
|
||||
|
||||
|
||||
def _mark_scan_finished(
|
||||
|
||||
@@ -97,6 +97,19 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
|
||||
)
|
||||
attack_paths_scan = db_utils.retrieve_attack_paths_scan(tenant_id, scan_id)
|
||||
|
||||
# Idempotency guard: cleanup may have flipped this row to a terminal state
|
||||
# while the message was still in flight. Bail out before touching state.
|
||||
if attack_paths_scan and attack_paths_scan.state in (
|
||||
StateChoices.FAILED,
|
||||
StateChoices.COMPLETED,
|
||||
StateChoices.CANCELLED,
|
||||
):
|
||||
logger.warning(
|
||||
f"Attack Paths scan {attack_paths_scan.id} already in terminal "
|
||||
f"state {attack_paths_scan.state}; skipping execution"
|
||||
)
|
||||
return {}
|
||||
|
||||
# Checks before starting the scan
|
||||
if not cartography_ingestion_function:
|
||||
ingestion_exceptions = {
|
||||
@@ -114,12 +127,17 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
|
||||
|
||||
else:
|
||||
if not attack_paths_scan:
|
||||
# Safety net for in-flight messages or direct task invocations; dispatcher normally pre-creates the row.
|
||||
logger.warning(
|
||||
f"No Attack Paths Scan found for scan {scan_id} and tenant {tenant_id}, let's create it then"
|
||||
)
|
||||
attack_paths_scan = db_utils.create_attack_paths_scan(
|
||||
tenant_id, scan_id, prowler_api_provider.id
|
||||
)
|
||||
if attack_paths_scan and task_id:
|
||||
db_utils.set_attack_paths_scan_task_id(
|
||||
tenant_id, attack_paths_scan.id, task_id
|
||||
)
|
||||
|
||||
tmp_database_name = graph_database.get_database_name(
|
||||
attack_paths_scan.id, temporary=True
|
||||
@@ -141,9 +159,13 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
|
||||
)
|
||||
|
||||
# Starting the Attack Paths scan
|
||||
db_utils.starting_attack_paths_scan(
|
||||
attack_paths_scan, task_id, tenant_cartography_config
|
||||
)
|
||||
if not db_utils.starting_attack_paths_scan(
|
||||
attack_paths_scan, tenant_cartography_config
|
||||
):
|
||||
logger.warning(
|
||||
f"Attack Paths scan {attack_paths_scan.id} no longer in SCHEDULED state; cleanup likely raced ahead"
|
||||
)
|
||||
return {}
|
||||
|
||||
scan_t0 = time.perf_counter()
|
||||
logger.info(
|
||||
|
||||
@@ -173,10 +173,25 @@ def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str)
|
||||
).apply_async()
|
||||
|
||||
if can_provider_run_attack_paths_scan(tenant_id, provider_id):
|
||||
perform_attack_paths_scan_task.apply_async(
|
||||
# Row is normally created upstream, so this is a safeguard so we can attach the task id below
|
||||
attack_paths_scan = attack_paths_db_utils.retrieve_attack_paths_scan(
|
||||
tenant_id, scan_id
|
||||
)
|
||||
if attack_paths_scan is None:
|
||||
attack_paths_scan = attack_paths_db_utils.create_attack_paths_scan(
|
||||
tenant_id, scan_id, provider_id
|
||||
)
|
||||
|
||||
# Persist the Celery task id so the periodic cleanup can revoke scans stuck in SCHEDULED
|
||||
result = perform_attack_paths_scan_task.apply_async(
|
||||
kwargs={"tenant_id": tenant_id, "scan_id": scan_id}
|
||||
)
|
||||
|
||||
if attack_paths_scan and result:
|
||||
attack_paths_db_utils.set_attack_paths_scan_task_id(
|
||||
tenant_id, attack_paths_scan.id, result.task_id
|
||||
)
|
||||
|
||||
|
||||
@shared_task(base=RLSTask, name="provider-connection-check")
|
||||
@set_tenant
|
||||
|
||||
@@ -135,7 +135,7 @@ class TestAttackPathsRun:
|
||||
assert result == ingestion_result
|
||||
mock_retrieve_scan.assert_called_once_with(str(tenant.id), str(scan.id))
|
||||
mock_starting.assert_called_once()
|
||||
config = mock_starting.call_args[0][2]
|
||||
config = mock_starting.call_args[0][1]
|
||||
assert config.neo4j_database == "tenant-db"
|
||||
mock_get_db_name.assert_has_calls(
|
||||
[call(attack_paths_scan.id, temporary=True), call(provider.tenant_id)]
|
||||
@@ -2732,3 +2732,143 @@ class TestCleanupStaleAttackPathsScans:
|
||||
assert result["cleaned_up_count"] == 2
|
||||
# Worker should be pinged exactly once — cache prevents second ping
|
||||
mock_alive.assert_called_once_with("shared-worker@host")
|
||||
|
||||
# `SCHEDULED` state cleanup
|
||||
def _create_scheduled_scan(
|
||||
self,
|
||||
tenant,
|
||||
provider,
|
||||
*,
|
||||
age_minutes,
|
||||
parent_state,
|
||||
with_task=True,
|
||||
):
|
||||
"""Create a SCHEDULED AttackPathsScan with a parent Scan in `parent_state`.
|
||||
|
||||
`age_minutes` controls how far in the past `started_at` is set, so
|
||||
callers can place rows safely past the cleanup cutoff.
|
||||
"""
|
||||
parent_scan = Scan.objects.create(
|
||||
name="Parent Prowler scan",
|
||||
provider=provider,
|
||||
trigger=Scan.TriggerChoices.MANUAL,
|
||||
state=parent_state,
|
||||
tenant_id=tenant.id,
|
||||
)
|
||||
|
||||
ap_scan = AttackPathsScan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
scan=parent_scan,
|
||||
state=StateChoices.SCHEDULED,
|
||||
started_at=datetime.now(tz=timezone.utc) - timedelta(minutes=age_minutes),
|
||||
)
|
||||
|
||||
task_result = None
|
||||
if with_task:
|
||||
task_result = TaskResult.objects.create(
|
||||
task_id=str(ap_scan.id),
|
||||
task_name="attack-paths-scan-perform",
|
||||
status="PENDING",
|
||||
)
|
||||
task = Task.objects.create(
|
||||
id=task_result.task_id,
|
||||
task_runner_task=task_result,
|
||||
tenant_id=tenant.id,
|
||||
)
|
||||
ap_scan.task = task
|
||||
ap_scan.save(update_fields=["task_id"])
|
||||
|
||||
return ap_scan, task_result
|
||||
|
||||
@patch("tasks.jobs.attack_paths.cleanup.recover_graph_data_ready")
|
||||
@patch("tasks.jobs.attack_paths.cleanup.graph_database.drop_database")
|
||||
@patch(
|
||||
"tasks.jobs.attack_paths.cleanup.rls_transaction",
|
||||
new=lambda *args, **kwargs: nullcontext(),
|
||||
)
|
||||
@patch("tasks.jobs.attack_paths.cleanup._revoke_task")
|
||||
def test_cleans_up_scheduled_scan_when_parent_is_terminal(
|
||||
self,
|
||||
mock_revoke,
|
||||
mock_drop_db,
|
||||
mock_recover,
|
||||
tenants_fixture,
|
||||
providers_fixture,
|
||||
):
|
||||
from tasks.jobs.attack_paths.cleanup import cleanup_stale_attack_paths_scans
|
||||
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
provider.provider = Provider.ProviderChoices.AWS
|
||||
provider.save()
|
||||
|
||||
ap_scan, task_result = self._create_scheduled_scan(
|
||||
tenant,
|
||||
provider,
|
||||
age_minutes=24 * 60 * 3, # 3 days, safely past any threshold
|
||||
parent_state=StateChoices.FAILED,
|
||||
)
|
||||
|
||||
result = cleanup_stale_attack_paths_scans()
|
||||
|
||||
assert result["cleaned_up_count"] == 1
|
||||
assert str(ap_scan.id) in result["scan_ids"]
|
||||
|
||||
ap_scan.refresh_from_db()
|
||||
assert ap_scan.state == StateChoices.FAILED
|
||||
assert ap_scan.progress == 100
|
||||
assert ap_scan.completed_at is not None
|
||||
assert ap_scan.ingestion_exceptions == {
|
||||
"global_error": "Scan never started — cleaned up by periodic task"
|
||||
}
|
||||
|
||||
# SCHEDULED revoke must NOT terminate a running worker
|
||||
mock_revoke.assert_called_once()
|
||||
assert mock_revoke.call_args.kwargs == {"terminate": False}
|
||||
|
||||
# Temp DB never created for SCHEDULED, so no drop attempted
|
||||
mock_drop_db.assert_not_called()
|
||||
# Tenant Neo4j data is untouched in this path
|
||||
mock_recover.assert_not_called()
|
||||
|
||||
task_result.refresh_from_db()
|
||||
assert task_result.status == "FAILURE"
|
||||
assert task_result.date_done is not None
|
||||
|
||||
@patch("tasks.jobs.attack_paths.cleanup.recover_graph_data_ready")
|
||||
@patch("tasks.jobs.attack_paths.cleanup.graph_database.drop_database")
|
||||
@patch(
|
||||
"tasks.jobs.attack_paths.cleanup.rls_transaction",
|
||||
new=lambda *args, **kwargs: nullcontext(),
|
||||
)
|
||||
@patch("tasks.jobs.attack_paths.cleanup._revoke_task")
|
||||
def test_skips_scheduled_scan_when_parent_still_in_flight(
|
||||
self,
|
||||
mock_revoke,
|
||||
mock_drop_db,
|
||||
mock_recover,
|
||||
tenants_fixture,
|
||||
providers_fixture,
|
||||
):
|
||||
from tasks.jobs.attack_paths.cleanup import cleanup_stale_attack_paths_scans
|
||||
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
provider.provider = Provider.ProviderChoices.AWS
|
||||
provider.save()
|
||||
|
||||
ap_scan, _ = self._create_scheduled_scan(
|
||||
tenant,
|
||||
provider,
|
||||
age_minutes=24 * 60 * 3,
|
||||
parent_state=StateChoices.EXECUTING,
|
||||
)
|
||||
|
||||
result = cleanup_stale_attack_paths_scans()
|
||||
|
||||
assert result["cleaned_up_count"] == 0
|
||||
|
||||
ap_scan.refresh_from_db()
|
||||
assert ap_scan.state == StateChoices.SCHEDULED
|
||||
mock_revoke.assert_not_called()
|
||||
|
||||
@@ -842,6 +842,72 @@ class TestScanCompleteTasks:
|
||||
# Attack Paths task should be skipped when provider cannot run it
|
||||
mock_attack_paths_task.assert_not_called()
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"row_pre_existing",
|
||||
[True, False],
|
||||
ids=["row-pre-existing", "row-missing-fallback"],
|
||||
)
|
||||
@patch("tasks.tasks.aggregate_attack_surface_task.apply_async")
|
||||
@patch("tasks.tasks.chain")
|
||||
@patch("tasks.tasks.create_compliance_requirements_task.si")
|
||||
@patch("tasks.tasks.update_provider_compliance_scores_task.si")
|
||||
@patch("tasks.tasks.perform_scan_summary_task.si")
|
||||
@patch("tasks.tasks.generate_outputs_task.si")
|
||||
@patch("tasks.tasks.generate_compliance_reports_task.si")
|
||||
@patch("tasks.tasks.check_integrations_task.si")
|
||||
@patch("tasks.tasks.attack_paths_db_utils.set_attack_paths_scan_task_id")
|
||||
@patch("tasks.tasks.attack_paths_db_utils.create_attack_paths_scan")
|
||||
@patch("tasks.tasks.attack_paths_db_utils.retrieve_attack_paths_scan")
|
||||
@patch("tasks.tasks.perform_attack_paths_scan_task.apply_async")
|
||||
@patch("tasks.tasks.can_provider_run_attack_paths_scan", return_value=True)
|
||||
def test_scan_complete_dispatches_attack_paths_scan(
|
||||
self,
|
||||
_mock_can_run_attack_paths,
|
||||
mock_attack_paths_task,
|
||||
mock_retrieve,
|
||||
mock_create,
|
||||
mock_set_task_id,
|
||||
mock_check_integrations_task,
|
||||
mock_compliance_reports_task,
|
||||
mock_outputs_task,
|
||||
mock_scan_summary_task,
|
||||
mock_update_compliance_scores_task,
|
||||
mock_compliance_requirements_task,
|
||||
mock_chain,
|
||||
mock_attack_surface_task,
|
||||
row_pre_existing,
|
||||
):
|
||||
"""When a provider can run Attack Paths, dispatch must:
|
||||
1. Reuse the existing row or create one if missing.
|
||||
2. Call apply_async on the Attack Paths task.
|
||||
3. Persist the returned Celery task id on the row.
|
||||
"""
|
||||
existing_row = MagicMock(id="ap-scan-id")
|
||||
if row_pre_existing:
|
||||
mock_retrieve.return_value = existing_row
|
||||
else:
|
||||
mock_retrieve.return_value = None
|
||||
mock_create.return_value = existing_row
|
||||
|
||||
async_result = MagicMock(task_id="celery-task-id")
|
||||
mock_attack_paths_task.return_value = async_result
|
||||
|
||||
_perform_scan_complete_tasks("tenant-id", "scan-id", "provider-id")
|
||||
|
||||
mock_retrieve.assert_called_once_with("tenant-id", "scan-id")
|
||||
if row_pre_existing:
|
||||
mock_create.assert_not_called()
|
||||
else:
|
||||
mock_create.assert_called_once_with("tenant-id", "scan-id", "provider-id")
|
||||
|
||||
mock_attack_paths_task.assert_called_once_with(
|
||||
kwargs={"tenant_id": "tenant-id", "scan_id": "scan-id"}
|
||||
)
|
||||
|
||||
mock_set_task_id.assert_called_once_with(
|
||||
"tenant-id", "ap-scan-id", "celery-task-id"
|
||||
)
|
||||
|
||||
|
||||
class TestAttackPathsTasks:
|
||||
@staticmethod
|
||||
|
||||
@@ -2,6 +2,15 @@
|
||||
|
||||
All notable changes to the **Prowler SDK** are documented in this file.
|
||||
|
||||
## [5.25.1] (Prowler v5.25.1)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- `KeyError` when generating compliance outputs after the CLI scan [#10919](https://github.com/prowler-cloud/prowler/pull/10919)
|
||||
- Kubernetes OCSF `provider_uid` now uses the cluster name in in-cluster mode (so `--cluster-name` is correctly reflected in findings) and keeps the kubeconfig context in kubeconfig mode [(#10483)](https://github.com/prowler-cloud/prowler/pull/10483)
|
||||
|
||||
---
|
||||
|
||||
## [5.25.0] (Prowler v5.25.0)
|
||||
|
||||
### 🚀 Added
|
||||
|
||||
+7
-1
@@ -624,8 +624,14 @@ def prowler():
|
||||
)
|
||||
|
||||
# Compliance Frameworks
|
||||
# Source the framework listing from `bulk_compliance_frameworks.keys()`
|
||||
# so it is by construction a subset of what the bulk loader can resolve.
|
||||
# `get_available_compliance_frameworks(provider)` also discovers top-level
|
||||
# multi-provider universal JSONs (e.g. `prowler/compliance/csa_ccm_4.0.json`)
|
||||
# which `Compliance.get_bulk(provider)` does not load, and which the legacy
|
||||
# output handlers below cannot consume — using it as the source produced
|
||||
input_compliance_frameworks = set(output_options.output_modes).intersection(
|
||||
get_available_compliance_frameworks(provider)
|
||||
bulk_compliance_frameworks.keys()
|
||||
)
|
||||
if provider == "aws":
|
||||
for compliance_name in input_compliance_frameworks:
|
||||
|
||||
@@ -48,7 +48,7 @@ class _MutableTimestamp:
|
||||
|
||||
timestamp = _MutableTimestamp(datetime.today())
|
||||
timestamp_utc = _MutableTimestamp(datetime.now(timezone.utc))
|
||||
prowler_version = "5.25.0"
|
||||
prowler_version = "5.25.1"
|
||||
html_logo_url = "https://github.com/prowler-cloud/prowler/"
|
||||
square_logo_img = "https://raw.githubusercontent.com/prowler-cloud/prowler/dc7d2d5aeb92fdf12e8604f42ef6472cd3e8e889/docs/img/prowler-logo-black.png"
|
||||
aws_logo = "https://user-images.githubusercontent.com/38561120/235953920-3e3fba08-0795-41dc-b480-9bea57db9f2e.png"
|
||||
|
||||
@@ -245,15 +245,16 @@ class Finding(BaseModel):
|
||||
elif provider.type == "kubernetes":
|
||||
if provider.identity.context == "In-Cluster":
|
||||
output_data["auth_method"] = "in-cluster"
|
||||
output_data["provider_uid"] = provider.identity.cluster
|
||||
else:
|
||||
output_data["auth_method"] = "kubeconfig"
|
||||
output_data["provider_uid"] = provider.identity.context
|
||||
output_data["resource_name"] = check_output.resource_name
|
||||
output_data["resource_uid"] = check_output.resource_id
|
||||
output_data["account_name"] = f"context: {provider.identity.context}"
|
||||
output_data["account_uid"] = get_nested_attribute(
|
||||
provider, "identity.cluster"
|
||||
)
|
||||
output_data["provider_uid"] = provider.identity.context
|
||||
output_data["region"] = f"namespace: {check_output.namespace}"
|
||||
|
||||
elif provider.type == "github":
|
||||
|
||||
+1
-1
@@ -95,7 +95,7 @@ maintainers = [{name = "Prowler Engineering", email = "engineering@prowler.com"}
|
||||
name = "prowler"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10,<3.13"
|
||||
version = "5.25.0"
|
||||
version = "5.25.1"
|
||||
|
||||
[project.scripts]
|
||||
prowler = "prowler.__main__:prowler"
|
||||
|
||||
@@ -557,7 +557,7 @@ class TestFinding:
|
||||
assert finding_output.resource_tags == {}
|
||||
assert finding_output.partition is None
|
||||
assert finding_output.account_uid == "test_cluster"
|
||||
assert finding_output.provider_uid == "In-Cluster"
|
||||
assert finding_output.provider_uid == "test_cluster"
|
||||
assert finding_output.account_name == "context: In-Cluster"
|
||||
assert finding_output.account_email is None
|
||||
assert finding_output.account_organization_uid is None
|
||||
@@ -591,6 +591,40 @@ class TestFinding:
|
||||
assert finding_output.metadata.Notes == "mock_notes"
|
||||
assert finding_output.metadata.Compliance == []
|
||||
|
||||
def test_generate_output_kubernetes_kubeconfig(self):
|
||||
# Mock provider
|
||||
provider = MagicMock()
|
||||
provider.type = "kubernetes"
|
||||
provider.identity.context = "test-context"
|
||||
provider.identity.cluster = "test_cluster"
|
||||
|
||||
# Mock check result
|
||||
check_output = MagicMock()
|
||||
check_output.resource_name = "test_resource_name"
|
||||
check_output.resource_id = "test_resource_id"
|
||||
check_output.namespace = "test_namespace"
|
||||
check_output.resource_details = "test_resource_details"
|
||||
check_output.status = Status.PASS
|
||||
check_output.status_extended = "mock_status_extended"
|
||||
check_output.muted = False
|
||||
check_output.check_metadata = mock_check_metadata(provider="kubernetes")
|
||||
check_output.timestamp = datetime.now()
|
||||
check_output.resource = {}
|
||||
check_output.compliance = {}
|
||||
|
||||
# Mock Output Options
|
||||
output_options = MagicMock()
|
||||
output_options.unix_timestamp = True
|
||||
|
||||
# Generate the finding
|
||||
finding_output = Finding.generate_output(provider, check_output, output_options)
|
||||
|
||||
assert isinstance(finding_output, Finding)
|
||||
assert finding_output.auth_method == "kubeconfig"
|
||||
assert finding_output.account_uid == "test_cluster"
|
||||
assert finding_output.provider_uid == "test-context"
|
||||
assert finding_output.account_name == "context: test-context"
|
||||
|
||||
def test_generate_output_github_personal_access_token(self):
|
||||
"""Test GitHub output generation with Personal Access Token authentication."""
|
||||
# Mock provider using Personal Access Token
|
||||
|
||||
@@ -2,6 +2,14 @@
|
||||
|
||||
All notable changes to the **Prowler UI** are documented in this file.
|
||||
|
||||
## [1.25.1] (Prowler v5.25.1)
|
||||
|
||||
### 🐞 Fixed
|
||||
|
||||
- Compliance page export menu now scales on small screens, and frameworks load on first render without requiring a manual scan re-selection [(#10918)](https://github.com/prowler-cloud/prowler/pull/10918)
|
||||
|
||||
---
|
||||
|
||||
## [1.25.0] (Prowler v5.25.0)
|
||||
|
||||
### 🚀 Added
|
||||
|
||||
@@ -166,6 +166,7 @@ export default async function Compliance({
|
||||
>
|
||||
<SSRComplianceGrid
|
||||
searchParams={resolvedSearchParams}
|
||||
scanId={selectedScanId}
|
||||
selectedScan={selectedScanData}
|
||||
/>
|
||||
</Suspense>
|
||||
@@ -179,12 +180,13 @@ export default async function Compliance({
|
||||
|
||||
const SSRComplianceGrid = async ({
|
||||
searchParams,
|
||||
scanId,
|
||||
selectedScan,
|
||||
}: {
|
||||
searchParams: SearchParamsProps;
|
||||
scanId: string | null;
|
||||
selectedScan?: ScanEntity;
|
||||
}) => {
|
||||
const scanId = searchParams.scanId?.toString() || "";
|
||||
const regionFilter = searchParams["filter[region__in]"]?.toString() || "";
|
||||
|
||||
// Only fetch compliance data if we have a valid scanId
|
||||
@@ -247,7 +249,7 @@ const SSRComplianceGrid = async ({
|
||||
<ComplianceOverviewPanel>
|
||||
<ComplianceOverviewGrid
|
||||
frameworks={frameworks}
|
||||
scanId={scanId}
|
||||
scanId={scanId ?? ""}
|
||||
selectedScan={selectedScan}
|
||||
latestCisIds={latestCisIds}
|
||||
/>
|
||||
|
||||
@@ -89,12 +89,38 @@ export const ComplianceCard: React.FC<ComplianceCardProps> = ({
|
||||
<Card
|
||||
variant="base"
|
||||
padding="md"
|
||||
className="cursor-pointer transition-shadow hover:shadow-md"
|
||||
className="relative cursor-pointer transition-shadow hover:shadow-md"
|
||||
onClick={navigateToDetail}
|
||||
>
|
||||
<div
|
||||
className="absolute top-2 right-2 z-10"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" || e.key === " ") {
|
||||
e.stopPropagation();
|
||||
}
|
||||
}}
|
||||
role="group"
|
||||
tabIndex={0}
|
||||
>
|
||||
<ComplianceDownloadContainer
|
||||
compact
|
||||
orientation="column"
|
||||
buttonWidth="icon"
|
||||
presentation="dropdown"
|
||||
scanId={scanId}
|
||||
complianceId={complianceId}
|
||||
reportType={getReportTypeForCompliance(
|
||||
title,
|
||||
complianceId,
|
||||
isLatestCisForProvider,
|
||||
)}
|
||||
disabled={hasRegionFilter}
|
||||
/>
|
||||
</div>
|
||||
<CardContent className="p-0">
|
||||
<div className="flex w-full flex-col gap-3 sm:flex-row sm:items-start">
|
||||
<div className="flex shrink-0 items-center justify-between sm:flex-col sm:items-start sm:gap-2">
|
||||
<div className="flex shrink-0 items-center sm:flex-col sm:items-start sm:gap-2">
|
||||
{getComplianceIcon(title) && (
|
||||
<Image
|
||||
src={getComplianceIcon(title)}
|
||||
@@ -102,37 +128,11 @@ export const ComplianceCard: React.FC<ComplianceCardProps> = ({
|
||||
className="h-10 w-10 min-w-10 self-start rounded-md border border-gray-300 bg-white object-contain p-1"
|
||||
/>
|
||||
)}
|
||||
<div
|
||||
className="shrink-0"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" || e.key === " ") {
|
||||
e.stopPropagation();
|
||||
}
|
||||
}}
|
||||
role="group"
|
||||
tabIndex={0}
|
||||
>
|
||||
<ComplianceDownloadContainer
|
||||
compact
|
||||
orientation="column"
|
||||
buttonWidth="icon"
|
||||
presentation="dropdown"
|
||||
scanId={scanId}
|
||||
complianceId={complianceId}
|
||||
reportType={getReportTypeForCompliance(
|
||||
title,
|
||||
complianceId,
|
||||
isLatestCisForProvider,
|
||||
)}
|
||||
disabled={hasRegionFilter}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex w-full min-w-0 flex-col gap-3">
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<h4 className="text-small truncate leading-5 font-bold">
|
||||
<h4 className="text-small truncate pr-9 leading-5 font-bold">
|
||||
{formatTitle(title)}
|
||||
{version ? ` - ${version}` : ""}
|
||||
</h4>
|
||||
|
||||
@@ -70,7 +70,7 @@ describe("ComplianceDownloadContainer", () => {
|
||||
const trigger = screen.getByRole("button", {
|
||||
name: "Open compliance export actions",
|
||||
});
|
||||
expect(trigger.className).toContain("border-text-neutral-secondary");
|
||||
expect(trigger.className).toContain("rounded-md");
|
||||
});
|
||||
|
||||
it("should open export actions from the compact trigger", async () => {
|
||||
|
||||
@@ -15,8 +15,7 @@ import {
|
||||
|
||||
const ACTION_TRIGGER_STYLES = {
|
||||
table: "hover:bg-bg-neutral-tertiary rounded-full p-1 transition-colors",
|
||||
bordered:
|
||||
"hover:bg-bg-neutral-tertiary rounded-full border border-text-neutral-secondary p-2 transition-colors",
|
||||
bordered: "hover:bg-bg-neutral-tertiary rounded-md p-1.5 transition-colors",
|
||||
} as const;
|
||||
|
||||
type ActionDropdownVariant = keyof typeof ACTION_TRIGGER_STYLES;
|
||||
@@ -24,7 +23,7 @@ type ActionDropdownVariant = keyof typeof ACTION_TRIGGER_STYLES;
|
||||
interface ActionDropdownProps {
|
||||
/** The dropdown trigger element. Defaults to a vertical dots icon button */
|
||||
trigger?: ReactNode;
|
||||
/** Trigger style variant. "table" = no border, "bordered" = circular border */
|
||||
/** Trigger style variant. "table" = compact pill, "bordered" = card action */
|
||||
variant?: ActionDropdownVariant;
|
||||
/** Alignment of the dropdown content */
|
||||
align?: "start" | "center" | "end";
|
||||
@@ -62,7 +61,12 @@ export function ActionDropdown({
|
||||
aria-label={ariaLabel}
|
||||
className={ACTION_TRIGGER_STYLES[variant]}
|
||||
>
|
||||
<EllipsisVertical className="text-text-neutral-secondary size-6" />
|
||||
<EllipsisVertical
|
||||
className={cn(
|
||||
"text-text-neutral-secondary",
|
||||
variant === "bordered" ? "size-5" : "size-6",
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
)}
|
||||
</DropdownMenuTrigger>
|
||||
|
||||
Reference in New Issue
Block a user