diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 2033dcc420..6759b28741 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -19,6 +19,7 @@ All notable changes to the **Prowler API** are documented in this file. - Workers now shut down gracefully on deploy or restart, finishing or re-queueing in-flight tasks instead of being force-killed and leaving them stuck [(#11416)](https://github.com/prowler-cloud/prowler/pull/11416) - Resource `name` is now stored and refreshed on every scan, so resources no longer keep an empty name [(#11476)](https://github.com/prowler-cloud/prowler/pull/11476) +- Compliance catalog now warms in a background thread after each worker forks, and `compliance-overviews/attributes` returns `503` while warming, so the first request after a deploy no longer trips the Gunicorn worker timeout [(#4554)](https://github.com/prowler-cloud/prowler-cloud/pull/4554) ### 🔐 Security diff --git a/api/src/backend/api/compliance.py b/api/src/backend/api/compliance.py index 678aff8d57..202825185b 100644 --- a/api/src/backend/api/compliance.py +++ b/api/src/backend/api/compliance.py @@ -1,3 +1,5 @@ +import logging +import threading from collections.abc import Iterable, Mapping from api.models import Provider @@ -6,8 +8,19 @@ from prowler.lib.check.compliance_models import ( ) from prowler.lib.check.models import CheckMetadata +logger = logging.getLogger(__name__) + AVAILABLE_COMPLIANCE_FRAMEWORKS = {} +# Per-process readiness flags for the background compliance warm-up. +# `STARTED` is set as soon as warming begins (only happens under Gunicorn via +# the post_fork hook); `WARMED` is set when it finishes. The attributes +# endpoint checks both: it returns 503 only while warming is in progress. +# Under `runserver` warming never runs, so `STARTED` stays clear and the +# endpoint keeps lazy-loading as before. +COMPLIANCE_WARMING_STARTED = threading.Event() +COMPLIANCE_WARMED = threading.Event() + class LazyComplianceTemplate(Mapping): """Lazy-load compliance templates per provider on first access.""" @@ -174,6 +187,56 @@ def _ensure_provider_loaded(provider_type: Provider.ProviderChoices) -> None: PROWLER_CHECKS._cache[provider_type] = checks +def warm_compliance_caches( + provider_types: Iterable[str] | None = None, +) -> list[str]: + """ + Eagerly populate the per-process compliance caches at server startup. + + Moves the cold-cache catalog load off the request thread so the first + request does not trip the Gunicorn worker timeout. Reads only on-disk + metadata (no database access). Each provider is warmed in isolation; + failures are logged and fall back to lazy loading. + + Args: + provider_types (Iterable[str] | None): Subset to warm. Defaults to all. + + Returns: + list[str]: Provider types that could not be warmed. + """ + if provider_types is None: + provider_types = Provider.ProviderChoices.values + provider_types = list(provider_types) + + COMPLIANCE_WARMING_STARTED.set() + logger.info("Compliance cache warm-up started for providers: %s", provider_types) + + failed = [] + for provider_type in provider_types: + try: + get_compliance_frameworks(provider_type) + _ensure_provider_loaded(provider_type) + # Prowler check loading may sys.exit (SystemExit, not Exception). + except (Exception, SystemExit): + logger.warning( + "Failed to warm compliance caches for provider '%s'; " + "loading lazily on first request", + provider_type, + exc_info=True, + ) + failed.append(provider_type) + + # Mark as warmed even when some providers failed: a failed provider falls + # back to a single-provider lazy load, which stays under the worker timeout. + COMPLIANCE_WARMED.set() + logger.info( + "Compliance cache warm-up finished (providers warmed: %d, failed: %s)", + len(provider_types) - len(failed), + failed, + ) + return failed + + def load_prowler_checks( prowler_compliance, provider_types: Iterable[str] | None = None ): diff --git a/api/src/backend/api/exceptions.py b/api/src/backend/api/exceptions.py index 78f8c64c7d..4f6f26c2ea 100644 --- a/api/src/backend/api/exceptions.py +++ b/api/src/backend/api/exceptions.py @@ -187,6 +187,32 @@ class UpstreamServiceUnavailableError(APIException): ) +class ComplianceWarmingError(APIException): + """Compliance catalog is still warming (503 Service Unavailable). + + Returned by the compliance attributes endpoint while the per-process + catalog warm-up is in progress, so the request thread never triggers the + slow cold load that would trip the Gunicorn worker timeout. + """ + + status_code = status.HTTP_503_SERVICE_UNAVAILABLE + default_detail = ( + "Compliance data is still loading. Please try again in a few seconds." + ) + default_code = "compliance_warming" + + def __init__(self, detail=None): + super().__init__( + detail=[ + { + "detail": detail or self.default_detail, + "status": str(self.status_code), + "code": self.default_code, + } + ] + ) + + class UpstreamInternalError(APIException): """Unexpected error communicating with provider (500 Internal Server Error). diff --git a/api/src/backend/api/tests/test_compliance.py b/api/src/backend/api/tests/test_compliance.py index 508e5abaca..99a31ea12c 100644 --- a/api/src/backend/api/tests/test_compliance.py +++ b/api/src/backend/api/tests/test_compliance.py @@ -10,6 +10,7 @@ from api.compliance import ( get_prowler_provider_checks, get_prowler_provider_compliance, load_prowler_checks, + warm_compliance_caches, ) from api.models import Provider from prowler.lib.check.compliance_models import ( @@ -267,11 +268,17 @@ def reset_compliance_cache(): """Reset the module-level cache so each test starts cold.""" previous = dict(compliance_module.AVAILABLE_COMPLIANCE_FRAMEWORKS) compliance_module.AVAILABLE_COMPLIANCE_FRAMEWORKS.clear() + # The warming flags are module-global; clear them so they do not leak + # between tests that call warm_compliance_caches. + compliance_module.COMPLIANCE_WARMING_STARTED.clear() + compliance_module.COMPLIANCE_WARMED.clear() try: yield finally: compliance_module.AVAILABLE_COMPLIANCE_FRAMEWORKS.clear() compliance_module.AVAILABLE_COMPLIANCE_FRAMEWORKS.update(previous) + compliance_module.COMPLIANCE_WARMING_STARTED.clear() + compliance_module.COMPLIANCE_WARMED.clear() class TestGetComplianceFrameworks: @@ -321,3 +328,89 @@ class TestGetComplianceFrameworks: f"loadable by get_bulk_compliance_frameworks_universal: " f"{sorted(missing)}" ) + + +class TestWarmComplianceCaches: + def test_warms_all_provider_types_by_default(self, reset_compliance_cache): + provider_types = list(Provider.ProviderChoices.values) + with ( + patch("api.compliance.get_compliance_frameworks") as mock_frameworks, + patch("api.compliance._ensure_provider_loaded") as mock_ensure, + ): + warm_compliance_caches() + + warmed = {call.args[0] for call in mock_frameworks.call_args_list} + assert warmed == set(provider_types) + assert mock_frameworks.call_count == len(provider_types) + assert mock_ensure.call_count == len(provider_types) + + def test_warms_only_requested_provider_types(self, reset_compliance_cache): + with ( + patch("api.compliance.get_compliance_frameworks") as mock_frameworks, + patch("api.compliance._ensure_provider_loaded") as mock_ensure, + ): + warm_compliance_caches([Provider.ProviderChoices.AWS]) + + mock_frameworks.assert_called_once_with(Provider.ProviderChoices.AWS) + mock_ensure.assert_called_once_with(Provider.ProviderChoices.AWS) + + def test_populates_module_cache(self, reset_compliance_cache): + with ( + patch( + "api.compliance.get_bulk_compliance_frameworks_universal" + ) as mock_get_bulk, + patch("api.compliance._ensure_provider_loaded"), + ): + mock_get_bulk.return_value = {"cis_1.4_aws": MagicMock()} + warm_compliance_caches([Provider.ProviderChoices.AWS]) + + assert ( + Provider.ProviderChoices.AWS + in compliance_module.AVAILABLE_COMPLIANCE_FRAMEWORKS + ) + + def test_failing_provider_does_not_abort_the_rest(self, reset_compliance_cache): + """A failing provider (even on SystemExit) is isolated; others warm.""" + providers = [Provider.ProviderChoices.AWS, Provider.ProviderChoices.OKTA] + + def fake_frameworks(provider_type): + if provider_type == Provider.ProviderChoices.OKTA: + raise SystemExit(1) + return [] + + with ( + patch( + "api.compliance.get_compliance_frameworks", side_effect=fake_frameworks + ), + patch("api.compliance._ensure_provider_loaded") as mock_ensure, + ): + failed = warm_compliance_caches(providers) + + assert failed == [Provider.ProviderChoices.OKTA] + mock_ensure.assert_called_once_with(Provider.ProviderChoices.AWS) + + def test_sets_readiness_flags(self, reset_compliance_cache): + assert not compliance_module.COMPLIANCE_WARMING_STARTED.is_set() + assert not compliance_module.COMPLIANCE_WARMED.is_set() + + with ( + patch("api.compliance.get_compliance_frameworks"), + patch("api.compliance._ensure_provider_loaded"), + ): + warm_compliance_caches([Provider.ProviderChoices.AWS]) + + assert compliance_module.COMPLIANCE_WARMING_STARTED.is_set() + assert compliance_module.COMPLIANCE_WARMED.is_set() + + def test_marks_warmed_even_when_a_provider_fails(self, reset_compliance_cache): + """A failed provider still leaves the caches flagged as warmed.""" + with ( + patch( + "api.compliance.get_compliance_frameworks", + side_effect=SystemExit(1), + ), + patch("api.compliance._ensure_provider_loaded"), + ): + warm_compliance_caches([Provider.ProviderChoices.AWS]) + + assert compliance_module.COMPLIANCE_WARMED.is_set() diff --git a/api/src/backend/api/tests/test_views.py b/api/src/backend/api/tests/test_views.py index d213e8c855..ab90bdbde0 100644 --- a/api/src/backend/api/tests/test_views.py +++ b/api/src/backend/api/tests/test_views.py @@ -9578,6 +9578,39 @@ class TestComplianceOverviewViewSet: ) assert response.status_code == status.HTTP_400_BAD_REQUEST + def test_compliance_overview_attributes_503_while_warming( + self, authenticated_client + ): + from api.compliance import COMPLIANCE_WARMED, COMPLIANCE_WARMING_STARTED + + COMPLIANCE_WARMING_STARTED.set() + COMPLIANCE_WARMED.clear() + try: + response = authenticated_client.get( + reverse("complianceoverview-attributes"), + {"filter[compliance_id]": "aws_account_security_onboarding_aws"}, + ) + finally: + COMPLIANCE_WARMING_STARTED.clear() + + assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert response.json()["errors"][0]["code"] == "compliance_warming" + + def test_compliance_overview_attributes_serves_when_warming_not_started( + self, authenticated_client + ): + # Dev fallback: under runserver warming never runs, so the guard must + # not refuse — the endpoint lazily loads and serves as before. + from api.compliance import COMPLIANCE_WARMED, COMPLIANCE_WARMING_STARTED + + COMPLIANCE_WARMING_STARTED.clear() + COMPLIANCE_WARMED.clear() + response = authenticated_client.get( + reverse("complianceoverview-attributes"), + {"filter[compliance_id]": "aws_account_security_onboarding_aws"}, + ) + assert response.status_code == status.HTTP_200_OK + def test_compliance_overview_task_management_integration( self, authenticated_client, compliance_requirements_overviews_fixture ): diff --git a/api/src/backend/api/v1/views.py b/api/src/backend/api/v1/views.py index 9c91c3201f..45100f1018 100644 --- a/api/src/backend/api/v1/views.py +++ b/api/src/backend/api/v1/views.py @@ -114,6 +114,8 @@ from api.attack_paths import get_queries_for_provider, get_query_by_id from api.attack_paths import views_helpers as attack_paths_views_helpers from api.base_views import BaseRLSViewSet, BaseTenantViewset, BaseUserViewset from api.compliance import ( + COMPLIANCE_WARMED, + COMPLIANCE_WARMING_STARTED, PROWLER_COMPLIANCE_OVERVIEW_TEMPLATE, get_compliance_frameworks, get_prowler_provider_compliance, @@ -122,6 +124,7 @@ from api.constants import SEVERITY_ORDER from api.db_router import MainRouter from api.db_utils import rls_transaction from api.exceptions import ( + ComplianceWarmingError, TaskFailedException, UpstreamAccessDeniedError, UpstreamAuthenticationError, @@ -5059,6 +5062,13 @@ class ComplianceOverviewViewSet(BaseRLSViewSet, TaskManagementMixin): @action(detail=False, methods=["get"], url_name="attributes") def attributes(self, request): + # While the background warm-up is in progress, refuse immediately + # instead of falling through to the slow cold load on the request + # thread (which would trip the Gunicorn worker timeout). `is_set()` is + # a non-blocking flag read, so this never touches the loader. + if COMPLIANCE_WARMING_STARTED.is_set() and not COMPLIANCE_WARMED.is_set(): + raise ComplianceWarmingError() + compliance_id = request.query_params.get("filter[compliance_id]") if not compliance_id: raise ValidationError( diff --git a/api/src/backend/config/guniconf.py b/api/src/backend/config/guniconf.py index 536fd97abb..a16c8de9a0 100644 --- a/api/src/backend/config/guniconf.py +++ b/api/src/backend/config/guniconf.py @@ -1,6 +1,7 @@ import logging import multiprocessing import os +import threading from config.env import env @@ -11,6 +12,7 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.django.production") import django # noqa: E402 django.setup() +from api.compliance import warm_compliance_caches # noqa: E402 from config.django.production import LOGGING as DJANGO_LOGGERS, DEBUG # noqa: E402 from config.custom_logging import BackendLogger # noqa: E402 @@ -41,3 +43,26 @@ def on_reload(_): def when_ready(_): gunicorn_logger.info("Gunicorn server is ready") + + +def _warm_compliance_caches_in_background(): + """Warm compliance caches off the request path and log the outcome.""" + failed = warm_compliance_caches() + if failed: + gunicorn_logger.warning("Compliance caches warmed (skipped: %s)", failed) + else: + gunicorn_logger.info("Compliance caches warmed") + + +def post_fork(_server, worker): + """Warm compliance caches after each worker fork. + + Warm compliance caches in a background thread so the worker becomes ready + immediately. A request for a not-yet-warmed provider lazily loads just that + provider, which stays well under the worker timeout. + """ + threading.Thread( + target=_warm_compliance_caches_in_background, + name="warm-compliance-caches", + daemon=True, + ).start() diff --git a/ui/CHANGELOG.md b/ui/CHANGELOG.md index dc8c0fa708..57d0427dc9 100644 --- a/ui/CHANGELOG.md +++ b/ui/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to the **Prowler UI** are documented in this file. ### 🔄 Changed - Renamed "Customer Support" to "Support Desk" in the side menu, showing it only in Prowler Cloud/Enterprise, while "Community Support" now shows only in Prowler OSS [(#11508)](https://github.com/prowler-cloud/prowler/pull/11508) +- Compliance detail page now shows a "still loading" retry state while the API warms its compliance catalog, instead of rendering an empty page [(#4554)](https://github.com/prowler-cloud/prowler-cloud/pull/4554) --- diff --git a/ui/actions/compliances/compliances.ts b/ui/actions/compliances/compliances.ts index d5f4fd4954..58e7c5b5a0 100644 --- a/ui/actions/compliances/compliances.ts +++ b/ui/actions/compliances/compliances.ts @@ -84,6 +84,13 @@ export const getComplianceAttributes = async (complianceId: string) => { headers, }); + // The compliance catalog is still warming after a deploy/restart. Signal + // the page to render the "still loading" state instead of letting this + // become a thrown 5xx (which would be captured as a server error). + if (response.status === 503) { + return { warming: true as const, status: 503 }; + } + return handleApiResponse(response); } catch (error) { console.error("Error fetching compliance attributes:", error); diff --git a/ui/app/(prowler)/compliance/[compliancetitle]/page.tsx b/ui/app/(prowler)/compliance/[compliancetitle]/page.tsx index 71e7870f8d..445710e48f 100644 --- a/ui/app/(prowler)/compliance/[compliancetitle]/page.tsx +++ b/ui/app/(prowler)/compliance/[compliancetitle]/page.tsx @@ -13,6 +13,7 @@ import { ClientAccordionWrapper, ComplianceDownloadContainer, ComplianceHeader, + ComplianceWarming, RequirementsStatusCard, RequirementsStatusCardSkeleton, // SectionsFailureRateCard, @@ -92,6 +93,16 @@ export default async function ComplianceDetail({ : Promise.resolve(null), ]); + // The compliance catalog is still warming after a deploy/restart. Show the + // "still loading" state with a Try Again instead of rendering an empty page. + if (attributesData?.warming) { + return ( + + + + ); + } + if (selectedScanResponse?.data) { const scan = selectedScanResponse.data; const providerId = scan.relationships?.provider?.data?.id; diff --git a/ui/components/compliance/compliance-warming.tsx b/ui/components/compliance/compliance-warming.tsx new file mode 100644 index 0000000000..19864ae121 --- /dev/null +++ b/ui/components/compliance/compliance-warming.tsx @@ -0,0 +1,49 @@ +"use client"; + +import { Icon } from "@iconify/react"; +import { useRouter } from "next/navigation"; + +import { Button } from "@/components/shadcn/button/button"; +import { Card, CardContent } from "@/components/shadcn/card/card"; + +export const ComplianceWarming = () => { + const router = useRouter(); + + return ( +
+
+ + +
+
+ +
+

+ Compliance data is still loading +

+

+ This can happen for a few seconds right after an update. + Please try again shortly. +

+
+
+ +
+
+
+
+
+ ); +}; diff --git a/ui/components/compliance/index.ts b/ui/components/compliance/index.ts index e0bd631200..caea32e264 100644 --- a/ui/components/compliance/index.ts +++ b/ui/components/compliance/index.ts @@ -19,6 +19,7 @@ export * from "./compliance-header/compliance-scan-info"; export * from "./compliance-header/data-compliance"; export * from "./compliance-header/scan-selector"; export * from "./compliance-overview-grid"; +export * from "./compliance-warming"; export * from "./no-scans-available"; export * from "./skeletons/bar-chart-skeleton"; export * from "./skeletons/compliance-accordion-skeleton";