diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index ab40e6abfa..af49ee2348 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -2,13 +2,20 @@ All notable changes to the **Prowler API** are documented in this file. -## [1.18.0] (Prowler UUNRELEASED) +## [1.18.0] (Prowler UNRELEASED) ### Added - Support AlibabaCloud provider [(#9485)](https://github.com/prowler-cloud/prowler/pull/9485) --- +## [1.17.1] (Prowler UNRELEASED) + +### Fixed +- Orphan scheduled scans caused by transaction isolation during provider creation [(#9633)](https://github.com/prowler-cloud/prowler/pull/9633) + +--- + ## [1.17.0] (Prowler v5.16.0) ### Added diff --git a/api/src/backend/tasks/tasks.py b/api/src/backend/tasks/tasks.py index a29c74fd36..b4994fa41d 100644 --- a/api/src/backend/tasks/tasks.py +++ b/api/src/backend/tasks/tasks.py @@ -61,6 +61,58 @@ from prowler.lib.outputs.finding import Finding as FindingOutput logger = get_task_logger(__name__) +def _cleanup_orphan_scheduled_scans( + tenant_id: str, + provider_id: str, + scheduler_task_id: int, +) -> int: + """ + TEMPORARY WORKAROUND: Clean up orphan AVAILABLE scans. + + Detects and removes AVAILABLE scans that were never used due to an + issue during the first scheduled scan setup. + + An AVAILABLE scan is considered orphan if there's also a SCHEDULED scan for + the same provider with the same scheduler_task_id. This situation indicates + that the first scan execution didn't find the AVAILABLE scan (because it + wasn't committed yet, probably) and created a new one, leaving the AVAILABLE orphaned. + + Args: + tenant_id: The tenant ID. + provider_id: The provider ID. + scheduler_task_id: The PeriodicTask ID that triggers these scans. + + Returns: + Number of orphan scans deleted (0 if none found). + """ + orphan_available_scans = Scan.objects.filter( + tenant_id=tenant_id, + provider_id=provider_id, + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=scheduler_task_id, + ) + + scheduled_scan_exists = Scan.objects.filter( + tenant_id=tenant_id, + provider_id=provider_id, + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=scheduler_task_id, + ).exists() + + if scheduled_scan_exists and orphan_available_scans.exists(): + orphan_count = orphan_available_scans.count() + logger.warning( + f"[WORKAROUND] Found {orphan_count} orphan AVAILABLE scan(s) for " + f"provider {provider_id} alongside a SCHEDULED scan. Cleaning up orphans..." + ) + orphan_available_scans.delete() + return orphan_count + + return 0 + + def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str): """ Helper function to perform tasks after a scan is completed. @@ -247,6 +299,14 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str): return serializer.data next_scan_datetime = get_next_execution_datetime(task_id, provider_id) + + # TEMPORARY WORKAROUND: Clean up orphan scans from transaction isolation issue + _cleanup_orphan_scheduled_scans( + tenant_id=tenant_id, + provider_id=provider_id, + scheduler_task_id=periodic_task_instance.id, + ) + scan_instance, _ = Scan.objects.get_or_create( tenant_id=tenant_id, provider_id=provider_id, diff --git a/api/src/backend/tasks/tests/test_tasks.py b/api/src/backend/tasks/tests/test_tasks.py index 6a4072881c..ceb7f608db 100644 --- a/api/src/backend/tasks/tests/test_tasks.py +++ b/api/src/backend/tasks/tests/test_tasks.py @@ -4,11 +4,13 @@ from unittest.mock import MagicMock, patch import openai import pytest from botocore.exceptions import ClientError +from django_celery_beat.models import IntervalSchedule, PeriodicTask from tasks.jobs.lighthouse_providers import ( _create_bedrock_client, _extract_bedrock_credentials, ) from tasks.tasks import ( + _cleanup_orphan_scheduled_scans, _perform_scan_complete_tasks, check_integrations_task, check_lighthouse_provider_connection_task, @@ -22,6 +24,8 @@ from api.models import ( Integration, LighthouseProviderConfiguration, LighthouseProviderModels, + Scan, + StateChoices, ) @@ -1715,3 +1719,343 @@ class TestRefreshLighthouseProviderModelsTask: assert result["deleted"] == 0 assert "error" in result assert result["error"] is not None + + +@pytest.mark.django_db +class TestCleanupOrphanScheduledScans: + """Unit tests for _cleanup_orphan_scheduled_scans helper function.""" + + def _create_periodic_task(self, provider_id, tenant_id): + """Helper to create a PeriodicTask for testing.""" + interval, _ = IntervalSchedule.objects.get_or_create(every=24, period="hours") + return PeriodicTask.objects.create( + name=f"scan-perform-scheduled-{provider_id}", + task="scan-perform-scheduled", + interval=interval, + kwargs=f'{{"tenant_id": "{tenant_id}", "provider_id": "{provider_id}"}}', + enabled=True, + ) + + def test_cleanup_deletes_orphan_when_both_available_and_scheduled_exist( + self, tenants_fixture, providers_fixture + ): + """Test that AVAILABLE scan is deleted when SCHEDULED also exists.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Create orphan AVAILABLE scan + orphan_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task.id, + ) + + # Create SCHEDULED scan (next execution) + scheduled_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task.id, + ) + + # Execute cleanup + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + # Verify orphan was deleted + assert deleted_count == 1 + assert not Scan.objects.filter(id=orphan_scan.id).exists() + assert Scan.objects.filter(id=scheduled_scan.id).exists() + + def test_cleanup_does_not_delete_when_only_available_exists( + self, tenants_fixture, providers_fixture + ): + """Test that AVAILABLE scan is NOT deleted when no SCHEDULED exists.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Create only AVAILABLE scan (normal first scan scenario) + available_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task.id, + ) + + # Execute cleanup + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + # Verify nothing was deleted + assert deleted_count == 0 + assert Scan.objects.filter(id=available_scan.id).exists() + + def test_cleanup_does_not_delete_when_only_scheduled_exists( + self, tenants_fixture, providers_fixture + ): + """Test that nothing is deleted when only SCHEDULED exists.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Create only SCHEDULED scan (normal subsequent scan scenario) + scheduled_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task.id, + ) + + # Execute cleanup + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + # Verify nothing was deleted + assert deleted_count == 0 + assert Scan.objects.filter(id=scheduled_scan.id).exists() + + def test_cleanup_returns_zero_when_no_scans_exist( + self, tenants_fixture, providers_fixture + ): + """Test that cleanup returns 0 when no scans exist.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Execute cleanup with no scans + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + assert deleted_count == 0 + + def test_cleanup_deletes_multiple_orphan_available_scans( + self, tenants_fixture, providers_fixture + ): + """Test that multiple AVAILABLE orphan scans are all deleted.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Create multiple orphan AVAILABLE scans + orphan_scan_1 = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task.id, + ) + orphan_scan_2 = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task.id, + ) + + # Create SCHEDULED scan + scheduled_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task.id, + ) + + # Execute cleanup + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + # Verify all orphans were deleted + assert deleted_count == 2 + assert not Scan.objects.filter(id=orphan_scan_1.id).exists() + assert not Scan.objects.filter(id=orphan_scan_2.id).exists() + assert Scan.objects.filter(id=scheduled_scan.id).exists() + + def test_cleanup_does_not_affect_different_provider( + self, tenants_fixture, providers_fixture + ): + """Test that cleanup only affects scans for the specified provider.""" + tenant = tenants_fixture[0] + provider1 = providers_fixture[0] + provider2 = providers_fixture[1] + periodic_task1 = self._create_periodic_task(provider1.id, tenant.id) + periodic_task2 = self._create_periodic_task(provider2.id, tenant.id) + + # Create orphan scenario for provider1 + orphan_scan_p1 = Scan.objects.create( + tenant_id=tenant.id, + provider=provider1, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task1.id, + ) + scheduled_scan_p1 = Scan.objects.create( + tenant_id=tenant.id, + provider=provider1, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task1.id, + ) + + # Create AVAILABLE scan for provider2 (should not be affected) + available_scan_p2 = Scan.objects.create( + tenant_id=tenant.id, + provider=provider2, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task2.id, + ) + + # Execute cleanup for provider1 only + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider1.id), + scheduler_task_id=periodic_task1.id, + ) + + # Verify only provider1's orphan was deleted + assert deleted_count == 1 + assert not Scan.objects.filter(id=orphan_scan_p1.id).exists() + assert Scan.objects.filter(id=scheduled_scan_p1.id).exists() + assert Scan.objects.filter(id=available_scan_p2.id).exists() + + def test_cleanup_does_not_affect_manual_scans( + self, tenants_fixture, providers_fixture + ): + """Test that cleanup only affects SCHEDULED trigger scans, not MANUAL.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task = self._create_periodic_task(provider.id, tenant.id) + + # Create orphan AVAILABLE scheduled scan + orphan_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task.id, + ) + + # Create SCHEDULED scan + scheduled_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task.id, + ) + + # Create AVAILABLE manual scan (should not be affected) + manual_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Manual scan", + trigger=Scan.TriggerChoices.MANUAL, + state=StateChoices.AVAILABLE, + ) + + # Execute cleanup + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task.id, + ) + + # Verify only scheduled orphan was deleted + assert deleted_count == 1 + assert not Scan.objects.filter(id=orphan_scan.id).exists() + assert Scan.objects.filter(id=scheduled_scan.id).exists() + assert Scan.objects.filter(id=manual_scan.id).exists() + + def test_cleanup_does_not_affect_different_scheduler_task( + self, tenants_fixture, providers_fixture + ): + """Test that cleanup only affects scans with the specified scheduler_task_id.""" + tenant = tenants_fixture[0] + provider = providers_fixture[0] + periodic_task1 = self._create_periodic_task(provider.id, tenant.id) + + # Create another periodic task + interval, _ = IntervalSchedule.objects.get_or_create(every=24, period="hours") + periodic_task2 = PeriodicTask.objects.create( + name=f"scan-perform-scheduled-other-{provider.id}", + task="scan-perform-scheduled", + interval=interval, + kwargs=f'{{"tenant_id": "{tenant.id}", "provider_id": "{provider.id}"}}', + enabled=True, + ) + + # Create orphan scenario for periodic_task1 + orphan_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task1.id, + ) + scheduled_scan = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.SCHEDULED, + scheduler_task_id=periodic_task1.id, + ) + + # Create AVAILABLE scan for periodic_task2 (should not be affected) + available_scan_other_task = Scan.objects.create( + tenant_id=tenant.id, + provider=provider, + name="Daily scheduled scan", + trigger=Scan.TriggerChoices.SCHEDULED, + state=StateChoices.AVAILABLE, + scheduler_task_id=periodic_task2.id, + ) + + # Execute cleanup for periodic_task1 only + deleted_count = _cleanup_orphan_scheduled_scans( + tenant_id=str(tenant.id), + provider_id=str(provider.id), + scheduler_task_id=periodic_task1.id, + ) + + # Verify only periodic_task1's orphan was deleted + assert deleted_count == 1 + assert not Scan.objects.filter(id=orphan_scan.id).exists() + assert Scan.objects.filter(id=scheduled_scan.id).exists() + assert Scan.objects.filter(id=available_scan_other_task.id).exists()