mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-01-25 02:08:11 +00:00
fix(api): add cleanup for orphan scheduled scans caused by transaction isolation (#9633)
This commit is contained in:
@@ -2,13 +2,20 @@
|
||||
|
||||
All notable changes to the **Prowler API** are documented in this file.
|
||||
|
||||
## [1.18.0] (Prowler UUNRELEASED)
|
||||
## [1.18.0] (Prowler UNRELEASED)
|
||||
|
||||
### Added
|
||||
- Support AlibabaCloud provider [(#9485)](https://github.com/prowler-cloud/prowler/pull/9485)
|
||||
|
||||
---
|
||||
|
||||
## [1.17.1] (Prowler UNRELEASED)
|
||||
|
||||
### Fixed
|
||||
- Orphan scheduled scans caused by transaction isolation during provider creation [(#9633)](https://github.com/prowler-cloud/prowler/pull/9633)
|
||||
|
||||
---
|
||||
|
||||
## [1.17.0] (Prowler v5.16.0)
|
||||
|
||||
### Added
|
||||
|
||||
@@ -61,6 +61,58 @@ from prowler.lib.outputs.finding import Finding as FindingOutput
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
|
||||
def _cleanup_orphan_scheduled_scans(
|
||||
tenant_id: str,
|
||||
provider_id: str,
|
||||
scheduler_task_id: int,
|
||||
) -> int:
|
||||
"""
|
||||
TEMPORARY WORKAROUND: Clean up orphan AVAILABLE scans.
|
||||
|
||||
Detects and removes AVAILABLE scans that were never used due to an
|
||||
issue during the first scheduled scan setup.
|
||||
|
||||
An AVAILABLE scan is considered orphan if there's also a SCHEDULED scan for
|
||||
the same provider with the same scheduler_task_id. This situation indicates
|
||||
that the first scan execution didn't find the AVAILABLE scan (because it
|
||||
wasn't committed yet, probably) and created a new one, leaving the AVAILABLE orphaned.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant ID.
|
||||
provider_id: The provider ID.
|
||||
scheduler_task_id: The PeriodicTask ID that triggers these scans.
|
||||
|
||||
Returns:
|
||||
Number of orphan scans deleted (0 if none found).
|
||||
"""
|
||||
orphan_available_scans = Scan.objects.filter(
|
||||
tenant_id=tenant_id,
|
||||
provider_id=provider_id,
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=scheduler_task_id,
|
||||
)
|
||||
|
||||
scheduled_scan_exists = Scan.objects.filter(
|
||||
tenant_id=tenant_id,
|
||||
provider_id=provider_id,
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=scheduler_task_id,
|
||||
).exists()
|
||||
|
||||
if scheduled_scan_exists and orphan_available_scans.exists():
|
||||
orphan_count = orphan_available_scans.count()
|
||||
logger.warning(
|
||||
f"[WORKAROUND] Found {orphan_count} orphan AVAILABLE scan(s) for "
|
||||
f"provider {provider_id} alongside a SCHEDULED scan. Cleaning up orphans..."
|
||||
)
|
||||
orphan_available_scans.delete()
|
||||
return orphan_count
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _perform_scan_complete_tasks(tenant_id: str, scan_id: str, provider_id: str):
|
||||
"""
|
||||
Helper function to perform tasks after a scan is completed.
|
||||
@@ -247,6 +299,14 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
|
||||
return serializer.data
|
||||
|
||||
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
|
||||
|
||||
# TEMPORARY WORKAROUND: Clean up orphan scans from transaction isolation issue
|
||||
_cleanup_orphan_scheduled_scans(
|
||||
tenant_id=tenant_id,
|
||||
provider_id=provider_id,
|
||||
scheduler_task_id=periodic_task_instance.id,
|
||||
)
|
||||
|
||||
scan_instance, _ = Scan.objects.get_or_create(
|
||||
tenant_id=tenant_id,
|
||||
provider_id=provider_id,
|
||||
|
||||
@@ -4,11 +4,13 @@ from unittest.mock import MagicMock, patch
|
||||
import openai
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from django_celery_beat.models import IntervalSchedule, PeriodicTask
|
||||
from tasks.jobs.lighthouse_providers import (
|
||||
_create_bedrock_client,
|
||||
_extract_bedrock_credentials,
|
||||
)
|
||||
from tasks.tasks import (
|
||||
_cleanup_orphan_scheduled_scans,
|
||||
_perform_scan_complete_tasks,
|
||||
check_integrations_task,
|
||||
check_lighthouse_provider_connection_task,
|
||||
@@ -22,6 +24,8 @@ from api.models import (
|
||||
Integration,
|
||||
LighthouseProviderConfiguration,
|
||||
LighthouseProviderModels,
|
||||
Scan,
|
||||
StateChoices,
|
||||
)
|
||||
|
||||
|
||||
@@ -1715,3 +1719,343 @@ class TestRefreshLighthouseProviderModelsTask:
|
||||
assert result["deleted"] == 0
|
||||
assert "error" in result
|
||||
assert result["error"] is not None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCleanupOrphanScheduledScans:
|
||||
"""Unit tests for _cleanup_orphan_scheduled_scans helper function."""
|
||||
|
||||
def _create_periodic_task(self, provider_id, tenant_id):
|
||||
"""Helper to create a PeriodicTask for testing."""
|
||||
interval, _ = IntervalSchedule.objects.get_or_create(every=24, period="hours")
|
||||
return PeriodicTask.objects.create(
|
||||
name=f"scan-perform-scheduled-{provider_id}",
|
||||
task="scan-perform-scheduled",
|
||||
interval=interval,
|
||||
kwargs=f'{{"tenant_id": "{tenant_id}", "provider_id": "{provider_id}"}}',
|
||||
enabled=True,
|
||||
)
|
||||
|
||||
def test_cleanup_deletes_orphan_when_both_available_and_scheduled_exist(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that AVAILABLE scan is deleted when SCHEDULED also exists."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create orphan AVAILABLE scan
|
||||
orphan_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Create SCHEDULED scan (next execution)
|
||||
scheduled_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Execute cleanup
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Verify orphan was deleted
|
||||
assert deleted_count == 1
|
||||
assert not Scan.objects.filter(id=orphan_scan.id).exists()
|
||||
assert Scan.objects.filter(id=scheduled_scan.id).exists()
|
||||
|
||||
def test_cleanup_does_not_delete_when_only_available_exists(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that AVAILABLE scan is NOT deleted when no SCHEDULED exists."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create only AVAILABLE scan (normal first scan scenario)
|
||||
available_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Execute cleanup
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Verify nothing was deleted
|
||||
assert deleted_count == 0
|
||||
assert Scan.objects.filter(id=available_scan.id).exists()
|
||||
|
||||
def test_cleanup_does_not_delete_when_only_scheduled_exists(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that nothing is deleted when only SCHEDULED exists."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create only SCHEDULED scan (normal subsequent scan scenario)
|
||||
scheduled_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Execute cleanup
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Verify nothing was deleted
|
||||
assert deleted_count == 0
|
||||
assert Scan.objects.filter(id=scheduled_scan.id).exists()
|
||||
|
||||
def test_cleanup_returns_zero_when_no_scans_exist(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that cleanup returns 0 when no scans exist."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Execute cleanup with no scans
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
assert deleted_count == 0
|
||||
|
||||
def test_cleanup_deletes_multiple_orphan_available_scans(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that multiple AVAILABLE orphan scans are all deleted."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create multiple orphan AVAILABLE scans
|
||||
orphan_scan_1 = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
orphan_scan_2 = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Create SCHEDULED scan
|
||||
scheduled_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Execute cleanup
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Verify all orphans were deleted
|
||||
assert deleted_count == 2
|
||||
assert not Scan.objects.filter(id=orphan_scan_1.id).exists()
|
||||
assert not Scan.objects.filter(id=orphan_scan_2.id).exists()
|
||||
assert Scan.objects.filter(id=scheduled_scan.id).exists()
|
||||
|
||||
def test_cleanup_does_not_affect_different_provider(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that cleanup only affects scans for the specified provider."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider1 = providers_fixture[0]
|
||||
provider2 = providers_fixture[1]
|
||||
periodic_task1 = self._create_periodic_task(provider1.id, tenant.id)
|
||||
periodic_task2 = self._create_periodic_task(provider2.id, tenant.id)
|
||||
|
||||
# Create orphan scenario for provider1
|
||||
orphan_scan_p1 = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider1,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
scheduled_scan_p1 = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider1,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
|
||||
# Create AVAILABLE scan for provider2 (should not be affected)
|
||||
available_scan_p2 = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider2,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task2.id,
|
||||
)
|
||||
|
||||
# Execute cleanup for provider1 only
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider1.id),
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
|
||||
# Verify only provider1's orphan was deleted
|
||||
assert deleted_count == 1
|
||||
assert not Scan.objects.filter(id=orphan_scan_p1.id).exists()
|
||||
assert Scan.objects.filter(id=scheduled_scan_p1.id).exists()
|
||||
assert Scan.objects.filter(id=available_scan_p2.id).exists()
|
||||
|
||||
def test_cleanup_does_not_affect_manual_scans(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that cleanup only affects SCHEDULED trigger scans, not MANUAL."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create orphan AVAILABLE scheduled scan
|
||||
orphan_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Create SCHEDULED scan
|
||||
scheduled_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Create AVAILABLE manual scan (should not be affected)
|
||||
manual_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Manual scan",
|
||||
trigger=Scan.TriggerChoices.MANUAL,
|
||||
state=StateChoices.AVAILABLE,
|
||||
)
|
||||
|
||||
# Execute cleanup
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task.id,
|
||||
)
|
||||
|
||||
# Verify only scheduled orphan was deleted
|
||||
assert deleted_count == 1
|
||||
assert not Scan.objects.filter(id=orphan_scan.id).exists()
|
||||
assert Scan.objects.filter(id=scheduled_scan.id).exists()
|
||||
assert Scan.objects.filter(id=manual_scan.id).exists()
|
||||
|
||||
def test_cleanup_does_not_affect_different_scheduler_task(
|
||||
self, tenants_fixture, providers_fixture
|
||||
):
|
||||
"""Test that cleanup only affects scans with the specified scheduler_task_id."""
|
||||
tenant = tenants_fixture[0]
|
||||
provider = providers_fixture[0]
|
||||
periodic_task1 = self._create_periodic_task(provider.id, tenant.id)
|
||||
|
||||
# Create another periodic task
|
||||
interval, _ = IntervalSchedule.objects.get_or_create(every=24, period="hours")
|
||||
periodic_task2 = PeriodicTask.objects.create(
|
||||
name=f"scan-perform-scheduled-other-{provider.id}",
|
||||
task="scan-perform-scheduled",
|
||||
interval=interval,
|
||||
kwargs=f'{{"tenant_id": "{tenant.id}", "provider_id": "{provider.id}"}}',
|
||||
enabled=True,
|
||||
)
|
||||
|
||||
# Create orphan scenario for periodic_task1
|
||||
orphan_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
scheduled_scan = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.SCHEDULED,
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
|
||||
# Create AVAILABLE scan for periodic_task2 (should not be affected)
|
||||
available_scan_other_task = Scan.objects.create(
|
||||
tenant_id=tenant.id,
|
||||
provider=provider,
|
||||
name="Daily scheduled scan",
|
||||
trigger=Scan.TriggerChoices.SCHEDULED,
|
||||
state=StateChoices.AVAILABLE,
|
||||
scheduler_task_id=periodic_task2.id,
|
||||
)
|
||||
|
||||
# Execute cleanup for periodic_task1 only
|
||||
deleted_count = _cleanup_orphan_scheduled_scans(
|
||||
tenant_id=str(tenant.id),
|
||||
provider_id=str(provider.id),
|
||||
scheduler_task_id=periodic_task1.id,
|
||||
)
|
||||
|
||||
# Verify only periodic_task1's orphan was deleted
|
||||
assert deleted_count == 1
|
||||
assert not Scan.objects.filter(id=orphan_scan.id).exists()
|
||||
assert Scan.objects.filter(id=scheduled_scan.id).exists()
|
||||
assert Scan.objects.filter(id=available_scan_other_task.id).exists()
|
||||
|
||||
Reference in New Issue
Block a user