fix(scans): Handle duplicated scan tasks (#7409)

Co-authored-by: Víctor Fernández Poyatos <victor@prowler.com>
This commit is contained in:
Prowler Bot
2025-04-03 09:15:58 +02:00
committed by GitHub
parent 56d0c2fbea
commit a5aa4c30d7
5 changed files with 54 additions and 4 deletions

View File

@@ -4,6 +4,13 @@ All notable changes to the **Prowler API** are documented in this file.
---
## [v1.5.3] (Prowler v5.4.3)
### Fixed
- Added duplicated scheduled scans handling ([#7401])(https://github.com/prowler-cloud/prowler/pull/7401).
---
## [v1.5.2] (Prowler v5.4.2)
### Changed

View File

@@ -8,7 +8,7 @@ description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
name = "prowler-api"
package-mode = false
version = "1.5.2"
version = "1.5.3"
[tool.poetry.dependencies]
celery = {extras = ["pytest"], version = "^5.4.0"}

View File

@@ -1,7 +1,7 @@
openapi: 3.0.3
info:
title: Prowler API
version: 1.5.2
version: 1.5.3
description: |-
Prowler API specification.

View File

@@ -241,7 +241,7 @@ class SchemaView(SpectacularAPIView):
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.5.2"
spectacular_settings.VERSION = "1.5.3"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)

View File

@@ -1,3 +1,4 @@
from datetime import datetime, timedelta, timezone
from pathlib import Path
from shutil import rmtree
@@ -21,6 +22,7 @@ from api.db_utils import rls_transaction
from api.decorators import set_tenant
from api.models import Finding, Provider, Scan, ScanSummary, StateChoices
from api.utils import initialize_prowler_provider
from api.v1.serializers import ScanTaskSerializer
from prowler.lib.outputs.finding import Finding as FindingOutput
logger = get_task_logger(__name__)
@@ -128,6 +130,43 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
periodic_task_instance = PeriodicTask.objects.get(
name=f"scan-perform-scheduled-{provider_id}"
)
executed_scan = Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
task__task_runner_task__task_id=task_id,
).order_by("completed_at")
if (
Scan.objects.filter(
tenant_id=tenant_id,
provider_id=provider_id,
trigger=Scan.TriggerChoices.SCHEDULED,
state=StateChoices.EXECUTING,
scheduler_task_id=periodic_task_instance.id,
scheduled_at__date=datetime.now(timezone.utc).date(),
).exists()
or executed_scan.exists()
):
# Duplicated task execution due to visibility timeout or scan is already running
logger.warning(f"Duplicated scheduled scan for provider {provider_id}.")
try:
affected_scan = executed_scan.first()
if not affected_scan:
raise ValueError(
"Error retrieving affected scan details after detecting duplicated scheduled "
"scan."
)
# Return the affected scan details to avoid losing data
serializer = ScanTaskSerializer(instance=affected_scan)
except Exception as duplicated_scan_exception:
logger.error(
f"Duplicated scheduled scan for provider {provider_id}. Error retrieving affected scan details: "
f"{str(duplicated_scan_exception)}"
)
raise duplicated_scan_exception
return serializer.data
next_scan_datetime = get_next_execution_datetime(task_id, provider_id)
scan_instance, _ = Scan.objects.get_or_create(
tenant_id=tenant_id,
@@ -135,7 +174,11 @@ def perform_scheduled_scan_task(self, tenant_id: str, provider_id: str):
trigger=Scan.TriggerChoices.SCHEDULED,
state__in=(StateChoices.SCHEDULED, StateChoices.AVAILABLE),
scheduler_task_id=periodic_task_instance.id,
defaults={"state": StateChoices.SCHEDULED},
defaults={
"state": StateChoices.SCHEDULED,
"name": "Daily scheduled scan",
"scheduled_at": next_scan_datetime - timedelta(days=1),
},
)
scan_instance.task_id = task_id