diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md
index 05520c2cf3..8a3ec882e3 100644
--- a/api/CHANGELOG.md
+++ b/api/CHANGELOG.md
@@ -12,6 +12,7 @@ All notable changes to the **Prowler API** are documented in this file.
 - Attack paths findings loading query to use streaming generator for O(batch_size) memory instead of O(total_findings) [(#9862)](https://github.com/prowler-cloud/prowler/pull/9862)
 - Lazy load Neo4j driver [(#9868)](https://github.com/prowler-cloud/prowler/pull/9868)
 - Use `Findings.all_objects` to avoid the `ActiveProviderPartitionedManager` [(#9869)](https://github.com/prowler-cloud/prowler/pull/9869)
+- Lazy load Neo4j driver for workers only [(#9872)](https://github.com/prowler-cloud/prowler/pull/9872)
 
 ## [1.18.0] (Prowler v5.17.0)
 
diff --git a/api/src/backend/api/apps.py b/api/src/backend/api/apps.py
index a690e501a3..5ff37a3a88 100644
--- a/api/src/backend/api/apps.py
+++ b/api/src/backend/api/apps.py
@@ -30,6 +30,7 @@ class ApiConfig(AppConfig):
     def ready(self):
         from api import schema_extensions  # noqa: F401
         from api import signals  # noqa: F401
+        from api.attack_paths import database as graph_database
         from api.compliance import load_prowler_compliance
 
         # Generate required cryptographic keys if not present, but only if:
@@ -41,8 +42,37 @@ class ApiConfig(AppConfig):
         ):
             self._ensure_crypto_keys()
 
-        # Neo4j driver is initialized lazily on first use (see api.attack_paths.database)
-        # This avoids connection attempts during regular scans that don't need graph database
+        # Commands that don't need Neo4j
+        SKIP_NEO4J_DJANGO_COMMANDS = [
+            "makemigrations",
+            "migrate",
+            "pgpartition",
+            "check",
+            "help",
+            "showmigrations",
+            "check_and_fix_socialaccount_sites_migration",
+        ]
+
+        # Skip Neo4j initialization during tests, some Django commands, and Celery
+        if getattr(settings, "TESTING", False) or (
+            len(sys.argv) > 1
+            and (
+                (
+                    "manage.py" in sys.argv[0]
+                    and sys.argv[1] in SKIP_NEO4J_DJANGO_COMMANDS
+                )
+                or "celery" in sys.argv[0]
+            )
+        ):
+            logger.info(
+                "Skipping Neo4j initialization because tests, some Django commands or Celery"
+            )
+
+        else:
+            graph_database.init_driver()
+
+        # Neo4j driver is initialized at API startup (see api.attack_paths.database)
+        # It remains lazy for Celery workers and selected Django commands
 
         load_prowler_compliance()
 
diff --git a/api/src/backend/api/tests/test_apps.py b/api/src/backend/api/tests/test_apps.py
index 1509705894..712bc33882 100644
--- a/api/src/backend/api/tests/test_apps.py
+++ b/api/src/backend/api/tests/test_apps.py
@@ -1,10 +1,13 @@
 import os
+import sys
+import types
 from pathlib import Path
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 from django.conf import settings
 
+import api
 import api.apps as api_apps_module
 from api.apps import (
     ApiConfig,
@@ -150,3 +153,90 @@ def test_ensure_crypto_keys_skips_when_env_vars(monkeypatch, tmp_path):
 
     # Assert: orchestrator did not trigger generation when env present
     assert called["ensure"] is False
+
+
+@pytest.fixture(autouse=True)
+def stub_api_modules():
+    """Provide dummy modules imported during ApiConfig.ready().
+
+    Only names missing from ``sys.modules`` are stubbed, and only those stubs
+    are removed again on teardown.
+    """
+    created = []
+    for name in ("api.schema_extensions", "api.signals"):
+        if name not in sys.modules:
+            sys.modules[name] = types.ModuleType(name)
+            created.append(name)
+
+    yield
+
+    for name in created:
+        sys.modules.pop(name, None)
+
+
+def _set_argv(monkeypatch, argv):
+    """Replace ``sys.argv`` for the duration of the test."""
+    monkeypatch.setattr(sys, "argv", argv, raising=False)
+
+
+def _set_testing(monkeypatch, value):
+    """Override ``settings.TESTING`` for the duration of the test."""
+    monkeypatch.setattr(settings, "TESTING", value, raising=False)
+
+
+def _make_app():
+    """Build an ``ApiConfig`` bound to the ``api`` package."""
+    return ApiConfig("api", api)
+
+
+def _run_ready(monkeypatch, argv, testing):
+    """Run ``ApiConfig.ready()`` and return the ``init_driver`` mock.
+
+    ``sys.argv`` and ``settings.TESTING`` are set from *argv* and *testing*;
+    ``_ensure_crypto_keys`` is patched to a no-op.
+    """
+    config = _make_app()
+    _set_argv(monkeypatch, argv)
+    _set_testing(monkeypatch, testing)
+
+    with patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None), patch(
+        "api.attack_paths.database.init_driver"
+    ) as init_driver:
+        config.ready()
+
+    return init_driver
+
+
+def test_ready_initializes_driver_for_api_process(monkeypatch):
+    """A non-Celery, non-manage.py process initializes the driver eagerly."""
+    init_driver = _run_ready(monkeypatch, ["gunicorn"], testing=False)
+
+    init_driver.assert_called_once()
+
+
+def test_ready_initializes_driver_for_manage_py_other_command(monkeypatch):
+    """A manage.py command outside the skip list still initializes the driver."""
+    init_driver = _run_ready(monkeypatch, ["manage.py", "runserver"], testing=False)
+
+    init_driver.assert_called_once()
+
+
+def test_ready_skips_driver_for_celery(monkeypatch):
+    """Celery invocations leave the driver uninitialized at startup."""
+    init_driver = _run_ready(monkeypatch, ["celery", "-A", "api"], testing=False)
+
+    init_driver.assert_not_called()
+
+
+def test_ready_skips_driver_for_manage_py_skip_command(monkeypatch):
+    """manage.py commands in the skip list do not initialize the driver."""
+    init_driver = _run_ready(monkeypatch, ["manage.py", "migrate"], testing=False)
+
+    init_driver.assert_not_called()
+
+
+def test_ready_skips_driver_when_testing(monkeypatch):
+    """``settings.TESTING`` suppresses driver initialization."""
+    init_driver = _run_ready(monkeypatch, ["gunicorn"], testing=True)
+
+    init_driver.assert_not_called()
diff --git a/api/src/backend/api/tests/test_attack_paths_database.py b/api/src/backend/api/tests/test_attack_paths_database.py
index 143e8b5a51..46ba101c4a 100644
--- a/api/src/backend/api/tests/test_attack_paths_database.py
+++ b/api/src/backend/api/tests/test_attack_paths_database.py
@@ -1,8 +1,9 @@
 """
 Tests for Neo4j database lazy initialization.
 
-The Neo4j driver should only connect when actually needed (lazy initialization),
-not at Django app startup. This allows regular scans to run without Neo4j.
+The Neo4j driver connects on first use by default. API processes may
+eagerly initialize the driver during app startup, while Celery workers
+remain lazy. These tests validate the database module behavior itself.
 """
 
 import threading