From 28b045302f2c623d944e83e2a87f0da7f521c760 Mon Sep 17 00:00:00 2001 From: Josema Camacho Date: Mon, 8 Jun 2026 13:30:18 +0200 Subject: [PATCH] fix(api): create Neo4j driver lazily so an outage can't block API startup (#11491) --- api/CHANGELOG.md | 8 +++ api/src/backend/api/apps.py | 40 ++----------- api/src/backend/api/attack_paths/database.py | 22 ++++++-- api/src/backend/api/tests/test_apps.py | 56 ++++--------------- .../api/tests/test_attack_paths_database.py | 43 ++++++++++++-- 5 files changed, 81 insertions(+), 88 deletions(-) diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 4f4d3426f6..e9dac993ce 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -21,6 +21,14 @@ All notable changes to the **Prowler API** are documented in this file. --- +## [1.30.3] (Prowler v5.29.3) + +### 🐞 Fixed + +- API startup no longer crashes when Neo4j is unreachable, as the Neo4j driver now connects lazily on first use rather than during app initialization [(#11491)](https://github.com/prowler-cloud/prowler/pull/11491) + +--- + ## [1.30.1] (Prowler v5.29.1) ### 🐞 Fixed diff --git a/api/src/backend/api/apps.py b/api/src/backend/api/apps.py index 3209f75888..6c3a4ec2d9 100644 --- a/api/src/backend/api/apps.py +++ b/api/src/backend/api/apps.py @@ -1,12 +1,14 @@ import logging import os import sys + from pathlib import Path +from django.apps import AppConfig +from django.conf import settings + from config.custom_logging import BackendLogger from config.env import env -from django.apps import AppConfig -from django.conf import settings logger = logging.getLogger(BackendLogger.API) @@ -30,7 +32,6 @@ class ApiConfig(AppConfig): def ready(self): from api import schema_extensions # noqa: F401 from api import signals # noqa: F401 - from api.attack_paths import database as graph_database # Generate required cryptographic keys if not present, but only if: # `"manage.py" not in sys.argv[0]`: If an external server (e.g., Gunicorn) is running the app @@ -41,37 +42,8 @@ class ApiConfig(AppConfig): ): self._ensure_crypto_keys() - # Commands that don't need Neo4j - SKIP_NEO4J_DJANGO_COMMANDS = [ - "makemigrations", - "migrate", - "pgpartition", - "check", - "help", - "showmigrations", - "check_and_fix_socialaccount_sites_migration", - ] - - # Skip eager Neo4j init for tests, some Django commands, and Celery (prefork pool: driver must stay lazy, no post_fork hook) - if getattr(settings, "TESTING", False) or ( - len(sys.argv) > 1 - and ( - ( - "manage.py" in sys.argv[0] - and sys.argv[1] in SKIP_NEO4J_DJANGO_COMMANDS - ) - or "celery" in sys.argv[0] - ) - ): - logger.info( - "Skipping eager Neo4j init: tests, some Django commands, or Celery prefork pool (driver stays lazy)" - ) - - else: - graph_database.init_driver() - - # Neo4j driver is initialized at API startup (see api.attack_paths.database) - # It remains lazy for Celery workers and selected Django commands + # Neo4j driver is created lazily on first use (see api.attack_paths.database). + # App init never contacts Neo4j, so a Neo4j outage cannot block API startup. def _ensure_crypto_keys(self): """ diff --git a/api/src/backend/api/attack_paths/database.py b/api/src/backend/api/attack_paths/database.py index f5fddd0613..d5cc1698a7 100644 --- a/api/src/backend/api/attack_paths/database.py +++ b/api/src/backend/api/attack_paths/database.py @@ -1,22 +1,24 @@ import atexit import logging import threading + from contextlib import contextmanager from typing import Any, Iterator from uuid import UUID import neo4j import neo4j.exceptions + from config.env import env from django.conf import settings + +from api.attack_paths.retryable_session import RetryableSession from tasks.jobs.attack_paths.config import ( BATCH_SIZE, PROVIDER_RESOURCE_LABEL, get_provider_label, ) -from api.attack_paths.retryable_session import RetryableSession - # Without this Celery goes crazy with Neo4j logging logging.getLogger("neo4j").setLevel(logging.ERROR) logging.getLogger("neo4j").propagate = False @@ -28,6 +30,9 @@ READ_QUERY_TIMEOUT_SECONDS = env.int( "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30 ) MAX_CUSTOM_QUERY_NODES = env.int("ATTACK_PATHS_MAX_CUSTOM_QUERY_NODES", default=250) +# Shorter than CONN_ACQUISITION_TIMEOUT — the driver requires acquisition to be +# the longer of the two (it may include opening a new connection). +CONNECTION_TIMEOUT = env.int("NEO4J_CONNECTION_TIMEOUT", default=5) CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15) READ_EXCEPTION_CODES = [ "Neo.ClientError.Statement.AccessMode", @@ -58,15 +63,24 @@ def init_driver() -> neo4j.Driver: uri = get_uri() config = settings.DATABASES["neo4j"] - _driver = neo4j.GraphDatabase.driver( + driver = neo4j.GraphDatabase.driver( uri, auth=(config["USER"], config["PASSWORD"]), keep_alive=True, max_connection_lifetime=7200, + connection_timeout=CONNECTION_TIMEOUT, connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT, max_connection_pool_size=50, ) - _driver.verify_connectivity() + # Publish the singleton only after connectivity is verified so a + # failed probe does not leave an unverified driver behind. Close the + # driver on failure so a repeatedly-probed outage cannot leak pools. + try: + driver.verify_connectivity() + except Exception: + driver.close() + raise + _driver = driver # Register cleanup handler (only runs once since we're inside the _driver is None block) atexit.register(close_driver) diff --git a/api/src/backend/api/tests/test_apps.py b/api/src/backend/api/tests/test_apps.py index 5889b4e2cb..2f5b55a6e2 100644 --- a/api/src/backend/api/tests/test_apps.py +++ b/api/src/backend/api/tests/test_apps.py @@ -182,23 +182,19 @@ def _make_app(): return ApiConfig("api", api) -def test_ready_initializes_driver_for_api_process(monkeypatch): +@pytest.mark.parametrize( + "argv", + [ + ["gunicorn"], + ["celery", "-A", "api"], + ["manage.py", "migrate"], + ], + ids=["api", "celery", "manage_py"], +) +def test_ready_never_eagerly_initializes_neo4j_driver(monkeypatch, argv): + """ready() must never contact Neo4j; the driver is created lazily on first use.""" config = _make_app() - _set_argv(monkeypatch, ["gunicorn"]) - _set_testing(monkeypatch, False) - - with ( - patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None), - patch("api.attack_paths.database.init_driver") as init_driver, - ): - config.ready() - - init_driver.assert_called_once() - - -def test_ready_skips_driver_for_celery(monkeypatch): - config = _make_app() - _set_argv(monkeypatch, ["celery", "-A", "api"]) + _set_argv(monkeypatch, argv) _set_testing(monkeypatch, False) with ( @@ -208,31 +204,3 @@ def test_ready_skips_driver_for_celery(monkeypatch): config.ready() init_driver.assert_not_called() - - -def test_ready_skips_driver_for_manage_py_skip_command(monkeypatch): - config = _make_app() - _set_argv(monkeypatch, ["manage.py", "migrate"]) - _set_testing(monkeypatch, False) - - with ( - patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None), - patch("api.attack_paths.database.init_driver") as init_driver, - ): - config.ready() - - init_driver.assert_not_called() - - -def test_ready_skips_driver_when_testing(monkeypatch): - config = _make_app() - _set_argv(monkeypatch, ["gunicorn"]) - _set_testing(monkeypatch, True) - - with ( - patch.object(ApiConfig, "_ensure_crypto_keys", return_value=None), - patch("api.attack_paths.database.init_driver") as init_driver, - ): - config.ready() - - init_driver.assert_not_called() diff --git a/api/src/backend/api/tests/test_attack_paths_database.py b/api/src/backend/api/tests/test_attack_paths_database.py index 8828d23911..3a29a1007d 100644 --- a/api/src/backend/api/tests/test_attack_paths_database.py +++ b/api/src/backend/api/tests/test_attack_paths_database.py @@ -1,15 +1,16 @@ """ Tests for Neo4j database lazy initialization. -The Neo4j driver connects on first use by default. API processes may -eagerly initialize the driver during app startup, while Celery workers -remain lazy. These tests validate the database module behavior itself. +The Neo4j driver is created on first use for every process type; app startup +never contacts Neo4j. These tests validate the database module behavior itself. """ import threading + from unittest.mock import MagicMock, patch import neo4j +import neo4j.exceptions import pytest import api.attack_paths.database as db_module @@ -59,6 +60,32 @@ class TestLazyInitialization: assert result is mock_driver assert db_module._driver is mock_driver + @patch("api.attack_paths.database.settings") + @patch("api.attack_paths.database.neo4j.GraphDatabase.driver") + def test_init_driver_leaves_driver_none_when_verify_fails( + self, mock_driver_factory, mock_settings + ): + """A failed verify_connectivity() must not publish or leak the driver.""" + mock_driver = MagicMock() + mock_driver.verify_connectivity.side_effect = ( + neo4j.exceptions.ServiceUnavailable("down") + ) + mock_driver_factory.return_value = mock_driver + mock_settings.DATABASES = { + "neo4j": { + "HOST": "localhost", + "PORT": 7687, + "USER": "neo4j", + "PASSWORD": "password", + } + } + + with pytest.raises(neo4j.exceptions.ServiceUnavailable): + db_module.init_driver() + + assert db_module._driver is None + mock_driver.close.assert_called_once() + @patch("api.attack_paths.database.settings") @patch("api.attack_paths.database.neo4j.GraphDatabase.driver") def test_init_driver_returns_cached_driver_on_subsequent_calls( @@ -116,21 +143,23 @@ class TestConnectionAcquisitionTimeout: @pytest.fixture(autouse=True) def reset_module_state(self): original_driver = db_module._driver - original_timeout = db_module.CONN_ACQUISITION_TIMEOUT + original_acq_timeout = db_module.CONN_ACQUISITION_TIMEOUT + original_conn_timeout = db_module.CONNECTION_TIMEOUT db_module._driver = None yield db_module._driver = original_driver - db_module.CONN_ACQUISITION_TIMEOUT = original_timeout + db_module.CONN_ACQUISITION_TIMEOUT = original_acq_timeout + db_module.CONNECTION_TIMEOUT = original_conn_timeout @patch("api.attack_paths.database.settings") @patch("api.attack_paths.database.neo4j.GraphDatabase.driver") def test_driver_receives_configured_timeout( self, mock_driver_factory, mock_settings ): - """init_driver() should pass CONN_ACQUISITION_TIMEOUT to the neo4j driver.""" + """init_driver() should pass the configured timeouts to the neo4j driver.""" mock_driver_factory.return_value = MagicMock() mock_settings.DATABASES = { "neo4j": { @@ -141,11 +170,13 @@ class TestConnectionAcquisitionTimeout: } } db_module.CONN_ACQUISITION_TIMEOUT = 42 + db_module.CONNECTION_TIMEOUT = 7 db_module.init_driver() _, kwargs = mock_driver_factory.call_args assert kwargs["connection_acquisition_timeout"] == 42 + assert kwargs["connection_timeout"] == 7 class TestAtexitRegistration: