Compare commits

...

1 Commits

Author SHA1 Message Date
Josema Camacho
69dfed977e feat(api): replace Neo4j with Grafeo as temporary scan database for Attack Paths 2026-03-25 19:19:22 +01:00
11 changed files with 471 additions and 194 deletions

134
api/poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -916,19 +916,6 @@ files = [
{file = "astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a"},
]
[[package]]
name = "async-timeout"
version = "5.0.1"
description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\""
files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
]
[[package]]
name = "attrs"
version = "25.4.0"
@@ -2296,7 +2283,7 @@ files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
markers = {dev = "sys_platform == \"win32\" or platform_system == \"Windows\""}
markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
[[package]]
name = "contextlib2"
@@ -2961,7 +2948,7 @@ files = [
[package.dependencies]
autopep8 = "*"
Django = ">=4.2"
gprof2dot = ">=2017.9.19"
gprof2dot = ">=2017.09.19"
sqlparse = "*"
[[package]]
@@ -3882,6 +3869,45 @@ files = [
{file = "gprof2dot-2025.4.14.tar.gz", hash = "sha256:35743e2d2ca027bf48fa7cba37021aaf4a27beeae1ae8e05a50b55f1f921a6ce"},
]
[[package]]
name = "grafeo"
version = "0.5.25"
description = "A high-performance, embeddable graph database with Python bindings"
optional = false
python-versions = ">=3.12"
groups = ["main"]
files = [
{file = "grafeo-0.5.25-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3b2f78b3b8dbad8322e49a54667d5f0c44c6b731213a25fcc282485c69433f33"},
{file = "grafeo-0.5.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33aa7f2a7b33854ee24023f506c1bb2daadb98e8da50f3b74aab3ece2732ac2f"},
{file = "grafeo-0.5.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24bd3b583c4be1ffadbd0d93084b15af56d9d2a32c2f923647b41badc8350f7c"},
{file = "grafeo-0.5.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aab5054c0cdfb8e0426e702f8e22906065d4cff9156b84a17b5d24f5695a846c"},
{file = "grafeo-0.5.25-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d28723894c49b8b4402dc40da8af89ce0b9d67ab5aba4fa9de7280b4554642f1"},
{file = "grafeo-0.5.25-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ea185c14eddd96d4fec1c6758439f2f640fd60470100b8b0faa2d16a29f1e104"},
{file = "grafeo-0.5.25-cp312-cp312-win32.whl", hash = "sha256:bab1671375a1d8af94a2da676d59fde635022b02a9d29fa073e79ef6c54a34fc"},
{file = "grafeo-0.5.25-cp312-cp312-win_amd64.whl", hash = "sha256:32a97346ed68b5744fb3e8b0fa6ac3ca8fcb5775ee11c71651fb07bb74665d2b"},
{file = "grafeo-0.5.25-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a31cf7581063dc2f957b476d7a22a595dc46b6c6ee30db29214b8c890c89518f"},
{file = "grafeo-0.5.25-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:917b9dde43f7c3b6cf92947fd869ab42fcddf9e963f44936d9d800c2c5127940"},
{file = "grafeo-0.5.25-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a9892368a6779ac035e3957bd256c6ffd83dbb1a4cac5974daa55610c81feee6"},
{file = "grafeo-0.5.25-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4ac78646e1c840e55da2ba3e8f5cff9da96fc590614591fc725abd0c3d118d65"},
{file = "grafeo-0.5.25-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27c0d1b31adbd0b1d519c8540c4b2b4f28dd79e75f3c050c219387a4d2f149d0"},
{file = "grafeo-0.5.25-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:647217e24b4df89fccfa36484e40bb5a586d069248cefcc0745c7a5ba606b5a9"},
{file = "grafeo-0.5.25-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2764d194b3947282bc0499bb8b59d5217366e579d68abf5dc18ca20f40c9fa14"},
{file = "grafeo-0.5.25-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a81c5a18ff1092fccd4f571a909c680b699c975d7b4bc9526e5ed902123c0e8"},
{file = "grafeo-0.5.25-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:539a28b3f381e401e36a5b32382f12bde21e4afc2e56129bc2aa982efd6b7506"},
{file = "grafeo-0.5.25-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5607dff71c39746224d1fb04320b79330d86d98ff9a89e89917c37dd9f991abc"},
{file = "grafeo-0.5.25-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0a85297324ed9c3036f104d509b92342a02c7f03e7a84aeb2b5ee5664af71417"},
{file = "grafeo-0.5.25-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5d466df240c01f22be601d257eccc221f2a5dae39fbdc1d12b1f5ada32fce38"},
{file = "grafeo-0.5.25-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:32d0e9cddb2eae1e512875aa5cd404cd3516ada0810746241a8222ad5f29ee7b"},
{file = "grafeo-0.5.25-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:37f6dfd2501117761058afc5709aaed7cd5e42d05c042fd051b8a8b5dc084553"},
{file = "grafeo-0.5.25.tar.gz", hash = "sha256:fd90cdff4194a00f9c086231e1977ed88e906848a220515b3d2fe872eeb9ace8"},
]
[package.dependencies]
solvor = ">=0.6.1"
[package.extras]
dev = ["pytest (>=9)", "pytest-asyncio (>=1.3)", "ruff (>=0.15)", "ty (>=0.0.24)"]
[[package]]
name = "graphemeu"
version = "0.7.2"
@@ -4569,7 +4595,7 @@ files = [
[package.dependencies]
attrs = ">=22.2.0"
jsonschema-specifications = ">=2023.3.6"
jsonschema-specifications = ">=2023.03.6"
referencing = ">=0.28.4"
rpds-py = ">=0.7.1"
@@ -4777,7 +4803,7 @@ librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""]
mongodb = ["pymongo (==4.15.3)"]
msgpack = ["msgpack (==1.1.2)"]
pyro = ["pyro4 (==4.82)"]
qpid = ["qpid-python (==1.36.0.post1)", "qpid-tools (==1.36.0.post1)"]
qpid = ["qpid-python (==1.36.0-1)", "qpid-tools (==1.36.0-1)"]
redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2,<6.5)"]
slmq = ["softlayer_messaging (>=1.0.3)"]
sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"]
@@ -4798,7 +4824,7 @@ files = [
]
[package.dependencies]
certifi = ">=14.5.14"
certifi = ">=14.05.14"
durationpy = ">=0.7"
google-auth = ">=1.0.1"
oauthlib = ">=3.2.2"
@@ -6180,10 +6206,7 @@ files = [
]
[package.dependencies]
numpy = [
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
]
numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.7"
@@ -7161,12 +7184,9 @@ files = [
]
[package.dependencies]
astroid = ">=3.2.2,<=3.3.0.dev0"
astroid = ">=3.2.2,<=3.3.0-dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.3.7", markers = "python_version >= \"3.12\""},
{version = ">=0.3.6", markers = "python_version == \"3.11\""},
]
dill = {version = ">=0.3.7", markers = "python_version >= \"3.12\""}
isort = ">=4.2.5,<5.13.0 || >5.13.0,<6"
mccabe = ">=0.6,<0.8"
platformdirs = ">=2.2.0"
@@ -7588,7 +7608,7 @@ files = [
{file = "pywin32-311-cp39-cp39-win_amd64.whl", hash = "sha256:e0c4cfb0621281fe40387df582097fd796e80430597cb9944f0ae70447bacd91"},
{file = "pywin32-311-cp39-cp39-win_arm64.whl", hash = "sha256:62ea666235135fee79bb154e695f3ff67370afefd71bd7fea7512fc70ef31e3d"},
]
markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "sys_platform == \"win32\""}
[[package]]
name = "pyyaml"
@@ -7685,9 +7705,6 @@ files = [
{file = "redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c"},
]
[package.dependencies]
async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""}
[package.extras]
circuit-breaker = ["pybreaker (>=1.4.0)"]
hiredis = ["hiredis (>=3.2.0)"]
@@ -8174,10 +8191,10 @@ files = [
]
[package.dependencies]
botocore = ">=1.37.4,<2.0a0"
botocore = ">=1.37.4,<2.0a.0"
[package.extras]
crt = ["botocore[crt] (>=1.37.4,<2.0a0)"]
crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
[[package]]
name = "safety"
@@ -8436,6 +8453,48 @@ files = [
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "solvor"
version = "0.6.1"
description = "Solvor all your optimization needs."
optional = false
python-versions = ">=3.12"
groups = ["main"]
files = [
{file = "solvor-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2cdd87a9fc28989ce0d8d7872105bc86428605311bc587d9e4e037548c3fe8de"},
{file = "solvor-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7aa1f16f2a4cf2ab77ab2b6f3a208b6f3a11efd2306bb2bc7661cf711fdd2056"},
{file = "solvor-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248a6e043d36f9faaa0155b7a28701c01fa0a9fe286b1959142a2c76c44f8670"},
{file = "solvor-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4657a2e6b4d201c8af7cc15a62db2a9b372aff23cc72ca276ad67a15893fdd75"},
{file = "solvor-0.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8426f34c996997bdcd792425420c3f9cccf88e23aa56c569faa94145a5003d84"},
{file = "solvor-0.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6acf4abdddf8a1f6ab2aaf9b528970b9cf0be0d136848761e829c577294d788e"},
{file = "solvor-0.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:d6c299d7ba7b10e8f7bcd61d8ad9664e0551e90ab3f3344da0b6382e1c6daec6"},
{file = "solvor-0.6.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:aa85afc3272aed7d2c5a725fe27c4c7bd050ae338b886e826e73b85c4107ca01"},
{file = "solvor-0.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4f999ef79ce456895b3f384360e07e8e68453decaedcbed71f4abf6e709ceac8"},
{file = "solvor-0.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:054897097e1727859ece528410ad9ac90878a862e2194afc3f48243b997de0e1"},
{file = "solvor-0.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9cb0f28f00ceb25ff7bf8db97b3f0cdf07d2a08cfaef4294cdfad845a44afde"},
{file = "solvor-0.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7dccb766351e7640c4c0af1b0b407a078231fb220ddcac6d5a5e8b69d406e0f8"},
{file = "solvor-0.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8d6af66c1a2c549c5cd9660ab8842232b4f8709355cc28c4bf304239aa5ee94a"},
{file = "solvor-0.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:85669d0f7289fa206d85345a0f02a1cfe8f0c589f4500a8e7ea3e6c1cbf39f80"},
{file = "solvor-0.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a6d5d81de9f8e58506c918ed1a4e72f06fb76ed0cda2161545b18433a09f72"},
{file = "solvor-0.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6f1f1efe6e76d69985165a08626cceea7deeec1617a77ca8dc254da099faab50"},
{file = "solvor-0.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:85624d5d421845083f877b29b2ec3e26bf09ed94d2db4089cc3331b2423bd2f6"},
{file = "solvor-0.6.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3e380235ecaffe2d694bd61c78b9f27a5df75840a63338c33b7769838a2cfcde"},
{file = "solvor-0.6.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0c3c32be778c02b9868254e6d8d3c4cccce9844fdd06330ed0bd5b7049a76b0e"},
{file = "solvor-0.6.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1fd6ef33e65f454215cb44754a2f4efc2d62c4078e4f64a6c99b4e16f7394c0"},
{file = "solvor-0.6.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a18a106c8cfee5c5ecd561f4db0f6c2037fb933ec7e3ce368a05c559eead0154"},
{file = "solvor-0.6.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f0c3f0b1c31f01129c66298fb0669b4ae47814e4beb06e902db6c3bdbb4969f3"},
{file = "solvor-0.6.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bd022df7d942302c6a012f6074432f4d284077ae9ab092578b0721e6e1a82df6"},
{file = "solvor-0.6.1-cp314-cp314-win_amd64.whl", hash = "sha256:1b24b9e055567376a52e56db24c28e54a30fdfd6b95c5978d24f084903fd6e7b"},
{file = "solvor-0.6.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4061530ec39bf4383c0a0079d13593dae90a61d68efd487c6b600554b75b2674"},
{file = "solvor-0.6.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2d1b219dce482af186e1f7a291515281c7451acfc51859eb725a23d9b88e2354"},
{file = "solvor-0.6.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0d5545c57c0f9460f5b1dcbf579b0651850e2e7b7b075eabab8b1229af5d132d"},
{file = "solvor-0.6.1.tar.gz", hash = "sha256:c8cfa4d9ba6e60f05ce7dff4737740d17e1efa3f3b52b2f0cf14b33b3160db94"},
]
[package.extras]
dev = ["maturin (>=1.7,<2.0)", "pre-commit (>=4.5.1)", "prek (>=0.3.1)", "pytest (>=9.0.2)", "pytest-cov (>=7.0.0)", "ruff (>=0.14.14)", "ty (>=0.0.14)"]
docs = ["mkdocs (>=1.6)", "mkdocs-git-revision-date-localized-plugin (>=1.5.1)", "mkdocs-material (>=9.5)", "mkdocstrings[python] (>=1.0.2)"]
[[package]]
name = "sqlparse"
version = "0.5.5"
@@ -8605,9 +8664,6 @@ files = [
{file = "types_aiobotocore_ecr-3.1.1.tar.gz", hash = "sha256:155edc63c612e1a7861fa746376a5143cc4f3ca05b60c27d68ced23e8567a344"},
]
[package.dependencies]
typing-extensions = {version = "*", markers = "python_version < \"3.12\""}
[[package]]
name = "typing-extensions"
version = "4.15.0"
@@ -9371,5 +9427,5 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.13"
content-hash = "167d4549788b8bc8bb7772b9a81ade1eab73d8f354251a8d6af4901223cc7f67"
python-versions = ">=3.12,<3.13"
content-hash = "bd1941ce2a04c8460bad0949cd951f19d6cd270bbe816622ca77ba20ec23ae08"

View File

@@ -11,33 +11,34 @@ dependencies = [
"django-allauth[saml] (==65.15.0)",
"django-celery-beat (==2.9.0)",
"django-celery-results (==2.6.0)",
"django-cors-headers==4.4.0",
"django-environ==0.11.2",
"django-filter==24.3",
"django-guid==3.5.0",
"django-cors-headers (==4.4.0)",
"django-environ (==0.11.2)",
"django-filter (==24.3)",
"django-guid (==3.5.0)",
"django-postgres-extra (==2.0.9)",
"djangorestframework==3.15.2",
"djangorestframework-jsonapi==7.0.2",
"djangorestframework (==3.15.2)",
"djangorestframework-jsonapi (==7.0.2)",
"djangorestframework-simplejwt (==5.5.1)",
"drf-nested-routers (==0.95.0)",
"drf-spectacular==0.27.2",
"drf-spectacular-jsonapi==0.5.1",
"defusedxml==0.7.1",
"gunicorn==23.0.0",
"lxml==5.3.2",
"drf-spectacular (==0.27.2)",
"drf-spectacular-jsonapi (==0.5.1)",
"defusedxml (==0.7.1)",
"gunicorn (==23.0.0)",
"lxml (==5.3.2)",
"prowler @ git+https://github.com/prowler-cloud/prowler.git@master",
"psycopg2-binary==2.9.9",
"psycopg2-binary (==2.9.9)",
"pytest-celery[redis] (==1.3.0)",
"sentry-sdk[django] (==2.56.0)",
"uuid6==2024.7.10",
"uuid6 (==2024.7.10)",
"openai (==1.109.1)",
"xmlsec==1.3.14",
"xmlsec (==1.3.14)",
"h2 (==4.3.0)",
"markdown (==3.10.2)",
"drf-simple-apikey (==2.2.1)",
"matplotlib (==3.10.8)",
"reportlab (==4.4.10)",
"neo4j (==6.1.0)",
"grafeo (==0.5.25)",
"cartography (==0.132.0)",
"gevent (==25.9.1)",
"werkzeug (==3.1.7)",
@@ -49,7 +50,7 @@ license = "Apache-2.0"
name = "prowler-api"
package-mode = false
# Needed for the SDK compatibility
requires-python = ">=3.11,<3.13"
requires-python = ">=3.12,<3.13"
version = "1.24.0"
[project.scripts]

View File

@@ -0,0 +1,148 @@
"""
Local embedded graph database for temporary scan databases.
Mirrors `database.py`'s public API (`get_session`, `create_database`, `drop_database`).
Uses Grafeo as the embedded graph engine. Grafeo is an implementation detail;
if it gets swapped for something else, only this file changes.
"""
import logging
import os
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Iterator, cast
import neo4j
from grafeo import GrafeoDB
logger = logging.getLogger(__name__)
# Result / Record wrappers
class _Record:
"""Wraps a dict to behave like a `neo4j.Record`."""
def __init__(self, data: dict[str, Any]):
self._data = data
def __getitem__(self, key: str) -> Any:
return self._data[key]
def get(self, key: str, default: Any = None) -> Any:
return self._data.get(key, default)
def data(self) -> dict[str, Any]:
return self._data
def value(self) -> Any:
return next(iter(self._data.values())) if self._data else None
def values(self) -> tuple:
return tuple(self._data.values())
def keys(self) -> list[str]:
return list(self._data.keys())
class _Result:
    """Minimal `neo4j.Result`-like view over an in-memory list of `_Record`s."""

    def __init__(self, records: list[_Record]):
        self._records = records

    def __iter__(self) -> Iterator[_Record]:
        yield from self._records

    def single(self) -> _Record | None:
        # First record, or None when the result set is empty.
        if not self._records:
            return None
        return self._records[0]

    def values(self) -> list[tuple]:
        out: list[tuple] = []
        for record in self._records:
            out.append(record.values())
        return out

    def consume(self) -> None:
        # Results are fully materialized up front; there is no cursor to drain.
        pass
# Session adapter
class _Session:
    """
    Adapter that lets a GrafeoDB handle stand in for a `neo4j.Session`.

    Every Cypher statement is forwarded verbatim to `execute_cypher()`.

    Cartography drives writes as `session.execute_write(tx_func, args, kwargs)`,
    where `tx_func` takes the session itself as its first argument and issues
    `.run()` calls on it; `execute_write`/`execute_read` therefore just
    re-enter this session via `fn(self, ...)`.
    """

    def __init__(self, db: GrafeoDB):
        self._db = db

    def run(self, query: str, parameters: dict[str, Any] | None = None, **kwargs: Any) -> neo4j.Result:
        # Keyword arguments override entries in the parameters dict, matching
        # the neo4j driver's calling convention.
        merged = dict(parameters or {})
        merged.update(kwargs)
        try:
            # Materialize inside the try so lazily-raised errors are logged too.
            rows = list(self._db.execute_cypher(query, merged))
        except RuntimeError:
            # Log parameter *names* only; values may be bulky or sensitive.
            logger.error("Grafeo query failed:\n%s\nParameters: %s", query, list(merged.keys()))
            raise
        return cast(neo4j.Result, _Result([_Record(row) for row in rows]))

    def execute_write(self, fn: Callable, *args: Any, **kwargs: Any) -> Any:
        return fn(self, *args, **kwargs)

    def execute_read(self, fn: Callable, *args: Any, **kwargs: Any) -> Any:
        return fn(self, *args, **kwargs)

    def close(self) -> None:
        # Nothing to release: GrafeoDB is embedded, no connection to hand back.
        pass
# Public API (mirrors database.py)
@contextmanager
def get_session(database_name: str) -> Iterator[neo4j.Session]:
    """Open a session against a local embedded graph database.

    Yields a handle typed as `neo4j.Session` so callers written against the
    Neo4j driver (notably Cartography) can use it without casting; the
    underlying object is a `_Session` adapter over a `GrafeoDB` instance.
    """
    db = GrafeoDB(_resolve_path(database_name))
    session = _Session(db)
    try:
        yield cast(neo4j.Session, session)
    finally:
        # close() is currently a no-op, but keeps the contract symmetric.
        session.close()
def create_database(database_name: str) -> None:
    """Create an empty local graph database.

    Opening a GrafeoDB at the resolved path materializes it on disk; the
    handle is discarded immediately. NOTE(review): assumes GrafeoDB requires
    no explicit close after instantiation — confirm against the Grafeo docs.
    """
    GrafeoDB(_resolve_path(database_name))
def drop_database(database_name: str) -> None:
    """Delete a local graph database.

    Removes whatever exists at the resolved path — a directory tree or a
    single file. A missing path is a no-op, so dropping an already-deleted
    database (e.g. during failure cleanup) is always safe.
    """
    target = Path(_resolve_path(database_name))
    if target.is_dir():
        shutil.rmtree(target)
    else:
        # missing_ok avoids the TOCTOU race between a separate exists()
        # check and unlink() when cleanup runs concurrently; it also removes
        # dangling symlinks, which exists() would report as absent.
        target.unlink(missing_ok=True)
# Internal
def _resolve_path(database_name: str) -> str:
return os.path.join(tempfile.gettempdir(), "prowler-attack-paths", database_name)

View File

@@ -7,7 +7,6 @@ from tasks.jobs.attack_paths.config import PROVIDER_ID_PROPERTY, PROWLER_FINDING
# Custom Attack Path Queries
# --------------------------
AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS = AttackPathsQueryDefinition(
id="aws-internet-exposed-ec2-sensitive-s3-access",
@@ -62,7 +61,6 @@ AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS = AttackPathsQueryDefinition(
# Basic Resource Queries
# ----------------------
AWS_RDS_INSTANCES = AttackPathsQueryDefinition(
id="aws-rds-instances",
@@ -206,7 +204,6 @@ AWS_IAM_STATEMENTS_ALLOW_CREATE_ACTIONS = AttackPathsQueryDefinition(
# Network Exposure Queries
# ------------------------
AWS_EC2_INSTANCES_INTERNET_EXPOSED = AttackPathsQueryDefinition(
id="aws-ec2-instances-internet-exposed",
@@ -356,7 +353,6 @@ AWS_PUBLIC_IP_RESOURCE_LOOKUP = AttackPathsQueryDefinition(
# Privilege Escalation Queries (based on pathfinding.cloud research)
# https://github.com/DataDog/pathfinding.cloud
# -------------------------------------------------------------------
# APPRUNNER-001
AWS_APPRUNNER_PRIVESC_PASSROLE_CREATE_SERVICE = AttackPathsQueryDefinition(
@@ -3739,7 +3735,6 @@ AWS_STS_PRIVESC_ASSUME_ROLE = AttackPathsQueryDefinition(
)
# AWS Queries List
# ----------------
AWS_QUERIES: list[AttackPathsQueryDefinition] = [
AWS_INTERNET_EXPOSED_EC2_SENSITIVE_S3_ACCESS,

View File

@@ -40,7 +40,6 @@ class ProviderConfig:
# Provider Configurations
# -----------------------
AWS_CONFIG = ProviderConfig(
name="aws",
@@ -86,7 +85,6 @@ INTERNAL_PROPERTIES: list[str] = [
# Provider Config Accessors
# -------------------------
def is_provider_available(provider_type: str) -> bool:
@@ -119,7 +117,6 @@ def get_provider_resource_label(provider_type: str) -> str:
# Dynamic Isolation Label Helpers
# --------------------------------
def _normalize_uuid(value: str | UUID) -> str:

View File

@@ -26,6 +26,7 @@ from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
from tasks.jobs.attack_paths.queries import (
ADD_RESOURCE_LABEL_TEMPLATE,
CLEANUP_FINDINGS_TEMPLATE,
COUNT_UNLABELED_TEMPLATE,
INSERT_FINDING_TEMPLATE,
render_cypher_template,
)
@@ -81,7 +82,6 @@ def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]:
# Public API
# ----------
def create_findings_indexes(neo4j_session: neo4j.Session) -> None:
@@ -110,45 +110,41 @@ def analysis(
def add_resource_label(
neo4j_session: neo4j.Session, provider_type: str, provider_uid: str
) -> int:
) -> None:
"""
Add a common resource label to all nodes connected to the provider account.
This enables index usage for resource lookups in the findings query,
since Cartography nodes don't have a common parent label.
Returns the total number of nodes labeled.
"""
query = render_cypher_template(
ADD_RESOURCE_LABEL_TEMPLATE,
{
"__ROOT_LABEL__": get_root_node_label(provider_type),
"__RESOURCE_LABEL__": get_provider_resource_label(provider_type),
},
)
replacements = {
"__ROOT_LABEL__": get_root_node_label(provider_type),
"__RESOURCE_LABEL__": get_provider_resource_label(provider_type),
}
set_query = render_cypher_template(ADD_RESOURCE_LABEL_TEMPLATE, replacements)
count_query = render_cypher_template(COUNT_UNLABELED_TEMPLATE, replacements)
logger.info(
f"Adding {get_provider_resource_label(provider_type)} label to all resources for {provider_uid}"
)
total_labeled = 0
labeled_count = 1
remaining = 1
while labeled_count > 0:
result = neo4j_session.run(
query,
while remaining > 0:
neo4j_session.run(
set_query,
{"provider_uid": provider_uid, "batch_size": BATCH_SIZE},
)
labeled_count = result.single().get("labeled_count", 0)
total_labeled += labeled_count
result = neo4j_session.run(count_query, {"provider_uid": provider_uid})
remaining = result.single().get("remaining", 0)
total_labeled += BATCH_SIZE
if labeled_count > 0:
if remaining > 0:
logger.info(
f"Labeled {total_labeled} nodes with {get_provider_resource_label(provider_type)}"
f"Labeled batch, {remaining} nodes remaining for {get_provider_resource_label(provider_type)}"
)
return total_labeled
def load_findings(
neo4j_session: neo4j.Session,
@@ -213,7 +209,6 @@ def cleanup_findings(
# Findings Streaming (Generator-based)
# -------------------------------------
def stream_findings_with_resources(
@@ -289,7 +284,6 @@ def _fetch_findings_batch(
# Batch Enrichment
# -----------------
def _enrich_batch_with_resources(

View File

@@ -4,7 +4,6 @@ import neo4j
from cartography.client.core.tx import run_write_query
from celery.utils.log import get_task_logger
from tasks.jobs.attack_paths.config import (
INTERNET_NODE_LABEL,
PROWLER_FINDING_LABEL,
@@ -67,3 +66,36 @@ def create_all_indexes(neo4j_session: neo4j.Session) -> None:
"""Create all indexes (both findings and sync)."""
create_indexes(neo4j_session, IndexType.FINDINGS)
create_indexes(neo4j_session, IndexType.SYNC)
# Local (embedded) database indexes
# Grafeo indexes are property-level (not label-scoped like Neo4j).
# This list combines Cartography and Prowler properties used in queries.
LOCAL_DB_INDEXED_PROPERTIES = [
# Cartography (from cartography/data/indexes.cypher)
"id",
"lastupdated",
"arn",
"name",
"email",
"key",
"ip",
"dnsname",
"keyfingerprint",
"cve_id",
"host_info_local_ip",
# Prowler findings
"provider_uid",
"status",
]
def create_local_indexes(database_name: str) -> None:
"""Create property indexes on a local embedded graph database."""
from api.attack_paths.local_database import _resolve_path
from grafeo import GrafeoDB
db = GrafeoDB(_resolve_path(database_name))
for prop in LOCAL_DB_INDEXED_PROPERTIES:
db.create_property_index(prop)

View File

@@ -22,14 +22,18 @@ def render_cypher_template(template: str, replacements: dict[str, str]) -> str:
# Findings queries (used by findings.py)
# ---------------------------------------
ADD_RESOURCE_LABEL_TEMPLATE = """
MATCH (account:__ROOT_LABEL__ {id: $provider_uid})-->(r)
WHERE NOT r:__ROOT_LABEL__ AND NOT r:__RESOURCE_LABEL__
WITH r LIMIT $batch_size
SET r:__RESOURCE_LABEL__
RETURN COUNT(r) AS labeled_count
"""
COUNT_UNLABELED_TEMPLATE = """
MATCH (account:__ROOT_LABEL__ {id: $provider_uid})-->(r)
WHERE NOT r:__ROOT_LABEL__ AND NOT r:__RESOURCE_LABEL__
RETURN count(r) AS remaining
"""
INSERT_FINDING_TEMPLATE = f"""
@@ -95,7 +99,6 @@ CLEANUP_FINDINGS_TEMPLATE = f"""
"""
# Internet queries (used by internet.py)
# ---------------------------------------
CREATE_INTERNET_NODE = f"""
MERGE (internet:{INTERNET_NODE_LABEL} {{id: 'Internet'}})
@@ -126,11 +129,10 @@ CREATE_CAN_ACCESS_RELATIONSHIPS_TEMPLATE = f"""
"""
# Sync queries (used by sync.py)
# -------------------------------
NODE_FETCH_QUERY = """
MATCH (n)
WHERE id(n) > $last_id
WHERE id(n) > $last_id AND size(labels(n)) > 0
RETURN id(n) AS internal_id,
elementId(n) AS element_id,
labels(n) AS labels,
@@ -140,12 +142,12 @@ NODE_FETCH_QUERY = """
"""
RELATIONSHIPS_FETCH_QUERY = """
MATCH ()-[r]->()
MATCH (s)-[r]->(t)
WHERE id(r) > $last_id
RETURN id(r) AS internal_id,
type(r) AS rel_type,
elementId(startNode(r)) AS start_element_id,
elementId(endNode(r)) AS end_element_id,
elementId(s) AS start_element_id,
elementId(t) AS end_element_id,
properties(r) AS props
ORDER BY internal_id
LIMIT $batch_size

View File

@@ -2,13 +2,13 @@
Attack Paths scan orchestrator.
Runs the full scan lifecycle for a single provider, called from a Celery task.
The idea is simple: ingest everything into a throwaway Neo4j database, enrich
it with Prowler-specific data, then swap it into the tenant's long-lived
database so queries never see a half-built graph.
The idea is simple: ingest everything into a throwaway embedded database,
enrich it with Prowler-specific data, then swap it into the tenant's
long-lived Neo4j database so queries never see a half-built graph.
Two databases are involved:
- Temporary (db-tmp-scan-<attack_paths_scan_id>): short-lived, single-provider, dropped after sync.
- Tenant (db-tenant-<tenant_uuid>): long-lived, multi-provider, what the API queries against.
- Temporary (local embedded via local_database): short-lived, single-provider, deleted after sync.
- Tenant (db-tenant-<tenant_uuid> on Neo4j): long-lived, multi-provider, what the API queries against.
Pipeline steps:
@@ -16,7 +16,7 @@ Pipeline steps:
Retrieve or create the AttackPathsScan row. Exit early if the provider
type has no ingestion function (only AWS is supported today).
2. Create a fresh temporary Neo4j database and set up Cartography indexes
2. Create a fresh temporary embedded database and set up Cartography indexes
plus ProwlerFinding indexes before writing any data.
3. Run the provider-specific Cartography ingestion (e.g. aws.start_aws_ingestion).
@@ -46,9 +46,9 @@ Pipeline steps:
properties for multi-provider isolation.
- Set graph_data_ready back to True.
8. Drop the temporary database, mark the AttackPathsScan as COMPLETED.
8. Delete the temporary database, mark the AttackPathsScan as COMPLETED.
On failure the temp database is dropped, the scan is marked FAILED, and the
On failure the temp database is deleted, the scan is marked FAILED, and the
exception propagates to Celery.
"""
@@ -64,8 +64,10 @@ from cartography.intel import ontology as cartography_ontology
from celery.utils.log import get_task_logger
from tasks.jobs.attack_paths import db_utils, findings, internet, sync, utils
from tasks.jobs.attack_paths.config import get_cartography_ingestion_function
from tasks.jobs.attack_paths.indexes import create_local_indexes
from api.attack_paths import database as graph_database
from api.attack_paths import local_database as local_graph_database
from api.db_utils import rls_transaction
from api.models import Provider as ProwlerAPIProvider
from api.models import StateChoices
@@ -150,22 +152,18 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
try:
logger.info(
f"Creating Neo4j database {tmp_cartography_config.neo4j_database} for tenant {prowler_api_provider.tenant_id}"
f"Creating temporary database {tmp_database_name} for tenant {prowler_api_provider.tenant_id}"
)
graph_database.create_database(tmp_cartography_config.neo4j_database)
local_graph_database.create_database(tmp_database_name)
create_local_indexes(tmp_database_name)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 1)
logger.info(
f"Starting Cartography ({attack_paths_scan.id}) for "
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
with graph_database.get_session(
tmp_cartography_config.neo4j_database
) as tmp_neo4j_session:
# Indexes creation
cartography_create_indexes.run(tmp_neo4j_session, tmp_cartography_config)
findings.create_findings_indexes(tmp_neo4j_session)
with local_graph_database.get_session(tmp_database_name) as tmp_neo4j_session:
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 2)
# The real scan, which iterates over cloud services
@@ -195,9 +193,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
logger.info(
f"Creating Internet graph for AWS account {prowler_api_provider.uid}"
)
internet.analysis(
tmp_neo4j_session, prowler_api_provider, tmp_cartography_config
)
internet.analysis(tmp_neo4j_session, prowler_api_provider, tmp_cartography_config)
# Adding Prowler Finding nodes and relationships
logger.info(
@@ -208,11 +204,6 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
)
db_utils.update_attack_paths_scan_progress(attack_paths_scan, 97)
logger.info(
f"Clearing Neo4j cache for database {tmp_cartography_config.neo4j_database}"
)
graph_database.clear_cache(tmp_cartography_config.neo4j_database)
logger.info(
f"Ensuring tenant database {tenant_database_name}, and its indexes, exists for tenant {prowler_api_provider.tenant_id}"
)
@@ -255,8 +246,8 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
f"{prowler_api_provider.provider.upper()} provider {prowler_api_provider.id}"
)
logger.info(f"Dropping temporary Neo4j database {tmp_database_name}")
graph_database.drop_database(tmp_database_name)
logger.info(f"Dropping temporary database {tmp_database_name}")
local_graph_database.drop_database(tmp_database_name)
db_utils.finish_attack_paths_scan(
attack_paths_scan, StateChoices.COMPLETED, ingestion_exceptions
@@ -285,11 +276,11 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
# Dropping the temporary database if it still exists
try:
graph_database.drop_database(tmp_cartography_config.neo4j_database)
local_graph_database.drop_database(tmp_database_name)
except Exception as e:
logger.error(
f"Failed to drop temporary Neo4j database `{tmp_cartography_config.neo4j_database}` during cleanup: {e}",
f"Failed to drop temporary database `{tmp_database_name}` during cleanup: {e}",
exc_info=True,
)

View File

@@ -12,6 +12,7 @@ import neo4j
from celery.utils.log import get_task_logger
from api.attack_paths import database as graph_database
from api.attack_paths import local_database as local_graph_database
from tasks.jobs.attack_paths.config import (
PROVIDER_ISOLATION_PROPERTIES,
PROVIDER_RESOURCE_LABEL,
@@ -85,7 +86,7 @@ def sync_nodes(
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
holding connections for the entire sync duration.
"""
last_id = -1
total_synced = 0
@@ -94,7 +95,7 @@ def sync_nodes(
grouped: dict[tuple[str, ...], list[dict[str, Any]]] = defaultdict(list)
batch_count = 0
with graph_database.get_session(source_database) as source_session:
with local_graph_database.get_session(source_database) as source_session:
result = source_session.run(
NODE_FETCH_QUERY,
{"last_id": last_id, "batch_size": SYNC_BATCH_SIZE},
@@ -146,7 +147,7 @@ def sync_relationships(
Adds `_provider_id` property to all relationships.
Source and target sessions are opened sequentially per batch to avoid
holding two Bolt connections simultaneously for the entire sync duration.
holding connections for the entire sync duration.
"""
last_id = -1
total_synced = 0
@@ -155,7 +156,7 @@ def sync_relationships(
grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
batch_count = 0
with graph_database.get_session(source_database) as source_session:
with local_graph_database.get_session(source_database) as source_session:
result = source_session.run(
RELATIONSHIPS_FETCH_QUERY,
{"last_id": last_id, "batch_size": SYNC_BATCH_SIZE},

View File

@@ -24,7 +24,7 @@ from prowler.lib.check.models import Severity
@pytest.mark.django_db
class TestAttackPathsRun:
# Patching with decorators as we got a `SyntaxError: too many statically nested blocks` error if we use context managers
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch(
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
@@ -45,6 +45,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
@@ -61,6 +62,7 @@ class TestAttackPathsRun:
self,
mock_init_provider,
mock_get_uri,
mock_local_create_db,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
@@ -110,10 +112,14 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
side_effect=["db-scan-id", "tenant-db"],
) as mock_get_db_name,
patch(
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
) as mock_get_session,
),
patch(
"tasks.jobs.attack_paths.scan.db_utils.retrieve_attack_paths_scan",
return_value=attack_paths_scan,
@@ -134,12 +140,11 @@ class TestAttackPathsRun:
[call(attack_paths_scan.id, temporary=True), call(provider.tenant_id)]
)
mock_create_db.assert_has_calls([call("db-scan-id"), call("tenant-db")])
mock_get_session.assert_has_calls([call("db-scan-id"), call("tenant-db")])
assert mock_cartography_indexes.call_count == 2
mock_findings_indexes.assert_has_calls([call(mock_session), call(mock_session)])
mock_local_create_db.assert_called_once_with("db-scan-id")
mock_create_db.assert_called_once_with("tenant-db")
mock_cartography_indexes.assert_called_once()
mock_findings_indexes.assert_called_once_with(mock_session)
mock_sync_indexes.assert_called_once_with(mock_session)
# These use tmp_cartography_config (neo4j_database="db-scan-id")
mock_cartography_analysis.assert_called_once()
mock_cartography_ontology.assert_called_once()
mock_internet_analysis.assert_called_once()
@@ -178,7 +183,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@@ -189,7 +194,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
return_value="db-scan-id",
@@ -208,7 +213,7 @@ class TestAttackPathsRun:
mock_init_provider,
mock_get_uri,
mock_get_db_name,
mock_create_db,
mock_local_create_db,
mock_cartography_indexes,
mock_cartography_analysis,
mock_findings_indexes,
@@ -249,7 +254,7 @@ class TestAttackPathsRun:
with (
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
@@ -277,7 +282,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@@ -288,7 +293,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
return_value="db-scan-id",
@@ -307,7 +312,7 @@ class TestAttackPathsRun:
mock_init_provider,
mock_get_uri,
mock_get_db_name,
mock_create_db,
mock_local_create_db,
mock_cartography_indexes,
mock_cartography_analysis,
mock_findings_indexes,
@@ -350,7 +355,7 @@ class TestAttackPathsRun:
with (
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
@@ -378,8 +383,8 @@ class TestAttackPathsRun:
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch(
"tasks.jobs.attack_paths.scan.graph_database.drop_database",
side_effect=ConnectionError("neo4j down"),
"tasks.jobs.attack_paths.scan.local_database.drop_database",
side_effect=ConnectionError("cleanup failed"),
)
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@@ -391,7 +396,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.findings.create_findings_indexes")
@patch("tasks.jobs.attack_paths.scan.cartography_analysis.run")
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
return_value="db-scan-id",
@@ -410,7 +415,7 @@ class TestAttackPathsRun:
mock_init_provider,
mock_get_uri,
mock_get_db_name,
mock_create_db,
mock_local_create_db,
mock_cartography_indexes,
mock_cartography_analysis,
mock_findings_indexes,
@@ -451,7 +456,7 @@ class TestAttackPathsRun:
with (
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
@@ -479,7 +484,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@@ -499,6 +504,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
@@ -515,6 +521,7 @@ class TestAttackPathsRun:
self,
mock_init_provider,
mock_get_uri,
mock_local_create_db,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
@@ -563,6 +570,10 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
side_effect=["db-scan-id", "tenant-db"],
),
patch(
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
@@ -592,7 +603,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@@ -612,6 +623,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
@@ -628,6 +640,7 @@ class TestAttackPathsRun:
self,
mock_init_provider,
mock_get_uri,
mock_local_create_db,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
@@ -676,6 +689,10 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
side_effect=["db-scan-id", "tenant-db"],
),
patch(
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
@@ -705,7 +722,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch(
"tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready",
@@ -725,6 +742,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
@@ -741,6 +759,7 @@ class TestAttackPathsRun:
self,
mock_init_provider,
mock_get_uri,
mock_local_create_db,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
@@ -789,6 +808,10 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
side_effect=["db-scan-id", "tenant-db"],
),
patch(
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
@@ -823,7 +846,7 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.utils.call_within_event_loop",
side_effect=lambda fn, *a, **kw: fn(*a, **kw),
)
@patch("tasks.jobs.attack_paths.scan.graph_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.local_database.drop_database")
@patch("tasks.jobs.attack_paths.scan.db_utils.finish_attack_paths_scan")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_graph_data_ready")
@patch("tasks.jobs.attack_paths.scan.db_utils.set_provider_graph_data_ready")
@@ -843,6 +866,7 @@ class TestAttackPathsRun:
@patch("tasks.jobs.attack_paths.scan.cartography_create_indexes.run")
@patch("tasks.jobs.attack_paths.scan.graph_database.clear_cache")
@patch("tasks.jobs.attack_paths.scan.graph_database.create_database")
@patch("tasks.jobs.attack_paths.scan.local_database.create_database")
@patch(
"tasks.jobs.attack_paths.scan.graph_database.get_uri",
return_value="bolt://neo4j",
@@ -859,6 +883,7 @@ class TestAttackPathsRun:
self,
mock_init_provider,
mock_get_uri,
mock_local_create_db,
mock_create_db,
mock_clear_cache,
mock_cartography_indexes,
@@ -913,6 +938,10 @@ class TestAttackPathsRun:
"tasks.jobs.attack_paths.scan.graph_database.get_database_name",
side_effect=["db-scan-id", "tenant-db"],
),
patch(
"tasks.jobs.attack_paths.scan.local_database.get_session",
return_value=session_ctx,
),
patch(
"tasks.jobs.attack_paths.scan.graph_database.get_session",
return_value=session_ctx,
@@ -1727,19 +1756,25 @@ class TestAddResourceLabel:
def test_add_resource_label_applies_private_label(self):
mock_session = MagicMock()
first_result = MagicMock()
first_result.single.return_value = {"labeled_count": 5}
second_result = MagicMock()
second_result.single.return_value = {"labeled_count": 0}
mock_session.run.side_effect = [first_result, second_result]
# SET query returns are ignored; COUNT queries return remaining
set_result = MagicMock()
count_result_more = MagicMock()
count_result_more.single.return_value = {"remaining": 5}
count_result_done = MagicMock()
count_result_done.single.return_value = {"remaining": 0}
mock_session.run.side_effect = [
set_result, # 1st SET (labels a batch)
count_result_more, # 1st COUNT (5 remaining)
set_result, # 2nd SET (labels another batch)
count_result_done, # 2nd COUNT (0 remaining, loop ends)
]
total = findings_module.add_resource_label(mock_session, "aws", "123456789012")
findings_module.add_resource_label(mock_session, "aws", "123456789012")
assert total == 5
assert mock_session.run.call_count == 2
query = mock_session.run.call_args_list[0].args[0]
assert "_AWSResource" in query
assert "AWSResource" not in query.replace("_AWSResource", "")
assert mock_session.run.call_count == 4
set_query = mock_session.run.call_args_list[0].args[0]
assert "_AWSResource" in set_query
assert "AWSResource" not in set_query.replace("_AWSResource", "")
def _make_session_ctx(session, call_order=None, name=None):
@@ -1773,13 +1808,18 @@ class TestSyncNodes:
mock_source_2 = MagicMock()
mock_source_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(mock_source_1),
_make_session_ctx(mock_target),
_make_session_ctx(mock_source_2),
],
with (
patch(
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[
_make_session_ctx(mock_source_1),
_make_session_ctx(mock_source_2),
],
),
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[_make_session_ctx(mock_target)],
),
):
total = sync_module.sync_nodes(
"source-db", "target-db", "tenant-1", "prov-1"
@@ -1807,13 +1847,18 @@ class TestSyncNodes:
src_2 = MagicMock()
src_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(tgt, call_order, "target"),
_make_session_ctx(src_2, call_order, "source2"),
],
with (
patch(
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(src_2, call_order, "source2"),
],
),
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[_make_session_ctx(tgt, call_order, "target")],
),
):
sync_module.sync_nodes("src-db", "tgt-db", "t-1", "p-1")
@@ -1844,15 +1889,20 @@ class TestSyncNodes:
with (
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[
_make_session_ctx(src_1),
_make_session_ctx(tgt_1),
_make_session_ctx(src_2),
_make_session_ctx(tgt_2),
_make_session_ctx(src_3),
],
),
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(tgt_1),
_make_session_ctx(tgt_2),
],
),
patch("tasks.jobs.attack_paths.sync.SYNC_BATCH_SIZE", 1),
):
total = sync_module.sync_nodes("src", "tgt", "t-1", "p-1")
@@ -1866,13 +1916,13 @@ class TestSyncNodes:
src.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[_make_session_ctx(src)],
) as mock_get_session:
) as mock_local_get_session:
total = sync_module.sync_nodes("src", "tgt", "t-1", "p-1")
assert total == 0
assert mock_get_session.call_count == 1
assert mock_local_get_session.call_count == 1
class TestSyncRelationships:
@@ -1893,13 +1943,18 @@ class TestSyncRelationships:
src_2 = MagicMock()
src_2.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(tgt, call_order, "target"),
_make_session_ctx(src_2, call_order, "source2"),
],
with (
patch(
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[
_make_session_ctx(src_1, call_order, "source1"),
_make_session_ctx(src_2, call_order, "source2"),
],
),
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[_make_session_ctx(tgt, call_order, "target")],
),
):
sync_module.sync_relationships("src", "tgt", "p-1")
@@ -1932,15 +1987,20 @@ class TestSyncRelationships:
with (
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[
_make_session_ctx(src_1),
_make_session_ctx(tgt_1),
_make_session_ctx(src_2),
_make_session_ctx(tgt_2),
_make_session_ctx(src_3),
],
),
patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
side_effect=[
_make_session_ctx(tgt_1),
_make_session_ctx(tgt_2),
],
),
patch("tasks.jobs.attack_paths.sync.SYNC_BATCH_SIZE", 1),
):
total = sync_module.sync_relationships("src", "tgt", "p-1")
@@ -1954,13 +2014,13 @@ class TestSyncRelationships:
src.run.return_value = []
with patch(
"tasks.jobs.attack_paths.sync.graph_database.get_session",
"tasks.jobs.attack_paths.sync.local_database.get_session",
side_effect=[_make_session_ctx(src)],
) as mock_get_session:
) as mock_local_get_session:
total = sync_module.sync_relationships("src", "tgt", "p-1")
assert total == 0
assert mock_get_session.call_count == 1
assert mock_local_get_session.call_count == 1
class TestInternetAnalysis: