mirror of
https://github.com/prowler-cloud/prowler.git
synced 2026-04-04 22:46:55 +00:00
171 lines
5.7 KiB
Python
171 lines
5.7 KiB
Python
"""
|
|
Cypher sanitizer for custom (user-supplied) Attack Paths queries.
|
|
|
|
Two responsibilities:
|
|
|
|
1. **Validation** - reject queries containing SSRF or dangerous procedure
|
|
patterns (defense-in-depth; the primary control is ``neo4j.READ_ACCESS``).
|
|
|
|
2. **Provider-scoped label injection** - inject a dynamic
|
|
``_Provider_{uuid}`` label into every node pattern so the database can
|
|
use its native label index for provider isolation.
|
|
|
|
Label-injection pipeline:
|
|
|
|
1. **Protect** string literals and line comments (placeholder replacement).
|
|
2. **Split** by top-level clause keywords to track clause context.
|
|
3. **Pass A** - inject into *labeled* node patterns in ALL segments.
|
|
4. **Pass B** - inject into *bare* node patterns in MATCH segments only.
|
|
5. **Restore** protected regions.
|
|
"""
|
|
|
|
import re
|
|
|
|
from rest_framework.exceptions import ValidationError
|
|
|
|
from tasks.jobs.attack_paths.config import get_provider_label
|
|
|
|
|
|
# Step 1 - String / comment protection
|
|
# Single combined regex: strings first, then line comments.
|
|
# The regex engine finds the leftmost match, so a string like 'https://prowler.com'
|
|
# is consumed as a string before the // inside it can match as a comment.
|
|
_PROTECTED_RE = re.compile(r"'(?:[^'\\]|\\.)*'|\"(?:[^\"\\]|\\.)*\"|//[^\n]*")
|
|
|
|
# Step 2 - Clause splitting
|
|
# OPTIONAL MATCH must come before MATCH to avoid partial matching.
|
|
_CLAUSE_RE = re.compile(
|
|
r"\b(OPTIONAL\s+MATCH|MATCH|WHERE|RETURN|WITH|ORDER\s+BY"
|
|
r"|SKIP|LIMIT|UNION|UNWIND|CALL)\b",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
# Pass A - Labeled node patterns (all segments)
|
|
# Matches node patterns that have at least one :Label.
|
|
# (?<!\w)\( - open paren NOT preceded by a word char (excludes function calls).
|
|
# Group 1: optional variable + one or more :Label
|
|
# Group 2: optional {properties} + closing paren
|
|
_LABELED_NODE_RE = re.compile(
|
|
r"(?<!\w)\("
|
|
r"("
|
|
r"\s*(?:[a-zA-Z_]\w*)?"
|
|
r"(?:\s*:\s*(?:`[^`]*`|[a-zA-Z_]\w*))+"
|
|
r")"
|
|
r"("
|
|
r"\s*(?:\{[^}]*\})?"
|
|
r"\s*\)"
|
|
r")"
|
|
)
|
|
|
|
# Pass B - Bare node patterns (MATCH segments only)
|
|
# Matches (identifier) or (identifier {properties}) without any :Label.
|
|
# Only applied in MATCH/OPTIONAL MATCH segments.
|
|
_BARE_NODE_RE = re.compile(
|
|
r"(?<!\w)\(" r"(\s*[a-zA-Z_]\w*)" r"(\s*(?:\{[^}]*\})?)" r"\s*\)"
|
|
)
|
|
|
|
_MATCH_CLAUSES = frozenset({"MATCH", "OPTIONAL MATCH"})
|
|
|
|
|
|
def _inject_labeled(segment: str, label: str) -> str:
|
|
"""Inject provider label into all node patterns that have existing labels."""
|
|
return _LABELED_NODE_RE.sub(rf"(\1:{label}\2", segment)
|
|
|
|
|
|
def _inject_bare(segment: str, label: str) -> str:
|
|
"""Inject provider label into bare `(identifier)` node patterns."""
|
|
|
|
def _replace(match):
|
|
var = match.group(1)
|
|
props = match.group(2).strip()
|
|
if props:
|
|
return f"({var}:{label} {props})"
|
|
return f"({var}:{label})"
|
|
|
|
return _BARE_NODE_RE.sub(_replace, segment)
|
|
|
|
|
|
def inject_provider_label(cypher: str, provider_id: str) -> str:
|
|
"""Rewrite a Cypher query to scope every node pattern to a provider.
|
|
|
|
Args:
|
|
cypher: The original Cypher query string.
|
|
provider_id: The provider UUID (will be converted to a label via
|
|
`get_provider_label`).
|
|
|
|
Returns:
|
|
The rewritten Cypher with `:_Provider_{uuid}` appended to every
|
|
node pattern.
|
|
"""
|
|
label = get_provider_label(provider_id)
|
|
|
|
# Step 1: Protect strings and comments (single pass, leftmost-first)
|
|
protected: list[str] = []
|
|
|
|
def _save(match):
|
|
protected.append(match.group(0))
|
|
return f"\x00P{len(protected) - 1}\x00"
|
|
|
|
work = _PROTECTED_RE.sub(_save, cypher)
|
|
|
|
# Step 2: Split by clause keywords
|
|
parts = _CLAUSE_RE.split(work)
|
|
|
|
# Steps 3-4: Apply injection passes per segment
|
|
result: list[str] = []
|
|
current_clause: str | None = None
|
|
|
|
for i, part in enumerate(parts):
|
|
if i % 2 == 1:
|
|
# Keyword token - normalize for clause tracking
|
|
current_clause = re.sub(r"\s+", " ", part.strip()).upper()
|
|
result.append(part)
|
|
else:
|
|
# Content segment - apply injection based on clause context
|
|
part = _inject_labeled(part, label)
|
|
if current_clause in _MATCH_CLAUSES:
|
|
part = _inject_bare(part, label)
|
|
result.append(part)
|
|
|
|
work = "".join(result)
|
|
|
|
# Step 5: Restore protected regions
|
|
for i, original in enumerate(protected):
|
|
work = work.replace(f"\x00P{i}\x00", original)
|
|
|
|
return work
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Validation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Patterns that indicate SSRF or dangerous procedure calls
|
|
# Defense-in-depth layer - the primary control is `neo4j.READ_ACCESS`
|
|
_BLOCKED_PATTERNS = [
|
|
re.compile(r"\bLOAD\s+CSV\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.load\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.import\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.export\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.cypher\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.systemdb\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.config\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.periodic\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.do\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.trigger\b", re.IGNORECASE),
|
|
re.compile(r"\bapoc\.custom\b", re.IGNORECASE),
|
|
]
|
|
|
|
|
|
def validate_custom_query(cypher: str) -> None:
|
|
"""Reject queries containing known SSRF or dangerous procedure patterns.
|
|
|
|
Raises ValidationError if a blocked pattern is found.
|
|
String literals and comments are stripped before matching to avoid
|
|
false positives.
|
|
"""
|
|
stripped = _PROTECTED_RE.sub("", cypher)
|
|
for pattern in _BLOCKED_PATTERNS:
|
|
if pattern.search(stripped):
|
|
raise ValidationError({"query": "Query contains a blocked operation"})
|