feat(mcp_server): implement new Prowler App MCP server design (#9300)

Co-authored-by: Pepe Fagoaga <pepe@prowler.com>
Rubén De la Torre Vico
2025-12-04 11:00:19 +01:00
committed by GitHub
parent a622b9d965
commit c7abd77a1c
17 changed files with 1789 additions and 2245 deletions

.gitignore

@@ -150,9 +150,5 @@ _data/
# Claude
CLAUDE.md
# MCP Server
mcp_server/prowler_mcp_server/prowler_app/server.py
mcp_server/prowler_mcp_server/prowler_app/utils/schema.yaml
# Compliance report
*.pdf


@@ -2,14 +2,26 @@
All notable changes to the **Prowler MCP Server** are documented in this file.
## [0.1.1] (Prowler 5.14.0)
### Fixed
- Fix documentation MCP Server to return list of dictionaries [(#9205)](https://github.com/prowler-cloud/prowler/pull/9205)
## [0.1.0] (Prowler 5.13.0)
## [0.2.0] (Prowler UNRELEASED)
### Added
- Remove all Prowler App MCP tools and add new MCP Server tools for Prowler Findings and Compliance [(#9300)](https://github.com/prowler-cloud/prowler/pull/9300)
---
## [0.1.1] (Prowler v5.14.0)
### Fixed
- Fix documentation MCP Server to return list of dictionaries [(#9205)](https://github.com/prowler-cloud/prowler/pull/9205)
---
## [0.1.0] (Prowler v5.13.0)
### Added
- Initial release of Prowler MCP Server [(#8695)](https://github.com/prowler-cloud/prowler/pull/8695)
- Set appropriate user-agent in requests [(#8724)](https://github.com/prowler-cloud/prowler/pull/8724)
- Basic logger functionality [(#8740)](https://github.com/prowler-cloud/prowler/pull/8740)


@@ -33,8 +33,6 @@ def main():
try:
args = parse_arguments()
print(f"args.transport: {args.transport}")
if args.transport is None:
args.transport = os.getenv("PROWLER_MCP_TRANSPORT_MODE", "stdio")
else:


@@ -0,0 +1,30 @@
"""Pydantic models for Prowler App MCP Server."""
from prowler_mcp_server.prowler_app.models.base import MinimalSerializerMixin
from prowler_mcp_server.prowler_app.models.compliance import (
ComplianceFramework,
ComplianceFrameworksListResponse,
)
from prowler_mcp_server.prowler_app.models.findings import (
CheckMetadata,
CheckRemediation,
DetailedFinding,
FindingsListResponse,
FindingsOverview,
SimplifiedFinding,
)
__all__ = [
# Base models
"MinimalSerializerMixin",
# Compliance models
"ComplianceFramework",
"ComplianceFrameworksListResponse",
# Findings models
"CheckMetadata",
"CheckRemediation",
"DetailedFinding",
"FindingsListResponse",
"FindingsOverview",
"SimplifiedFinding",
]


@@ -0,0 +1,59 @@
"""Base models and mixins for Prowler MCP Server models."""
from typing import Any
from pydantic import BaseModel, SerializerFunctionWrapHandler, model_serializer
class MinimalSerializerMixin(BaseModel):
"""Mixin that excludes empty values from serialization.
This mixin optimizes model serialization for LLM consumption by removing noise
and reducing token usage. It excludes:
- None values
- Empty strings
- Empty lists
- Empty dicts
"""
@model_serializer(mode="wrap")
def _serialize(self, handler: SerializerFunctionWrapHandler) -> dict[str, Any]:
"""Serialize model excluding empty values.
Args:
handler: Pydantic serializer function wrapper
Returns:
Dictionary with non-empty values only
"""
data = handler(self)
return {k: v for k, v in data.items() if not self._should_exclude(v)}
def _should_exclude(self, value: Any) -> bool:
"""Determine if a value should be excluded from serialization.
Override this method in subclasses for custom exclusion logic.
Args:
value: Field value
Returns:
True if the value should be excluded, False otherwise
"""
# None values
if value is None:
return True
# Empty strings
if value == "":
return True
# Empty lists
if isinstance(value, list) and not value:
return True
# Empty dicts
if isinstance(value, dict) and not value:
return True
return False
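
A minimal usage sketch of the mixin (hypothetical Example model, assuming Pydantic v2): serializing drops the empty fields, which is what keeps tool payloads compact for LLM consumption.

class Example(MinimalSerializerMixin):
    name: str
    tags: list[str] = []       # empty list -> excluded on dump
    notes: str | None = None   # None -> excluded on dump

print(Example(name="bucket-check").model_dump())
# {'name': 'bucket-check'}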


@@ -0,0 +1,333 @@
"""Pydantic models for simplified security findings responses."""
from typing import Literal
from prowler_mcp_server.prowler_app.models.base import MinimalSerializerMixin
from pydantic import BaseModel, ConfigDict, Field
class CheckRemediation(MinimalSerializerMixin, BaseModel):
"""Remediation information for a security check."""
model_config = ConfigDict(frozen=True)
cli: str | None = Field(
default=None,
description="Command-line interface commands for remediation",
)
terraform: str | None = Field(
default=None,
description="Terraform code snippet with best practices for remediation",
)
recommendation_text: str | None = Field(
default=None, description="Text description with best practices"
)
recommendation_url: str | None = Field(
default=None,
description="URL to external remediation documentation",
)
class CheckMetadata(MinimalSerializerMixin, BaseModel):
"""Essential metadata for a security check."""
model_config = ConfigDict(frozen=True)
check_id: str = Field(
description="Unique provider identifier for the security check (e.g., 's3_bucket_public_access')",
)
title: str = Field(
description="Human-readable title of the security check",
)
description: str = Field(
description="Detailed description of what the check validates",
)
provider: str = Field(
description="Prowler provider this check belongs to (e.g., 'aws', 'azure', 'gcp')",
)
service: str = Field(
description="Prowler service being checked (e.g., 's3', 'ec2', 'keyvault')",
)
resource_type: str = Field(
description="Type of resource being evaluated (e.g., 'AwsS3Bucket')",
)
risk: str | None = Field(
default=None,
description="Risk description if the check fails",
)
remediation: CheckRemediation | None = Field(
default=None,
description="Remediation guidance including CLI commands and recommendations",
)
related_url: str | None = Field(
default=None,
description="URL to additional documentation or references",
)
categories: list[str] = Field(
default_factory=list,
description="Categories this check belongs to (e.g., ['encryption', 'logging'])",
)
@classmethod
def from_api_response(cls, data: dict) -> "CheckMetadata":
"""Transform API check_metadata to simplified format."""
remediation_data = data.get("remediation")
remediation = None
if remediation_data:
code = remediation_data.get("code", {})
recommendation = remediation_data.get("recommendation", {})
remediation = CheckRemediation(
cli=code.get("cli"),
terraform=code.get("terraform"),
recommendation_text=recommendation.get("text"),
recommendation_url=recommendation.get("url"),
)
return cls(
check_id=data["checkid"],
title=data["checktitle"],
description=data["description"],
provider=data["provider"],
risk=data.get("risk"),
service=data["servicename"],
resource_type=data["resourcetype"],
remediation=remediation,
related_url=data.get("relatedurl"),
categories=data.get("categories", []),
)
class SimplifiedFinding(MinimalSerializerMixin, BaseModel):
"""Simplified security finding with only LLM-relevant information."""
model_config = ConfigDict(frozen=True)
id: str = Field(
description="Unique identifier (UUIDv7) for this finding in Prowler database"
)
uid: str = Field(
description="Human-readable unique identifier assigned by Prowler. Format: prowler-{provider}-{check_id}-{account_uid}-{region}-{resource_name}",
)
status: Literal["FAIL", "PASS", "MANUAL"] = Field(
description="Result status: FAIL (security issue found), PASS (no issue), MANUAL (requires manual verification)",
)
severity: Literal["critical", "high", "medium", "low", "informational"] = Field(
description="Severity level of the finding",
)
check_metadata: CheckMetadata = Field(
description="Metadata about the security check that generated this finding",
)
status_extended: str = Field(
description="Extended status information providing additional context",
)
delta: Literal["new", "changed"] | None = Field(
default=None,
description="Change status: 'new' (not seen before), 'changed' (modified since last scan), or None (unchanged)",
)
muted: bool = Field(
description="Whether this finding has been muted/suppressed by the user",
)
muted_reason: str | None = Field(
default=None,
description="Reason provided when muting this finding (3-500 chars if muted)",
)
@classmethod
def from_api_response(cls, data: dict) -> "SimplifiedFinding":
"""Transform JSON:API finding response to simplified format."""
attributes = data["attributes"]
check_metadata = attributes["check_metadata"]
return cls(
id=data["id"],
uid=attributes["uid"],
status=attributes["status"],
severity=attributes["severity"],
check_metadata=CheckMetadata.from_api_response(check_metadata),
status_extended=attributes["status_extended"],
delta=attributes["delta"],
muted=attributes["muted"],
muted_reason=attributes["muted_reason"],
)
class DetailedFinding(SimplifiedFinding):
"""Detailed security finding with comprehensive information for deep analysis.
Extends SimplifiedFinding with temporal metadata and relationships to scans and resources.
Use this when you need complete context about a specific finding.
"""
model_config = ConfigDict(frozen=True)
inserted_at: str = Field(
description="ISO 8601 timestamp when this finding was first inserted into the database",
)
updated_at: str = Field(
description="ISO 8601 timestamp when this finding was last updated",
)
first_seen_at: str | None = Field(
default=None,
description="ISO 8601 timestamp when this finding was first detected across all scans",
)
scan_id: str | None = Field(
default=None,
description="UUID of the scan that generated this finding",
)
resource_ids: list[str] = Field(
default_factory=list,
description="List of UUIDs for cloud resources associated with this finding",
)
@classmethod
def from_api_response(cls, data: dict) -> "DetailedFinding":
"""Transform JSON:API finding response to detailed format."""
attributes = data["attributes"]
check_metadata = attributes["check_metadata"]
relationships = data.get("relationships", {})
# Parse scan relationship
scan_id = None
scan_data = relationships.get("scan", {}).get("data")
if scan_data:
scan_id = scan_data["id"]
# Parse resources relationship
resource_ids = []
resources_data = relationships.get("resources", {}).get("data", [])
if resources_data:
resource_ids = [r["id"] for r in resources_data]
return cls(
id=data["id"],
uid=attributes["uid"],
status=attributes["status"],
severity=attributes["severity"],
check_metadata=CheckMetadata.from_api_response(check_metadata),
status_extended=attributes.get("status_extended"),
delta=attributes.get("delta"),
muted=attributes["muted"],
muted_reason=attributes.get("muted_reason"),
inserted_at=attributes["inserted_at"],
updated_at=attributes["updated_at"],
first_seen_at=attributes.get("first_seen_at"),
scan_id=scan_id,
resource_ids=resource_ids,
)
class FindingsListResponse(BaseModel):
"""Simplified response for findings list queries."""
model_config = ConfigDict(frozen=True)
findings: list[SimplifiedFinding] = Field(
description="List of security findings matching the query",
)
total_num_finding: int = Field(
description="Total number of findings matching the query across all pages",
ge=0,
)
total_num_pages: int = Field(
description="Total number of pages available",
ge=0,
)
current_page: int = Field(
description="Current page number (1-indexed)",
ge=1,
)
@classmethod
def from_api_response(cls, response: dict) -> "FindingsListResponse":
"""Transform JSON:API response to simplified format."""
data = response["data"]
meta = response["meta"]
pagination = meta["pagination"]
findings = [SimplifiedFinding.from_api_response(item) for item in data]
return cls(
findings=findings,
total_num_finding=pagination["count"],
total_num_pages=pagination["pages"],
current_page=pagination["page"],
)
class FindingsOverview(BaseModel):
"""Simplified findings overview with aggregate statistics."""
model_config = ConfigDict(frozen=True)
total: int = Field(
description="Total number of findings",
ge=0,
)
fail: int = Field(
description="Total number of failed security checks",
ge=0,
)
# Using 'passed' instead of 'pass' since 'pass' is a Python keyword
passed: int = Field(
description="Total number of passed security checks",
ge=0,
)
muted: int = Field(
description="Total number of muted findings",
ge=0,
)
new: int = Field(
description="Total number of new findings (not seen in previous scans)",
ge=0,
)
changed: int = Field(
description="Total number of changed findings (modified since last scan)",
ge=0,
)
fail_new: int = Field(
description="Number of new findings with FAIL status",
ge=0,
)
fail_changed: int = Field(
description="Number of changed findings with FAIL status",
ge=0,
)
pass_new: int = Field(
description="Number of new findings with PASS status",
ge=0,
)
pass_changed: int = Field(
description="Number of changed findings with PASS status",
ge=0,
)
muted_new: int = Field(
description="Number of new muted findings",
ge=0,
)
muted_changed: int = Field(
description="Number of changed muted findings",
ge=0,
)
@classmethod
def from_api_response(cls, response: dict) -> "FindingsOverview":
"""Transform JSON:API overview response to simplified format."""
data = response["data"]
attributes = data["attributes"]
return cls(
total=attributes["total"],
fail=attributes["fail"],
passed=attributes["pass"],
muted=attributes["muted"],
new=attributes["new"],
changed=attributes["changed"],
fail_new=attributes["fail_new"],
fail_changed=attributes["fail_changed"],
pass_new=attributes["pass_new"],
pass_changed=attributes["pass_changed"],
muted_new=attributes["muted_new"],
muted_changed=attributes["muted_changed"],
)
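
As a concrete trace of the transformation these models perform, a hand-written stand-in for a JSON:API findings payload (illustrative values only, not captured API output):

sample_response = {
    "data": [
        {
            "id": "019ac0d6-90d5-73e9-9acf-c22e256f1bac",
            "attributes": {
                "uid": "prowler-aws-s3_bucket_public_access-123456789012-us-east-1-mybucket",
                "status": "FAIL",
                "severity": "high",
                "status_extended": "Bucket mybucket allows public access.",
                "delta": "new",
                "muted": False,
                "muted_reason": None,
                "check_metadata": {
                    "checkid": "s3_bucket_public_access",
                    "checktitle": "Ensure S3 buckets block public access",
                    "description": "Checks whether the bucket blocks public access.",
                    "provider": "aws",
                    "servicename": "s3",
                    "resourcetype": "AwsS3Bucket",
                },
            },
        }
    ],
    "meta": {"pagination": {"count": 1, "pages": 1, "page": 1}},
}

result = FindingsListResponse.from_api_response(sample_response)
print(result.total_num_finding)                     # 1
print(result.findings[0].model_dump()["severity"])  # 'high'; empty fields dropped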


@@ -0,0 +1,8 @@
from fastmcp import FastMCP
from prowler_mcp_server.prowler_app.utils.tool_loader import load_all_tools
# Initialize MCP server
app_mcp_server = FastMCP("prowler-app")
# Auto-discover and load all tools from the tools package
load_all_tools(app_mcp_server)


@@ -0,0 +1,7 @@
"""Domain-specific tools for Prowler App MCP Server.
Each module in this package contains a BaseTool subclass that registers
and implements tools for a specific domain (findings, providers, scans, etc.).
Tools are automatically discovered and loaded by the load_all_tools() function.
"""


@@ -0,0 +1,102 @@
import inspect
from abc import ABC
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from fastmcp import FastMCP
from prowler_mcp_server.lib.logger import logger
from prowler_mcp_server.prowler_app.utils.api_client import ProwlerAPIClient
class BaseTool(ABC):
"""Abstract base class for all MCP tools.
This class defines the contract that all domain-specific tools must follow.
It ensures consistency across tool registration and provides common utilities.
Key responsibilities:
- Auto-register public async methods as tools via register_tools()
- Provide shared access to API client and logger
- Define common patterns for tool registration
- Support dependency injection for the FastMCP instance
Attributes:
_api_client: Singleton instance of ProwlerAPIClient for API requests
_logger: Logger instance for structured logging
Example:
class FindingsTools(BaseTool):
async def search_security_findings(self, severity: list[str] = Field(...)):
# Public async methods are auto-registered by register_tools(),
# with access to self.api_client and self.logger
response = await self.api_client.get("/api/v1/findings")
return response
"""
def __init__(self):
"""Initialize the tool.
Sets up shared dependencies that all tools can access:
- API client (singleton) for making authenticated requests
- Logger instance for structured logging
"""
self._api_client = ProwlerAPIClient()
self._logger = logger
@property
def api_client(self) -> ProwlerAPIClient:
"""Get the shared API client instance.
Returns:
Singleton instance of ProwlerAPIClient for making API requests
"""
return self._api_client
@property
def logger(self):
"""Get the logger instance.
Returns:
Logger instance for structured logging
"""
return self._logger
def register_tools(self, mcp: "FastMCP") -> None:
"""Automatically register all public async methods as tools with FastMCP.
This method inspects the subclass and automatically registers all public
async methods (not starting with '_') as tools. Subclasses do not need
to override this method.
Args:
mcp: The FastMCP instance to register tools with
"""
# Get all methods from the subclass
registered_count = 0
for name, method in inspect.getmembers(self, predicate=inspect.ismethod):
# Skip private/protected methods
if name.startswith("_"):
continue
# Skip methods inherited from BaseTool
if name in ["register_tools"]:
continue
# Skip property getters
if name in ["api_client", "logger"]:
continue
# Check if the method is a coroutine function (async)
if inspect.iscoroutinefunction(method):
mcp.tool(method)
registered_count += 1
self.logger.debug(f"Auto-registered tool: {name}")
self.logger.info(
f"Auto-registered {registered_count} tools from {self.__class__.__name__}"
)
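
A minimal wiring sketch (DemoTools is hypothetical, and constructing it assumes Prowler App credentials are configured, since BaseTool builds the shared API client): any public coroutine method on the subclass is picked up automatically.

from fastmcp import FastMCP

mcp = FastMCP("demo")

class DemoTools(BaseTool):
    async def ping(self) -> str:
        # Public async method -> auto-registered as a tool
        return "pong"

DemoTools().register_tools(mcp)
# logs: "Auto-registered 1 tools from DemoTools"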


@@ -0,0 +1,300 @@
"""Security Findings tools for Prowler App MCP Server.
This module provides tools for searching, viewing, and analyzing security findings
across all cloud providers.
"""
from typing import Any, Literal
from prowler_mcp_server.prowler_app.models.findings import (
DetailedFinding,
FindingsListResponse,
FindingsOverview,
)
from prowler_mcp_server.prowler_app.tools.base import BaseTool
from pydantic import Field
class FindingsTools(BaseTool):
"""Tools for security findings operations.
Provides tools for:
- Searching and filtering security findings
- Getting detailed finding information
- Viewing findings overview/statistics
"""
async def search_security_findings(
self,
severity: list[
Literal["critical", "high", "medium", "low", "informational"]
] = Field(
default=[],
description="Filter by severity levels. Multiple values allowed: critical, high, medium, low, informational. If empty, all severities are returned.",
),
status: list[Literal["FAIL", "PASS", "MANUAL"]] = Field(
default=["FAIL"],
description="Filter by finding status. Multiple values allowed: FAIL (security issue found), PASS (no issue found), MANUAL (requires manual verification). Default: ['FAIL'] - only returns findings with security issues. To get all findings, pass an empty list [].",
),
provider_type: list[str] = Field(
default=[],
description="Filter by cloud provider type. Multiple values allowed. If the parameter is not provided, all providers are returned. For valid values, please refer to Prowler Hub/Prowler Documentation that you can also find in form of tools in this MCP Server.",
),
provider_alias: str | None = Field(
default=None,
description="Filter by specific provider alias/name (partial match supported)",
),
region: list[str] = Field(
default=[],
description="Filter by cloud regions. Multiple values allowed (e.g., us-east-1, eu-west-1). If empty, all regions are returned.",
),
service: list[str] = Field(
default=[],
description="Filter by cloud service. Multiple values allowed (e.g., s3, ec2, iam, keyvault). If empty, all services are returned.",
),
resource_type: list[str] = Field(
default=[],
description="Filter by resource type. Multiple values allowed. If empty, all resource types are returned.",
),
check_id: list[str] = Field(
default=[],
description="Filter by specific security check IDs. Multiple values allowed. If empty, all check IDs are returned.",
),
muted: (
bool | str | None
) = Field(  # `str` is accepted as a workaround for MCP clients that send booleans as strings
default=None,
description="Filter by muted status. True for muted findings only, False for active findings only. If not specified, returns both.",
),
delta: list[Literal["new", "changed"]] = Field(
default=[],
description="Show only new or changed findings. Multiple values allowed: new (not seen in previous scans), changed (modified since last scan). If empty, all findings are returned.",
),
date_from: str | None = Field(
default=None,
description="Start date for range query in ISO 8601 format (YYYY-MM-DD, e.g., '2025-01-15'). Full date required - partial dates like '2025' or '2025-01' are not accepted. IMPORTANT: Maximum date range is 2 days. If only date_from is provided, date_to is automatically set to 2 days later. If only one boundary is provided, the other will be auto-calculated to maintain the 2-day window.",
),
date_to: str | None = Field(
default=None,
description="End date for range query in ISO 8601 format (YYYY-MM-DD, e.g., '2025-01-15'). Full date required - partial dates are not accepted. If only date_to is provided, date_from is automatically set to 2 days earlier. Can be used alone or with date_from.",
),
search: str | None = Field(
default=None, description="Free-text search term across finding details"
),
page_size: int = Field(
default=50, description="Number of results to return per page"
),
page_number: int = Field(
default=1, description="Page number to retrieve (1-indexed)"
),
) -> dict[str, Any]:
"""Search and filter security findings across all cloud providers with rich filtering capabilities.
This is the primary tool for browsing and filtering security findings. Returns lightweight findings
optimized for searching across large result sets. For detailed information about a specific finding,
use get_finding_details.
Default behavior:
- Returns latest findings from most recent scans (no date parameters needed)
- Filters to FAIL status only (security issues found)
- Returns 50 results per page
Date filtering:
- Without dates: queries findings from the most recent completed scan across all providers (most efficient). This returns the latest snapshot of findings, not a time-based query.
- With dates: queries historical findings (2-day maximum range)
Each finding includes:
- Core identification: id, uid, check_id
- Security context: status, severity, check_metadata (title, description, remediation)
- State tracking: delta (new/changed), muted status
- Extended details: status_extended for additional context
Returns:
Paginated list of simplified findings with total count and pagination metadata
"""
# Validate page_size parameter
self.api_client.validate_page_size(page_size)
# Determine endpoint based on date parameters
date_range = self.api_client.normalize_date_range(
date_from, date_to, max_days=2
)
if date_range is None:
# No dates provided - use latest findings endpoint
endpoint = "/api/v1/findings/latest"
params = {}
else:
# Dates provided - use historical findings endpoint
endpoint = "/api/v1/findings"
params = {
"filter[inserted_at__gte]": date_range[0],
"filter[inserted_at__lte]": date_range[1],
}
# Build filter parameters
if severity:
params["filter[severity__in]"] = severity
if status:
params["filter[status__in]"] = status
if provider_type:
params["filter[provider_type__in]"] = provider_type
if provider_alias:
params["filter[provider_alias__icontains]"] = provider_alias
if region:
params["filter[region__in]"] = region
if service:
params["filter[service__in]"] = service
if resource_type:
params["filter[resource_type__in]"] = resource_type
if check_id:
params["filter[check_id__in]"] = check_id
if muted is not None:
params["filter[muted]"] = (
muted if isinstance(muted, bool) else muted.lower() == "true"
)
if delta:
params["filter[delta__in]"] = delta
if search:
params["filter[search]"] = search
# Pagination
params["page[size]"] = page_size
params["page[number]"] = page_number
# Return only LLM-relevant fields
params["fields[findings]"] = (
"uid,status,severity,check_id,check_metadata,status_extended,delta,muted,muted_reason"
)
params["sort"] = "severity,-inserted_at"
# Convert lists to comma-separated strings
clean_params = self.api_client.build_filter_params(params)
# Get API response and transform to simplified format
api_response = await self.api_client.get(endpoint, params=clean_params)
simplified_response = FindingsListResponse.from_api_response(api_response)
return simplified_response.model_dump()
async def get_finding_details(
self,
finding_id: str = Field(
description="UUID of the finding to retrieve (must be a valid UUID format, e.g., '019ac0d6-90d5-73e9-9acf-c22e256f1bac'). Returns an error if the finding ID is invalid or not found."
),
) -> dict[str, Any]:
"""Retrieve comprehensive details about a specific security finding by its ID.
This tool provides MORE detailed information than search_security_findings. Use this when you need
to deeply analyze a specific finding or understand its complete context and history.
Additional information compared to search_security_findings:
- Temporal metadata: when the finding was first seen, inserted, and last updated
- Scan relationship: ID of the scan that generated this finding
- Resource relationships: IDs of all cloud resources associated with this finding
Workflow:
1. Use search_security_findings to browse and filter across many findings
2. Use get_finding_details to drill down into specific findings of interest
Returns:
dict containing detailed finding with comprehensive security metadata, temporal information,
and relationships to scans and resources
"""
params = {
# Return comprehensive fields including temporal metadata
"fields[findings]": "uid,status,severity,check_id,check_metadata,status_extended,delta,muted,muted_reason,inserted_at,updated_at,first_seen_at",
# Include relationships to scan and resources
"include": "scan,resources",
}
# Get API response and transform to detailed format
api_response = await self.api_client.get(
f"/api/v1/findings/{finding_id}", params=params
)
detailed_finding = DetailedFinding.from_api_response(
api_response.get("data", {})
)
return detailed_finding.model_dump()
async def get_findings_overview(
self,
provider_type: list[str] = Field(
default=[],
description="Filter statistics by cloud provider. Multiple values allowed. If empty, all providers are returned. For valid values, please refer to Prowler Hub/Prowler Documentation that you can also find in form of tools in this MCP Server.",
),
) -> dict[str, Any]:
"""Get high-level statistics about security findings formatted as a human-readable markdown report.
Use this tool to get a quick overview of your security posture without retrieving individual findings.
Perfect for understanding trends, identifying areas of concern, and tracking improvements over time.
The report includes:
- Summary statistics: total findings, fail/pass/muted counts with percentages
- Delta analysis: breakdown of new vs changed findings
- Trending information: how findings are evolving over time
Output format: Markdown-formatted report ready to present to users or include in documentation.
Use cases:
- Quick security posture assessment
- Tracking remediation progress over time
- Identifying which providers have the most issues
- Understanding finding trends (improving or degrading)
Returns:
Dictionary with 'report' key containing markdown-formatted summary statistics
"""
params = {
# Return only LLM-relevant aggregate statistics
"fields[findings-overview]": "new,changed,fail_new,fail_changed,pass_new,pass_changed,muted_new,muted_changed,total,fail,muted,pass"
}
if provider_type:
params["filter[provider_type__in]"] = provider_type
clean_params = self.api_client.build_filter_params(params)
# Get API response and transform to simplified format
api_response = await self.api_client.get(
"/api/v1/overviews/findings", params=clean_params
)
overview = FindingsOverview.from_api_response(api_response)
# Format as markdown report
total = overview.total
fail = overview.fail
passed = overview.passed
muted = overview.muted
new = overview.new
changed = overview.changed
# Calculate percentages
fail_pct = (fail / total * 100) if total > 0 else 0
passed_pct = (passed / total * 100) if total > 0 else 0
muted_pct = (muted / total * 100) if total > 0 else 0
unchanged = total - new - changed
# Build markdown report
report = f"""# Security Findings Overview
## Summary Statistics
- **Total Findings**: {total:,}
- **Failed Checks**: {fail:,} ({fail_pct:.1f}%)
- **Passed Checks**: {passed:,} ({passed_pct:.1f}%)
- **Muted Findings**: {muted:,} ({muted_pct:.1f}%)
## Delta Analysis
- **New Findings**: {new:,}
- New failures: {overview.fail_new:,}
- New passes: {overview.pass_new:,}
- New muted: {overview.muted_new:,}
- **Changed Findings**: {changed:,}
- Changed to fail: {overview.fail_changed:,}
- Changed to pass: {overview.pass_changed:,}
- Changed to muted: {overview.muted_changed:,}
- **Unchanged**: {unchanged:,}
"""
return {"report": report}


@@ -0,0 +1,292 @@
"""Shared API client utilities for Prowler App tools."""
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict
import httpx
from prowler_mcp_server.lib.logger import logger
from prowler_mcp_server.prowler_app.utils.auth import ProwlerAppAuth
class HTTPMethod(str, Enum):
"""HTTP methods enum."""
GET = "GET"
POST = "POST"
PATCH = "PATCH"
DELETE = "DELETE"
class SingletonMeta(type):
"""Metaclass that implements the Singleton pattern.
This metaclass ensures that only one instance of a class exists.
All calls to the constructor return the same instance.
"""
_instances: Dict[type, Any] = {}
def __call__(cls, *args, **kwargs):
"""Control instance creation to ensure singleton behavior."""
if cls not in cls._instances:
instance = super().__call__(*args, **kwargs)
cls._instances[cls] = instance
return cls._instances[cls]
class ProwlerAPIClient(metaclass=SingletonMeta):
"""Shared API client with smart defaults and helper methods.
This class uses the Singleton pattern via metaclass to ensure only one
instance exists across the application, reducing initialization overhead
and enabling HTTP connection pooling.
"""
def __init__(self) -> None:
"""Initialize the API client (only called once due to singleton pattern)."""
self.auth_manager: ProwlerAppAuth = ProwlerAppAuth()
self.client: httpx.AsyncClient = httpx.AsyncClient(timeout=30.0)
async def _make_request(
self,
method: HTTPMethod,
path: str,
params: dict[str, Any] | None = None,
json_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Make authenticated API request.
Args:
method: HTTP method (GET, POST, PATCH, DELETE)
path: API endpoint path
params: Optional query parameters
json_data: Optional JSON body data
Returns:
API response as dictionary
Raises:
Exception: If API request fails
"""
try:
token: str = await self.auth_manager.get_valid_token()
url: str = f"{self.auth_manager.base_url}{path}"
headers: dict[str, str] = self.auth_manager.get_headers(token)
response: httpx.Response = await self.client.request(
method=method.value,
url=url,
headers=headers,
params=params,
json=json_data,
)
response.raise_for_status()
return response.json()
except httpx.HTTPStatusError as e:
logger.error(f"HTTP error during {method.value} {path}: {e}")
error_detail: str = ""
try:
error_data: dict[str, Any] = e.response.json()
error_detail = error_data.get("errors", [{}])[0].get("detail", "")
except Exception:
error_detail = e.response.text
raise Exception(
f"API request failed: {e.response.status_code} - {error_detail}"
)
except Exception as e:
logger.error(f"Error during {method.value} {path}: {e}")
raise
async def get(
self, path: str, params: dict[str, Any] | None = None
) -> dict[str, Any]:
"""Make GET request.
Args:
path: API endpoint path
params: Optional query parameters
Returns:
API response as dictionary
Raises:
Exception: If API request fails
"""
return await self._make_request(HTTPMethod.GET, path, params=params)
async def post(
self,
path: str,
params: dict[str, Any] | None = None,
json_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Make POST request.
Args:
path: API endpoint path
params: Optional query parameters
json_data: Optional JSON body data
Returns:
API response as dictionary
Raises:
Exception: If API request fails
"""
return await self._make_request(
HTTPMethod.POST, path, params=params, json_data=json_data
)
async def patch(
self,
path: str,
params: dict[str, Any] | None = None,
json_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Make PATCH request.
Args:
path: API endpoint path
params: Optional query parameters
json_data: Optional JSON body data
Returns:
API response as dictionary
Raises:
Exception: If API request fails
"""
return await self._make_request(
HTTPMethod.PATCH, path, params=params, json_data=json_data
)
async def delete(
self, path: str, params: dict[str, Any] | None = None
) -> dict[str, Any]:
"""Make DELETE request.
Args:
path: API endpoint path
params: Optional query parameters
Returns:
API response as dictionary
Raises:
Exception: If API request fails
"""
return await self._make_request(HTTPMethod.DELETE, path, params=params)
def _validate_date_format(self, date_str: str, param_name: str) -> datetime:
"""Validate date string format.
Args:
date_str: Date string to validate
param_name: Parameter name for error messages
Returns:
Parsed datetime object
Raises:
ValueError: If date format is invalid
"""
try:
return datetime.strptime(date_str, "%Y-%m-%d")
except ValueError:
raise ValueError(
f"Invalid date format for {param_name}. Expected YYYY-MM-DD (e.g., '2025-01-15'), got '{date_str}'. "
f"Full date required - partial dates like '2025' or '2025-01' are not accepted."
)
def validate_page_size(self, page_size: int) -> None:
"""Validate page size parameter.
Args:
page_size: Page size to validate
Raises:
ValueError: If page size is out of valid range (1-1000)
"""
if page_size < 1 or page_size > 1000:
raise ValueError(
f"Invalid page_size: {page_size}. Must be between 1 and 1000 (inclusive)."
)
def normalize_date_range(
self, date_from: str | None, date_to: str | None, max_days: int = 2
) -> tuple[str, str] | None:
"""Normalize and validate date range, auto-completing missing boundary.
The Prowler API has a 2-day limit for historical queries. This helper:
1. Returns None if no dates provided (signals: use latest/default endpoint)
2. Auto-completes missing boundary to maintain 2-day window
3. Validates the range doesn't exceed max_days
Args:
date_from: Start date (YYYY-MM-DD format) or None
date_to: End date (YYYY-MM-DD format) or None
max_days: Maximum allowed days between dates (default: 2)
Returns:
None if no dates provided, otherwise tuple of (date_from, date_to) as strings
Raises:
ValueError: If date range exceeds max_days or date format is invalid
"""
if not date_from and not date_to:
return None
# Parse and validate provided dates
from_date: datetime | None = (
self._validate_date_format(date_from, "date_from") if date_from else None
)
to_date: datetime | None = (
self._validate_date_format(date_to, "date_to") if date_to else None
)
# Auto-complete missing boundary to maintain max_days window
if from_date and not to_date:
to_date = from_date + timedelta(days=max_days - 1)
elif to_date and not from_date:
from_date = to_date - timedelta(days=max_days - 1)
# Validate range doesn't exceed max_days
delta: int = (to_date - from_date).days + 1
if delta > max_days:
raise ValueError(
f"Date range cannot exceed {max_days} days. "
f"Requested range: {from_date.date()} to {to_date.date()} ({delta} days)"
)
return from_date.strftime("%Y-%m-%d"), to_date.strftime("%Y-%m-%d")
def build_filter_params(
self, params: dict[str, Any], exclude_none: bool = True
) -> dict[str, Any]:
"""Build filter parameters for API, converting types to API-compatible formats.
Args:
params: Dictionary of parameters
exclude_none: If True, exclude None values from result
Returns:
Cleaned parameter dictionary ready for API
"""
result: dict[str, Any] = {}
for key, value in params.items():
if value is None and exclude_none:
continue
# Convert boolean values to lowercase strings for API compatibility
if isinstance(value, bool):
result[key] = str(value).lower()
# Convert lists/arrays to comma-separated strings
elif isinstance(value, (list, tuple)):
result[key] = ",".join(str(v) for v in value)
else:
result[key] = value
return result
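
A short illustration of the date helpers and the singleton behaviour (assumes Prowler App credentials are configured, since the constructor builds the auth manager):

client = ProwlerAPIClient()
assert client is ProwlerAPIClient()  # SingletonMeta: every call returns the same instance

# Missing boundary is auto-completed to preserve the 2-day window:
print(client.normalize_date_range("2025-01-15", None))
# ('2025-01-15', '2025-01-16')

# No dates at all signals "use the latest-findings endpoint":
print(client.normalize_date_range(None, None))  # None

# Ranges wider than max_days fail fast, before any request is made:
try:
    client.normalize_date_range("2025-01-01", "2025-01-10")
except ValueError as err:
    print(err)  # Date range cannot exceed 2 days. ...

# Filter values are normalised for the API:
print(client.build_filter_params(
    {"filter[severity__in]": ["critical", "high"], "filter[muted]": False, "filter[search]": None}
))
# {'filter[severity__in]': 'critical,high', 'filter[muted]': 'false'}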


@@ -1,732 +0,0 @@
{
"endpoints": {
"* /api/v1/providers*": {
"parameters": {
"id": {
"name": "provider_id",
"description": "The UUID of the provider. This UUID is generated by Prowler and it is not related with the UID of the provider (that is the one that is set by the provider).\n\tThe format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
}
}
},
"GET /api/v1/providers": {
"name": "list_providers",
"description": "List all providers with options for filtering by various criteria.",
"parameters": {
"fields[providers]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"uid,delta,status\")"
},
"filter[alias]": {
"name": "filter_alias",
"description": "Filter by exact alias name"
},
"filter[alias__icontains]": {
"name": "filter_alias_contains",
"description": "Filter by partial alias match"
},
"filter[alias__in]": {
"name": "filter_alias_in",
"description": "Filter by multiple aliases (comma-separated, e.g. \"aws_alias_1,azure_alias_2\"). Useful when searching for multiple providers at once."
},
"filter[connected]": {
"name": "filter_connected",
"description": "Filter by connected status (True for connected, False for connection failed, if not set all both are returned).\n\tIf the connection haven't been attempted yet, the status will be None and does not apply for this filter."
},
"filter[id]": {
"name": "filter_id",
"description": "Filter by exact ID of the provider (UUID)"
},
"filter[id__in]": {
"name": "filter_id_in",
"description": "Filter by multiple IDs of the providers (comma-separated UUIDs, e.g. \"a1b2c3d4-5678-90ab-cdef-1234567890ab,deadbeef-1234-5678-9abc-def012345678,0f1e2d3c-4b5a-6978-8c9d-0e1f2a3b4c5d\"). Useful when searching for multiple providers at once."
},
"filter[inserted_at]": {
"name": "filter_inserted_at",
"description": "Filter by exact date (format: YYYY-MM-DD). This is the date when the provider was inserted into the database."
},
"filter[inserted_at__gte]": {
"name": "filter_inserted_at_gte",
"description": "Filter providers inserted on or after this date (format: YYYY-MM-DD)"
},
"filter[inserted_at__lte]": {
"name": "filter_inserted_at_lte",
"description": "Filter providers inserted on or before this date (format: YYYY-MM-DD)"
},
"filter[provider]": {
"name": "filter_provider",
"description": "Filter by single provider type"
},
"filter[provider__in]": {
"name": "filter_provider_in",
"description": "Filter by multiple provider types (comma-separated, e.g. \"aws,azure,gcp\")"
},
"filter[search]": {
"name": "filter_search",
"description": "A search term accross \"provider\", \"alias\" and \"uid\""
},
"filter[uid]": {
"name": "filter_uid",
"description": "Filter by exact finding UID"
},
"filter[uid__icontains]": {
"name": "filter_uid_contains",
"description": "Filter by partial finding UID match"
},
"filter[uid__in]": {
"name": "filter_uid_in",
"description": "Filter by multiple UIDs (comma-separated UUIDs)"
},
"filter[updated_at]": {
"name": "filter_updated_at",
"description": "Filter by exact date (format: YYYY-MM-DD). This is the date when the provider was updated in the database."
},
"filter[updated_at__gte]": {
"name": "filter_updated_at_gte",
"description": "Filter providers updated on or after this date (format: YYYY-MM-DD)"
},
"filter[updated_at__lte]": {
"name": "filter_updated_at_lte",
"description": "Filter providers updated on or before this date (format: YYYY-MM-DD)"
},
"include": {
"name": "include",
"description": "Include related resources in the response, for now only \"provider_groups\" is supported"
},
"page[number]": {
"name": "page_number",
"description": "Page number to retrieve (default: 1)"
},
"page[size]": {
"name": "page_size",
"description": "Number of results per page (default: 100)"
},
"sort": {
"name": "sort",
"description": "Sort the results by the specified fields. Use '-' prefix for descending order. (e.g. \"-provider,inserted_at\", this first sorts by provider alphabetically and then inside of each category by inserted_at date)"
}
}
},
"POST /api/v1/providers": {
"name": "create_provider",
"description": "Create a new provider in the current Prowler Tenant.\n\tThis is just for creating a new provider, not for adding/configuring credentials. To add credentials to an existing provider, use tool add_provider_secret from Prowler MCP server",
"parameters": {
"alias": {
"description": "Pseudonym name to identify the provider"
},
"provider": {
"description": "Type of provider to create"
},
"uid": {
"description": "UID for the provider. This UID is dependent on the provider type: \n\tAWS: AWS account ID\n\tAzure: Azure subscription ID\n\tGCP: GCP project ID\n\tKubernetes: Kubernetes namespace\n\tM365: M365 domain ID\n\tGitHub: GitHub username or organization name"
}
}
},
"GET /api/v1/providers/{id}": {
"name": "get_provider",
"description": "Get detailed information about a specific provider",
"parameters": {
"fields[providers]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"uid,alias,connection\")."
},
"include": {
"description": "Include related resources in the response, for now only \"provider_groups\" is supported"
}
}
},
"PATCH /api/v1/providers/{id}": {
"name": "update_provider",
"description": "Update the details of a specific provider",
"parameters": {
"alias": {
"description": "Pseudonym name to identify the provider, if not set, the alias will not be updated"
}
}
},
"DELETE /api/v1/providers/{id}": {
"name": "delete_provider",
"description": "Delete a specific provider"
},
"POST /api/v1/providers/{id}/connection": {
"name": "test_provider_connection",
"description": "Test the connection status of a specific provider with the credentials set in the provider secret. Needed to be done before running a scan."
},
"GET /api/v1/providers/secrets": {
"name": "list_provider_secrets",
"description": "List all provider secrets with options for filtering by various criteria",
"parameters": {
"fields[provider-secrets]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"name,secret_type,provider\")"
},
"filter[inserted_at]": {
"name": "filter_inserted_at",
"description": "Filter by exact date when the secret was inserted (format: YYYY-MM-DD)"
},
"filter[name]": {
"name": "filter_name",
"description": "Filter by exact secret name"
},
"filter[name__icontains]": {
"name": "filter_name_contains",
"description": "Filter by partial secret name match"
},
"filter[provider]": {
"name": "filter_provider",
"description": "Filter by prowler provider UUID (UUIDv4)"
},
"filter[search]": {
"name": "filter_search",
"description": "Search term in name attribute"
},
"filter[updated_at]": {
"name": "filter_updated_at",
"description": "Filter by exact update date (format: YYYY-MM-DD)"
},
"page[number]": {
"name": "page_number",
"description": "Page number to retrieve (default: 1)"
},
"page[size]": {
"name": "page_size",
"description": "Number of results per page"
},
"sort": {
"name": "sort",
"description": "Sort the results by the specified fields. You can specify multiple fields separated by commas; the results will be sorted by the first field, then by the second within each group of the first, and so on. Use '-' as a prefix to a field name for descending order (e.g. \"-name,inserted_at\" sorts by name descending, then by inserted_at ascending within each name). If not set, the default sort order will be applied"
}
}
},
"* /api/v1/providers/secrets*": {
"parameters": {
"secret": {
"name": "credentials",
"description": "Provider-specific credentials dictionary. Supported formats:\n - AWS Static: {\"aws_access_key_id\": \"...\", \"aws_secret_access_key\": \"...\", \"aws_session_token\": \"...\"}\n - AWS Assume Role: {\"role_arn\": \"...\", \"external_id\": \"...\", \"session_duration\": 3600, \"role_session_name\": \"...\"}\n - Azure: {\"tenant_id\": \"...\", \"client_id\": \"...\", \"client_secret\": \"...\"}\n - M365: {\"tenant_id\": \"...\", \"client_id\": \"...\", \"client_secret\": \"...\", \"user\": \"...\", \"password\": \"...\"}\n - GCP Static: {\"client_id\": \"...\", \"client_secret\": \"...\", \"refresh_token\": \"...\"}\n - GCP Service Account: {\"service_account_key\": {...}}\n - Kubernetes: {\"kubeconfig_content\": \"...\"}\n - GitHub PAT: {\"personal_access_token\": \"...\"}\n - GitHub OAuth: {\"oauth_app_token\": \"...\"}\n - GitHub App: {\"github_app_id\": 123, \"github_app_key\": \"path/to/key\"}"
},
"secret_type": {
"description": "Type of secret:\n\tstatic: Static credentials\n\trole: Assume role credentials (for now only AWS is supported)\n\tservice_account: Service account credentials (for now only GCP is supported)"
}
}
},
"POST /api/v1/providers/secrets": {
"name": "add_provider_secret",
"description": "Add or update complete credentials for an existing provider",
"parameters": {
"provider_id": {
"description": "The UUID of the provider. This UUID is generated by Prowler and it is not related with the UID of the provider, the format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
},
"name": {
"name": "secret_name",
"description": "Name for the credential secret. This must be between 3 and 100 characters long"
}
}
},
"GET /api/v1/providers/secrets/{id}": {
"name": "get_provider_secret",
"description": "Get detailed information about a specific provider secret",
"parameters": {
"id": {
"name": "provider_secret_id",
"description": "The UUID of the provider secret"
},
"fields[provider-secrets]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"name,secret_type,provider\")"
}
}
},
"PATCH /api/v1/providers/secrets/{id}": {
"name": "update_provider_secret",
"description": "Update the details of a specific provider secret",
"parameters": {
"id": {
"name": "provider_secret_id",
"description": "The UUID of the provider secret."
},
"name": {
"name": "secret_name",
"description": "Name for the credential secret. This must be between 3 and 100 characters long"
}
}
},
"DELETE /api/v1/providers/secrets/{id}": {
"name": "delete_provider_secret",
"description": "Delete a specific provider secret",
"parameters": {
"id": {
"name": "provider_secret_id",
"description": "The UUID of the provider secret."
}
}
},
"GET /api/v1/findings*": {
"parameters": {
"fields[findings]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"uid,delta,status,status_extended,severity,check_id,scan\")"
},
"filter[check_id]": {
"name": "filter_check_id",
"description": "Filter by exact check ID (e.g. ec2_launch_template_imdsv2_required). To get the list of available checks for a provider, use tool get_checks from Prowler Hub MCP server"
},
"filter[check_id__icontains]": {
"name": "filter_check_id_contains",
"description": "Filter by partial check ID match (e.g. \"iam\" matches all IAM-related checks for all providers)"
},
"filter[check_id__in]": {
"name": "filter_check_id_in",
"description": "Filter by multiple check IDs (comma-separated, e.g. \"ec2_launch_template_imdsv2_required,bedrock_guardrail_prompt_attack_filter_enabled,vpc_endpoint_multi_az_enabled\")"
},
"filter[delta]": {
"name": "filter_delta",
"description": "Filter by finding delta status"
},
"filter[id]": {
"name": "filter_id",
"description": "Filter by exact finding ID (main key in the database, it is a UUIDv7). It is not the same as the finding UID."
},
"filter[id__in]": {
"name": "filter_id_in",
"description": "Filter by multiple finding IDs (comma-separated UUIDs)"
},
"filter[inserted_at]": {
"name": "filter_inserted_at",
"description": "Filter by exact date (format: YYYY-MM-DD)."
},
"filter[inserted_at__date]": {
"name": "filter_inserted_at_date",
"description": "Filter by exact date (format: YYYY-MM-DD). Same as filter_inserted_at parameter."
},
"filter[inserted_at__gte]": {
"name": "filter_inserted_at_gte",
"description": "Filter findings inserted on or after this date (format: YYYY-MM-DD)"
},
"filter[inserted_at__lte]": {
"name": "filter_inserted_at_lte",
"description": "Filter findings inserted on or before this date (format: YYYY-MM-DD)"
},
"filter[muted]": {
"name": "filter_muted",
"description": "Filter by muted status (True for muted, False for non-muted, if not set all both are returned). A muted finding is a finding that has been muted by the user to ignore it."
},
"filter[provider]": {
"name": "filter_provider",
"description": "Filter by exact provider UUID (UUIDv4). This UUID is generated by Prowler and it is not related with the UID of the provider (that is the one that is set by the provider). The format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
},
"filter[provider__in]": {
"name": "filter_provider_in",
"description": "Filter by multiple provider UUIDs (comma-separated UUIDs, e.g. \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877,deadbeef-1234-5678-9abc-def012345678,0f1e2d3c-4b5a-6978-8c9d-0e1f2a3b4c5d\"). Useful when searching for multiple providers at once."
},
"filter[provider_alias]": {
"name": "filter_provider_alias",
"description": "Filter by exact provider alias name"
},
"filter[provider_alias__icontains]": {
"name": "filter_provider_alias_contains",
"description": "Filter by partial provider alias match"
},
"filter[provider_alias__in]": {
"name": "filter_provider_alias_in",
"description": "Filter by multiple provider aliases (comma-separated)"
},
"filter[provider_id]": {
"name": "filter_provider_id",
"description": "Filter by exact provider ID (UUID)"
},
"filter[provider_id__in]": {
"name": "filter_provider_id_in",
"description": "Filter by multiple provider IDs (comma-separated UUIDs)"
},
"filter[provider_type]": {
"name": "filter_provider_type",
"description": "Filter by single provider type"
},
"filter[provider_type__in]": {
"name": "filter_provider_type_in",
"description": "Filter by multiple provider types (comma-separated, e.g. \"aws,azure,gcp\"). Allowed values are: aws, azure, gcp, kubernetes, m365, github"
},
"filter[provider_uid]": {
"name": "filter_provider_uid",
"description": "Filter by exact provider UID. This UID is dependent on the provider type: \n\tAWS: AWS account ID\n\tAzure: Azure subscription ID\n\tGCP: GCP project ID\n\tKubernetes: Kubernetes namespace\n\tM365: M365 domain ID\n\tGitHub: GitHub username or organization name"
},
"filter[provider_uid__icontains]": {
"name": "filter_provider_uid_contains",
"description": "Filter by partial provider UID match"
},
"filter[provider_uid__in]": {
"name": "filter_provider_uid_in",
"description": "Filter by multiple provider UIDs (comma-separated UUIDs)"
},
"filter[region]": {
"name": "filter_region",
"description": "Filter by exact region name (e.g. us-east-1, eu-west-1, etc.). To get a list of available regions in a subset of findings, use tool get_findings_metadata from Prowler MCP server"
},
"filter[region__icontains]": {
"name": "filter_region_contains",
"description": "Filter by partial region match (e.g. \"us-\" matches all US regions)"
},
"filter[region__in]": {
"name": "filter_region_in",
"description": "Filter by multiple regions (comma-separated, e.g. \"us-east-1,us-west-2,eu-west-1\")"
},
"filter[resource_name]": {
"name": "filter_resource_name",
"description": "Filter by exact resource name that finding is associated with"
},
"filter[resource_name__icontains]": {
"name": "filter_resource_name_contains",
"description": "Filter by partial resource name match that finding is associated with"
},
"filter[resource_name__in]": {
"name": "filter_resource_name_in",
"description": "Filter by multiple resource names (comma-separated) that finding is associated with"
},
"filter[resource_type]": {
"name": "filter_resource_type",
"description": "Filter by exact resource type that finding is associated with"
},
"filter[resource_type__icontains]": {
"name": "filter_resource_type_contains",
"description": "Filter by partial resource type match that finding is associated with"
},
"filter[resource_type__in]": {
"name": "filter_resource_type_in",
"description": "Filter by multiple resource types (comma-separated) that finding is associated with"
},
"filter[resource_uid]": {
"name": "filter_resource_uid",
"description": "Filter by exact resource UID that finding is associated with"
},
"filter[resource_uid__icontains]": {
"name": "filter_resource_uid_contains",
"description": "Filter by partial resource UID match that finding is associated with"
},
"filter[resource_uid__in]": {
"name": "filter_resource_uid_in",
"description": "Filter by multiple resource UIDss (comma-separated) that finding is associated with"
},
"filter[resources]": {
"name": "filter_resources",
"description": "Filter by multiple resources (comma-separated) that finding is associated with. The accepted vaules are internal Prowler generated resource UUIDs"
},
"filter[scan]": {
"name": "filter_scan",
"description": "Filter by scan UUID"
},
"filter[scan__in]": {
"name": "filter_scan_in",
"description": "Filter by multiple scan UUIDs (comma-separated UUIDs)"
},
"filter[service]": {
"name": "filter_service",
"description": "Filter by exact service name (e.g. s3, rds, ec2, keyvault, etc.). To get the list of available services, use tool list_providers from Prowler Hub MCP server"
},
"filter[service__icontains]": {
"name": "filter_service_contains",
"description": "Filter by partial service name match (e.g. \"storage\" matches all storage-related services)"
},
"filter[service__in]": {
"name": "filter_service_in",
"description": "Filter by multiple service names (comma-separated, e.g. \"s3,ec2,iam\")"
},
"filter[severity]": {
"name": "filter_severity",
"description": "Filter by single severity (critical, high, medium, low, informational)"
},
"filter[severity__in]": {
"name": "filter_severity_in",
"description": "Filter by multiple severities (comma-separated, e.g. \"critical,high\")"
},
"filter[status]": {
"name": "filter_status",
"description": "Filter by single status"
},
"filter[status__in]": {
"name": "filter_status_in",
"description": "Filter by multiple statuses (comma-separated, e.g. \"FAIL,MANUAL\"). Allowed values are: PASS, FAIL, MANUAL"
},
"filter[uid]": {
"name": "filter_uid",
"description": "Filter by exact finding UID assigned by Prowler"
},
"filter[uid__in]": {
"name": "filter_uid_in",
"description": "Filter by multiple finding UIDs (comma-separated UUIDs)"
},
"filter[updated_at]": {
"name": "filter_updated_at",
"description": "Filter by exact update date (format: YYYY-MM-DD)"
},
"filter[updated_at__gte]": {
"name": "filter_updated_at_gte",
"description": "Filter by update date on or after this date (format: YYYY-MM-DD)"
},
"filter[updated_at__lte]": {
"name": "filter_updated_at_lte",
"description": "Filter by update date on or before this date (format: YYYY-MM-DD)"
},
"include": {
"name": "include",
"description": "Include related resources in the response, supported values are: \"resources\" and \"scan\""
},
"page[number]": {
"name": "page_number",
"description": "Page number to retrieve (default: 1)"
},
"page[size]": {
"name": "page_size",
"description": "Number of results per page (default: 100)"
},
"sort": {
"name": "sort",
"description": "Sort the results by the specified fields. You can specify multiple fields separated by commas; the results will be sorted by the first field, then by the second within each group of the first, and so on. Use '-' as a prefix to a field name for descending order (e.g. \"status,-severity\" sorts by status ascending alphabetically and then by severity descending within each status alphabetically)"
}
}
},
"GET /api/v1/findings": {
"name": "list_findings",
"description": "List security findings from Prowler scans with advanced filtering.\n\tAt least one of the variations of the filter[inserted_at] is required. If not provided, defaults to findings from the last day."
},
"GET /api/v1/findings/{id}": {
"name": "get_finding",
"description": "Get detailed information about a specific security finding",
"parameters": {
"id": {
"name": "finding_id",
"description": "The UUID of the finding"
}
}
},
"GET /api/v1/findings/latest": {
"name": "get_latest_findings",
"description": "Retrieve a list of the latest findings from the latest scans for each provider with advanced filtering options"
},
"GET /api/v1/findings/metadata": {
"name": "get_findings_metadata",
"description": "Fetch unique metadata values from a filtered set of findings. This is useful for dynamic filtering",
"parameters": {
"fields[findings-metadata]": {
"name": "metadata_fields",
"description": "Specific metadata fields to return (comma-separated, e.g. 'regions,services,check_ids')"
}
}
},
"GET /api/v1/findings/metadata/latest": {
"name": "get_latest_findings_metadata",
"description": "Fetch unique metadata values from the latest findings across all providers"
},
"* /api/v1/scans*": {
"parameters": {
"id": {
"name": "scan_id",
"description": "The UUID of the scan. The format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
}
}
},
"GET /api/v1/scans": {
"name": "list_scans",
"description": "List all scans with options for filtering by various criteria.",
"parameters": {
"fields[scans]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"name,state,progress,duration\")"
},
"filter[completed_at]": {
"name": "filter_completed_at",
"description": "Filter by exact completion date (format: YYYY-MM-DD)"
},
"filter[inserted_at]": {
"name": "filter_inserted_at",
"description": "Filter by exact insertion date (format: YYYY-MM-DD)"
},
"filter[name]": {
"name": "filter_name",
"description": "Filter by exact scan name"
},
"filter[name__icontains]": {
"name": "filter_name_contains",
"description": "Filter by partial scan name match"
},
"filter[next_scan_at]": {
"name": "filter_next_scan_at",
"description": "Filter by exact next scan date (format: YYYY-MM-DD)"
},
"filter[next_scan_at__gte]": {
"name": "filter_next_scan_at_gte",
"description": "Filter scans scheduled on or after this date (format: YYYY-MM-DD)"
},
"filter[next_scan_at__lte]": {
"name": "filter_next_scan_at_lte",
"description": "Filter scans scheduled on or before this date (format: YYYY-MM-DD)"
},
"filter[provider]": {
"name": "filter_provider",
"description": "Filter by exact provider UUID (UUIDv4). This UUID is generated by Prowler and it is not related with the UID of the provider (that is the one that is set by the provider). The format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
},
"filter[provider__in]": {
"name": "filter_provider_in",
"description": "Filter by multiple provider UUIDs (comma-separated UUIDs, e.g. \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877,deadbeef-1234-5678-9abc-def012345678,0f1e2d3c-4b5a-6978-8c9d-0e1f2a3b4c5d\"). Useful when searching for multiple providers at once."
},
"filter[provider_alias]": {
"name": "filter_provider_alias",
"description": "Filter by exact provider alias name"
},
"filter[provider_alias__icontains]": {
"name": "filter_provider_alias_contains",
"description": "Filter by partial provider alias match"
},
"filter[provider_alias__in]": {
"name": "filter_provider_alias_in",
"description": "Filter by multiple provider aliases (comma-separated)"
},
"filter[provider_type]": {
"name": "filter_provider_type",
"description": "Filter by single provider type (aws, azure, gcp, github, kubernetes, m365)"
},
"filter[provider_type__in]": {
"name": "filter_provider_type_in",
"description": "Filter by multiple provider types (comma-separated, e.g. \"aws,azure,gcp\"). Allowed values are: aws, azure, gcp, kubernetes, m365, github"
},
"filter[provider_uid]": {
"name": "filter_provider_uid",
"description": "Filter by exact provider UID. This UID is dependent on the provider type: \n\tAWS: AWS account ID\n\tAzure: Azure subscription ID\n\tGCP: GCP project ID\n\tKubernetes: Kubernetes namespace\n\tM365: M365 domain ID\n\tGitHub: GitHub username or organization name"
},
"filter[provider_uid__icontains]": {
"name": "filter_provider_uid_contains",
"description": "Filter by partial provider UID match"
},
"filter[provider_uid__in]": {
"name": "filter_provider_uid_in",
"description": "Filter by multiple provider UIDs (comma-separated)"
},
"filter[scheduled_at]": {
"name": "filter_scheduled_at",
"description": "Filter by exact scheduled date (format: YYYY-MM-DD)"
},
"filter[scheduled_at__gte]": {
"name": "filter_scheduled_at_gte",
"description": "Filter scans scheduled on or after this date (format: YYYY-MM-DD)"
},
"filter[scheduled_at__lte]": {
"name": "filter_scheduled_at_lte",
"description": "Filter scans scheduled on or before this date (format: YYYY-MM-DD)"
},
"filter[search]": {
"name": "filter_search",
"description": "Search term across multiple scan attributes including: name (scan name), trigger (Manual/Scheduled), state (Available, Executing, Completed, Failed, etc.), unique_resource_count (number of resources found), progress (scan progress percentage), duration (scan duration), scheduled_at (when scan is scheduled), started_at (when scan started), completed_at (when scan completed), and next_scan_at (next scheduled scan time)"
},
"filter[started_at]": {
"name": "filter_started_at",
"description": "Filter by exact start date (format: YYYY-MM-DD)"
},
"filter[started_at__gte]": {
"name": "filter_started_at_gte",
"description": "Filter scans started on or after this date (format: YYYY-MM-DD)"
},
"filter[started_at__lte]": {
"name": "filter_started_at_lte",
"description": "Filter scans started on or before this date (format: YYYY-MM-DD)"
},
"filter[state]": {
"name": "filter_state",
"description": "Filter by exact scan state"
},
"filter[state__in]": {
"name": "filter_state_in",
"description": "Filter by multiple scan states (comma-separated)"
},
"filter[trigger]": {
"name": "filter_trigger",
"description": "Filter by scan trigger type"
},
"filter[trigger__in]": {
"name": "filter_trigger_in",
"description": "Filter by multiple trigger types (comma-separated)"
},
"include": {
"name": "include",
"description": "Include related resources in the response, supported value is \"provider\""
},
"page[number]": {
"name": "page_number",
"description": "Page number to retrieve (default: 1)"
},
"page[size]": {
"name": "page_size",
"description": "Number of results per page (default: 100)"
},
"sort": {
"name": "sort",
"description": "Sort the results by the specified fields. Use '-' prefix for descending order. (e.g. \"-started_at,name\")"
}
}
},
"POST /api/v1/scans": {
"name": "create_scan",
"description": "Trigger a manual scan for a specific provider",
"parameters": {
"provider_id": {
"name": "provider_id",
"description": "Prowler generated UUID of the provider to scan. The format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
},
"name": {
"description": "Optional name for the scan"
}
}
},
"GET /api/v1/scans/{id}": {
"name": "get_scan",
"description": "Get detailed information about a specific scan",
"parameters": {
"fields[scans]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"name,state,progress,duration\")"
},
"include": {
"description": "Include related resources in the response, supported value is \"provider\""
}
}
},
"PATCH /api/v1/scans/{id}": {
"name": "update_scan",
"description": "Update the details of a specific scan",
"parameters": {
"name": {
"description": "Name for the scan to be updated"
}
}
},
"GET /api/v1/scans/{id}/compliance/{name}": {
"name": "get_scan_compliance_report",
"description": "Download a specific compliance report (e.g., 'cis_1.4_aws') as a CSV file",
"parameters": {
"name": {
"name": "compliance_name"
},
"fields[scan-reports]": {
"name": "fields",
"description": "The tool will return only the specified fields, if not set all are returned (comma-separated, e.g. \"id,name\")"
}
}
},
"GET /api/v1/scans/{id}/report": {
"name": "get_scan_report",
"description": "Download a ZIP file containing the scan report",
"parameters": {
"fields[scan-reports]": {
"name": "fields",
"description": "Not use this parameter for now"
}
}
},
"POST /api/v1/schedules/daily": {
"name": "schedules_daily_scan",
"parameters": {
"provider_id": {
"name": "provider_id",
"description": "Prowler generated UUID of the provider to scan. The format is UUIDv4: \"4d0e2614-6385-4fa7-bf0b-c2e2f75c6877\""
}
}
}
}
}
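For orientation, a minimal sketch of how one of the mappings above surfaces in the generated server (illustrative only; the exact code is emitted by the generator below): the tool exposes the configured Python-facing name, and the original OpenAPI parameter name is restored when the query string is built.

from typing import Any, Optional

async def list_findings(filter_severity_in: Optional[str] = None) -> dict[str, Any]:
    """Sketch of a generated tool body for GET /api/v1/findings."""
    params: dict[str, Any] = {}
    if filter_severity_in is not None:
        # Python-facing name from mcp_config.json; wire name from the OpenAPI spec
        params["filter[severity__in]"] = filter_severity_in
    return {"success": True, "params": params}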

View File

@@ -1,974 +0,0 @@
#!/usr/bin/env python3
"""
Generate FastMCP server code from OpenAPI specification.
This script parses an OpenAPI specification file and generates FastMCP tool functions
with proper type hints, parameters, and docstrings.
"""
import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Optional
import requests
import yaml
class OpenAPIToMCPGenerator:
def __init__(
self,
spec_file: str,
custom_auth_module: Optional[str] = None,
exclude_patterns: Optional[list[str]] = None,
exclude_operations: Optional[list[str]] = None,
exclude_tags: Optional[list[str]] = None,
include_only_tags: Optional[list[str]] = None,
config_file: Optional[str] = None,
):
"""
Initialize the generator with an OpenAPI spec file.
Args:
spec_file: Path to OpenAPI specification file
custom_auth_module: Module path for custom authentication
exclude_patterns: list of regex patterns to exclude endpoints (matches against path)
exclude_operations: list of operation IDs to exclude
exclude_tags: list of tags to exclude
include_only_tags: If specified, only include endpoints with these tags
config_file: Path to JSON configuration file for custom mappings
"""
self.spec_file = spec_file
self.custom_auth_module = custom_auth_module
self.exclude_patterns = exclude_patterns or []
self.exclude_operations = exclude_operations or []
self.exclude_tags = exclude_tags or []
self.include_only_tags = include_only_tags
self.config_file = config_file
self.config = self._load_config() if config_file else {}
self.spec = self._load_spec()
self.generated_tools = []
self.imports = set()
self.needs_query_array_normalizer = False
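# Map OpenAPI types to Python hints. Scalar numerics are accepted as plain
# strings; only bool/array/object values are cast later, in _get_cast_expression.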
self.type_mapping = {
"string": "str",
"integer": "str",
"number": "str",
"boolean": "bool | str",
"array": "list[Any] | str",
"object": "dict[str, Any] | str",
}
def _load_config(self) -> dict:
"""Load configuration from JSON file."""
try:
with open(self.config_file, "r") as f:
return json.load(f)
except FileNotFoundError:
return {}
except json.JSONDecodeError:
return {}
def _load_spec(self) -> dict:
"""Load OpenAPI specification from file."""
with open(self.spec_file, "r") as f:
if self.spec_file.endswith(".yaml") or self.spec_file.endswith(".yml"):
return yaml.safe_load(f)
else:
return json.load(f)
def _get_endpoint_config(self, path: str, method: str) -> dict:
"""Get endpoint configuration from config file with pattern matching and inheritance.
Configuration resolution order (most to least specific):
1. Exact endpoint match (e.g., "GET /api/v1/findings/metadata")
2. Pattern matches, sorted by specificity:
- Patterns without wildcards are more specific
- Longer patterns are more specific
- Example: "GET /api/v1/findings/*" matches all findings endpoints
When multiple configurations match, they are merged with more specific
configurations overriding less specific ones.
"""
if not self.config:
return {}
endpoint_key = f"{method.upper()} {path}"
merged_config = {}
# Get endpoints configuration (now supports both exact and pattern matches)
endpoints = self.config.get("endpoints", {})
# Separate exact matches from patterns
exact_match = None
pattern_matches = []
for config_key, config_value in endpoints.items():
if "*" in config_key or "?" in config_key:
# This is a pattern - convert wildcards to regex
regex_pattern = config_key.replace("*", ".*").replace("?", ".")
if re.match(f"^{regex_pattern}$", endpoint_key):
pattern_matches.append((config_key, config_value))
elif config_key == endpoint_key:
# Exact match
exact_match = (config_key, config_value)
# Also check for patterns in endpoint_patterns for backward compatibility
endpoint_patterns = self.config.get("endpoint_patterns", {})
for pattern, pattern_config in endpoint_patterns.items():
regex_pattern = pattern.replace("*", ".*").replace("?", ".")
if re.match(f"^{regex_pattern}$", endpoint_key):
pattern_matches.append((pattern, pattern_config))
# Sort pattern matches by specificity
# More specific patterns should be applied last to override less specific ones
pattern_matches.sort(
key=lambda x: (
x[0].count("*") + x[0].count("?"), # Fewer wildcards = more specific
-len(
x[0]
), # Longer patterns = more specific (negative for reverse sort)
),
reverse=True,
) # Reverse so least specific comes first
# Apply configurations from least to most specific
# First apply pattern matches (from least to most specific)
for pattern, pattern_config in pattern_matches:
merged_config = self._merge_configs(merged_config, pattern_config)
# Finally apply exact match (most specific)
if exact_match:
merged_config = self._merge_configs(merged_config, exact_match[1])
# Fallback to old endpoint_mappings for backward compatibility
if not merged_config:
endpoint_mappings = self.config.get("endpoint_mappings", {})
if endpoint_key in endpoint_mappings:
merged_config = {"name": endpoint_mappings[endpoint_key]}
return merged_config
def _merge_configs(self, base_config: dict, override_config: dict) -> dict:
"""Merge two configurations, with override_config taking precedence.
Special handling for parameters: merges parameter configurations deeply.
"""
import copy
result = copy.deepcopy(base_config)
for key, value in override_config.items():
if key == "parameters" and key in result:
# Deep merge parameters
if not isinstance(result[key], dict):
result[key] = {}
if isinstance(value, dict):
for param_name, param_config in value.items():
if param_name in result[key] and isinstance(
result[key][param_name], dict
):
# Merge parameter configurations
result[key][param_name] = {
**result[key][param_name],
**param_config,
}
else:
result[key][param_name] = param_config
else:
# For other keys, override completely
result[key] = value
return result
def _sanitize_function_name(self, operation_id: str) -> str:
"""Convert operation ID to valid Python function name."""
# Replace non-alphanumeric characters with underscores
name = re.sub(r"[^a-zA-Z0-9_]", "_", operation_id)
# Ensure it doesn't start with a number
if name and name[0].isdigit():
name = f"op_{name}"
return name.lower()
def _get_python_type(self, schema: dict) -> tuple[str, str]:
"""Convert OpenAPI schema to Python type hint.
Returns:
Tuple of (type_hint, original_type) where original_type is used for casting
"""
if not schema:
return "Any", "any"
# Handle oneOf/anyOf/allOf schemas - these are typically objects
if "oneOf" in schema or "anyOf" in schema or "allOf" in schema:
# These are complex schemas, typically representing different object variants
return "dict[str, Any] | str", "object"
schema_type = schema.get("type", "string")
# Handle enums
if "enum" in schema:
enum_values = schema["enum"]
if all(isinstance(v, str) for v in enum_values):
# Create Literal type for string enums - already strings, no casting needed
self.imports.add("from typing import Literal")
enum_str = ", ".join(f'"{v}"' for v in enum_values)
return f"Literal[{enum_str}]", "string"
else:
return self.type_mapping.get(schema_type, "Any"), schema_type
# Handle arrays
if schema_type == "array":
return "list[Any] | str", "array"
# Handle format specifications
if schema_type == "string":
format_type = schema.get("format", "")
if format_type in ["date", "date-time", "uuid", "email"]:
return "str", "string"
return self.type_mapping.get(schema_type, "Any"), schema_type
def _resolve_ref(self, ref: str) -> dict:
"""Resolve a $ref reference in the OpenAPI spec."""
if not ref.startswith("#/"):
return {}
# Split the reference path
ref_parts = ref[2:].split("/") # Remove '#/' and split
# Navigate through the spec to find the referenced schema
resolved = self.spec
for part in ref_parts:
resolved = resolved.get(part, {})
return resolved
def _extract_parameters(
self, operation: dict, endpoint_config: Optional[dict] = None
) -> list[dict]:
"""Extract and process parameters from an operation."""
parameters = []
for param in operation.get("parameters", []):
# Sanitize parameter name for Python
python_name = (
param.get("name", "")
.replace("[", "_")
.replace("]", "")
.replace(".", "_")
.replace("-", "_")
) # Also replace hyphens
type_hint, original_type = self._get_python_type(param.get("schema", {}))
param_info = {
"name": param.get("name", ""),
"python_name": python_name,
"in": param.get("in", "query"),
"required": param.get("required", False),
"description": param.get("description", ""),
"type": type_hint,
"original_type": original_type,
"original_schema": param.get("schema", {}),
}
# Apply custom parameter configuration from endpoint config
if endpoint_config and "parameters" in endpoint_config:
param_config = endpoint_config["parameters"]
if param_info["name"] in param_config:
custom_param = param_config[param_info["name"]]
if "name" in custom_param:
param_info["python_name"] = custom_param["name"]
if "description" in custom_param:
param_info["description"] = custom_param["description"]
parameters.append(param_info)
# Handle request body if present - extract as individual parameters
if "requestBody" in operation:
body = operation["requestBody"]
content = body.get("content", {})
# Check for different content types
schema = None
if "application/vnd.api+json" in content:
schema = content["application/vnd.api+json"].get("schema", {})
elif "application/json" in content:
schema = content["application/json"].get("schema", {})
if schema:
# Resolve $ref if present
if "$ref" in schema:
schema = self._resolve_ref(schema["$ref"])
# Try to extract individual fields from the schema
body_params = self._extract_body_parameters(
schema, body.get("required", False)
)
# Apply custom parameter config to body parameters
if endpoint_config and "parameters" in endpoint_config:
param_config = endpoint_config["parameters"]
for param in body_params:
if param["name"] in param_config:
custom_param = param_config[param["name"]]
if "name" in custom_param:
param["python_name"] = custom_param["name"]
if "description" in custom_param:
param["description"] = custom_param["description"]
parameters.extend(body_params)
return parameters
def _extract_body_parameters(self, schema: dict, is_required: bool) -> list[dict]:
"""Extract individual parameters from request body schema."""
parameters = []
# Handle JSON:API format with data.attributes structure
if "properties" in schema:
data = schema["properties"].get("data", {})
if "properties" in data:
# Extract attributes
attributes = data["properties"].get("attributes", {})
if "properties" in attributes:
# Get required fields from attributes
required_attrs = attributes.get("required", [])
for prop_name, prop_schema in attributes["properties"].items():
# Skip read-only fields for POST/PUT/PATCH operations
if prop_schema.get("readOnly", False):
continue
python_name = prop_name.replace("-", "_")
# Check if this field is required
is_field_required = prop_name in required_attrs
type_hint, original_type = self._get_python_type(prop_schema)
param_info = {
"name": prop_name, # Keep original name for API
"python_name": python_name,
"in": "body",
"required": is_field_required,
"description": prop_schema.get(
"description",
prop_schema.get("title", f"{prop_name} parameter"),
),
"type": type_hint,
"original_type": original_type,
"original_schema": prop_schema,
"resource_type": (
data["properties"]
.get("type", {})
.get("enum", ["resource"])[0]
if "type" in data["properties"]
else "resource"
),
}
parameters.append(param_info)
# Also check for relationships (like provider_id)
relationships = data["properties"].get("relationships", {})
if "properties" in relationships:
required_rels = relationships.get("required", [])
for rel_name, rel_schema in relationships["properties"].items():
# Extract ID from relationship
python_name = f"{rel_name}_id"
is_rel_required = rel_name in required_rels
param_info = {
"name": f"{rel_name}_id",
"python_name": python_name,
"in": "body",
"required": is_rel_required,
"description": f"ID of the related {rel_name}",
"type": "str",
"original_type": "string",
"original_schema": rel_schema,
}
parameters.append(param_info)
# If no structured params found, fall back to generic body parameter
if not parameters and schema:
parameters.append(
{
"name": "body",
"python_name": "body",
"in": "body",
"required": is_required,
"description": "Request body data",
"type": "dict[str, Any] | str",
"original_type": "object",
"original_schema": schema,
}
)
return parameters
def _generate_docstring(
self,
operation: dict,
parameters: list[dict],
path: str,
method: str,
endpoint_config: Optional[dict] = None,
) -> str:
"""Generate a comprehensive docstring for the tool function."""
lines = []
# Main description - use custom or default
endpoint_config = endpoint_config or {}
# Use custom description if provided, otherwise fall back to OpenAPI
if "description" in endpoint_config:
lines.append(f' """{endpoint_config["description"]}')
else:
summary = operation.get("summary", "")
description = operation.get("description", "")
if summary:
lines.append(f' """{summary}')
else:
lines.append(f' """Execute {method.upper()} {path}')
if "description" not in endpoint_config:
# Only add OpenAPI description if no custom description was provided
description = operation.get("description", "")
if description and description != summary:
lines.append("")
# Clean up description - remove extra whitespace
clean_desc = " ".join(description.split())
lines.append(f" {clean_desc}")
# Add endpoint info
lines.append("")
lines.append(f" Endpoint: {method.upper()} {path}")
# Parameters section
if parameters:
lines.append("")
lines.append(" Args:")
for param in parameters:
# Use custom description if available
param_desc = param["description"] or "Self-explanatory parameter"
# Handle multi-line descriptions properly
required_text = "(required)" if param["required"] else "(optional)"
if "\n" in param_desc:
# Split on actual newlines (not escaped)
desc_lines = param_desc.split("\n")
first_line = desc_lines[0].strip()
lines.append(
f" {param['python_name']} {required_text}: {first_line}"
)
# Add subsequent lines with proper indentation (12 spaces for continuation)
for desc_line in desc_lines[1:]:
desc_line = desc_line.strip()
if desc_line:
lines.append(f" {desc_line}")
else:
# Clean up parameter description for single line
param_desc = " ".join(param_desc.split())
lines.append(
f" {param['python_name']} {required_text}: {param_desc}"
)
# Add enum values if present
if "enum" in param.get("original_schema", {}):
enum_values = param["original_schema"]["enum"]
lines.append(
f" Allowed values: {', '.join(str(v) for v in enum_values)}"
)
# Returns section
lines.append("")
lines.append(" Returns:")
lines.append(" dict containing the API response")
lines.append(' """')
return "\n".join(lines)
def _generate_function_signature(
self, func_name: str, parameters: list[dict]
) -> str:
"""Generate the function signature with proper type hints."""
# Sort parameters: required first, then optional
sorted_params = sorted(
parameters, key=lambda x: (not x["required"], x["python_name"])
)
param_strings = []
for param in sorted_params:
if param["required"]:
param_strings.append(f" {param['python_name']}: {param['type']}")
else:
param_strings.append(
f" {param['python_name']}: Optional[{param['type']}] = None"
)
if param_strings:
params_str = ",\n".join(param_strings)
return f"async def {func_name}(\n{params_str}\n) -> dict[str, Any]:"
else:
return f"async def {func_name}() -> dict[str, Any]:"
def _get_cast_expression(self, param: dict) -> str:
"""Generate type casting expression for a parameter.
Args:
param: Parameter dict with 'python_name' and 'original_type'
Returns:
Expression string that casts the parameter value to the correct type
"""
python_name = param["python_name"]
original_type = param.get("original_type", "string")
if original_type == "boolean":
# Convert string to boolean using simple comparison
return f"({python_name}.lower() in ('true', '1', 'yes', 'on') if isinstance({python_name}, str) else {python_name})"
elif original_type == "array":
if param.get("in") == "query":
self.needs_query_array_normalizer = True
return f"_normalize_query_array({python_name})"
return f"json.loads({python_name}) if isinstance({python_name}, str) else {python_name}"
elif original_type == "object":
return f"json.loads({python_name}) if isinstance({python_name}, str) else {python_name}"
else:
# string or any other type - no casting needed
return python_name
def _generate_function_body(
self, path: str, method: str, parameters: list[dict], operation_id: str
) -> str:
"""Generate the function body for making API calls."""
lines = []
# Add try block
lines.append(" try:")
# Get authentication token if custom auth module is provided
if self.custom_auth_module:
lines.append(" token = await auth_manager.get_valid_token()")
lines.append("")
# Build parameters
query_params = [p for p in parameters if p["in"] == "query"]
path_params = [p for p in parameters if p["in"] == "path"]
body_params = [p for p in parameters if p["in"] == "body"]
# Add json import if needed for object or array type casting
if any(p.get("original_type") in ["object", "array"] for p in parameters):
self.imports.add("import json")
# Build query parameters
if query_params:
lines.append(" params = {}")
for param in query_params:
cast_expr = self._get_cast_expression(param)
if param["required"]:
lines.append(f" params['{param['name']}'] = {cast_expr}")
else:
lines.append(f" if {param['python_name']} is not None:")
lines.append(f" params['{param['name']}'] = {cast_expr}")
lines.append("")
# Build path with path parameters
final_path = path
for param in path_params:
cast_expr = self._get_cast_expression(param)
lines.append(
f" path = '{path}'.replace('{{{param['name']}}}', str({cast_expr}))"
)
final_path = "path"
# Build request body if there are body parameters
if body_params:
# Check if we have individual params or a single body param
if len(body_params) == 1 and body_params[0]["python_name"] == "body":
# Single body parameter - use it directly with casting
cast_expr = self._get_cast_expression(body_params[0])
lines.append(f" request_body = {cast_expr}")
else:
# Get resource type from first body param (they should all have the same)
resource_type = (
body_params[0].get("resource_type", "resource")
if body_params
else "resource"
)
# Build JSON:API structure from individual parameters
lines.append(" # Build request body")
lines.append(" request_body = {")
lines.append(' "data": {')
lines.append(f' "type": "{resource_type}"')
# Separate attributes from relationships
# Note: Check if param was originally from attributes section, not just by name
attribute_params = []
relationship_params = []
for p in body_params:
# If this param came from the attributes section (has resource_type), it's an attribute
# even if its name ends with _id
if "resource_type" in p:
attribute_params.append(p)
elif p["python_name"].endswith("_id") and "resource_type" not in p:
relationship_params.append(p)
else:
attribute_params.append(p)
if attribute_params:
lines.append(",")
lines.append(' "attributes": {}')
lines.append(" }")
lines.append(" }")
if attribute_params:
lines.append("")
lines.append(" # Add attributes")
for param in attribute_params:
cast_expr = self._get_cast_expression(param)
if param["required"]:
lines.append(
f' request_body["data"]["attributes"]["{param["name"]}"] = {cast_expr}'
)
else:
lines.append(
f" if {param['python_name']} is not None:"
)
lines.append(
f' request_body["data"]["attributes"]["{param["name"]}"] = {cast_expr}'
)
if relationship_params:
lines.append("")
lines.append(" # Add relationships")
lines.append(' request_body["data"]["relationships"] = {}')
for param in relationship_params:
rel_name = param["python_name"].replace("_id", "")
cast_expr = self._get_cast_expression(param)
if param["required"]:
lines.append(
f' request_body["data"]["relationships"]["{rel_name}"] = {{'
)
lines.append(' "data": {')
lines.append(f' "type": "{rel_name}s",')
lines.append(f' "id": {cast_expr}')
lines.append(" }")
lines.append(" }")
else:
lines.append(
f" if {param['python_name']} is not None:"
)
lines.append(
f' request_body["data"]["relationships"]["{rel_name}"] = {{'
)
lines.append(' "data": {')
lines.append(f' "type": "{rel_name}s",')
lines.append(f' "id": {cast_expr}')
lines.append(" }")
lines.append(" }")
lines.append("")
# Build the request URL
url_line = (
f'f"{{auth_manager.base_url}}{{{final_path}}}"'
if final_path == "path"
else f'f"{{auth_manager.base_url}}{path}"'
)
lines.append(f" url = {url_line}")
lines.append("")
# Build request parameters
request_params = ["url"]
if self.custom_auth_module:
request_params.append("headers=auth_manager.get_headers(token)")
if query_params:
request_params.append("params=params")
if body_params:
request_params.append("json=request_body")
params_str = ",\n ".join(request_params)
lines.append(f" response = await prowler_app_client.{method}(")
lines.append(f" {params_str}")
lines.append(" )")
lines.append(" response.raise_for_status()")
lines.append("")
# Parse response
lines.append(" data = response.json()")
lines.append("")
lines.append(" return {")
lines.append(' "success": True,')
lines.append(' "data": data.get("data", data),')
lines.append(' "meta": data.get("meta", {}),')
lines.append(" }")
lines.append("")
# Exception handling
lines.append(" except Exception as e:")
lines.append(" return {")
lines.append(' "success": False,')
lines.append(
f' "error": f"Failed to execute {operation_id}: {{str(e)}}"'
)
lines.append(" }")
return "\n".join(lines)
def _should_exclude_endpoint(self, path: str, operation: dict) -> bool:
"""
Determine if an endpoint should be excluded from generation.
Args:
path: The API endpoint path
operation: The operation dictionary from OpenAPI spec
Returns:
True if endpoint should be excluded, False otherwise
"""
# Check if operation is marked as deprecated
if operation.get("deprecated", False):
return True
# Check operation ID exclusion
operation_id = operation.get("operationId", "")
if operation_id in self.exclude_operations:
return True
# Check path pattern exclusion
for pattern in self.exclude_patterns:
if re.search(pattern, path):
return True
# Check tags
tags = operation.get("tags", [])
# If include_only_tags is specified, exclude if no matching tag
if self.include_only_tags:
if not any(tag in self.include_only_tags for tag in tags):
return True
# Check excluded tags
if any(tag in self.exclude_tags for tag in tags):
return True
return False
def generate_tools(self) -> str:
"""Generate all FastMCP tools from the OpenAPI spec."""
output_lines = []
# Generate header
output_lines.append('"""')
output_lines.append("Auto-generated FastMCP server from OpenAPI specification")
output_lines.append(f"Generated on: {datetime.now().isoformat()}")
output_lines.append(
f"Source: {self.spec_file} (version: {self.spec.get('info', {}).get('version', 'unknown')})"
)
output_lines.append('"""')
output_lines.append("")
# Add imports
self.imports.add("from typing import Any, Optional")
self.imports.add("import httpx")
self.imports.add("from fastmcp import FastMCP")
if self.custom_auth_module:
self.imports.add(f"from {self.custom_auth_module} import ProwlerAppAuth")
# Process all paths and operations
paths = self.spec.get("paths", {})
tools_by_tag = {} # Group tools by tag for better organization
excluded_count = 0
for path, path_item in paths.items():
for method in ["get", "post", "put", "patch", "delete"]:
if method in path_item:
operation = path_item[method]
# Check if this endpoint should be excluded
if self._should_exclude_endpoint(path, operation):
excluded_count += 1
continue
operation_id = operation.get("operationId", f"{method}_{path}")
tags = operation.get("tags", ["default"])
# Get endpoint configuration
endpoint_config = self._get_endpoint_config(path, method)
# Use custom function name if provided
if "name" in endpoint_config:
func_name = endpoint_config["name"]
else:
func_name = self._sanitize_function_name(operation_id)
parameters = self._extract_parameters(operation, endpoint_config)
tool_code = []
# Add @app_mcp_server.tool() decorator
tool_code.append("@app_mcp_server.tool()")
# Generate function signature
tool_code.append(
self._generate_function_signature(func_name, parameters)
)
# Generate docstring with custom description if provided
tool_code.append(
self._generate_docstring(
operation, parameters, path, method, endpoint_config
)
)
# Generate function body
tool_code.append(
self._generate_function_body(
path, method, parameters, operation_id
)
)
# Group by tag
for tag in tags:
if tag not in tools_by_tag:
tools_by_tag[tag] = []
tools_by_tag[tag].append("\n".join(tool_code))
# Write imports (consolidate typing imports)
typing_imports = set()
other_imports = []
for imp in sorted(self.imports):
if imp.startswith("from typing import"):
# Extract the imported items
items = imp.replace("from typing import", "").strip()
typing_imports.update([item.strip() for item in items.split(",")])
else:
other_imports.append(imp)
# Add consolidated typing import if needed
if typing_imports:
output_lines.append(
f"from typing import {', '.join(sorted(typing_imports))}"
)
# Add other imports
for imp in other_imports:
output_lines.append(imp)
if self.needs_query_array_normalizer:
output_lines.append("")
output_lines.append("")
output_lines.append("def _normalize_query_array(value):")
output_lines.append(
' """Normalize query array inputs to comma-separated strings."""'
)
output_lines.append(" if isinstance(value, str):")
output_lines.append(" stripped = value.strip()")
output_lines.append(" if not stripped:")
output_lines.append(" return stripped")
output_lines.append(
" if stripped.startswith('[') and stripped.endswith(']'):"
)
output_lines.append(" try:")
output_lines.append(" parsed = json.loads(stripped)")
output_lines.append(" except json.JSONDecodeError:")
output_lines.append(" return stripped")
output_lines.append(" if isinstance(parsed, list):")
output_lines.append(
" return ','.join(str(item) for item in parsed)"
)
output_lines.append(" return stripped")
output_lines.append(" if isinstance(value, (list, tuple, set)):")
output_lines.append(" return ','.join(str(item) for item in value)")
output_lines.append(
" if hasattr(value, '__iter__') and not isinstance(value, dict):"
)
output_lines.append(" try:")
output_lines.append(
" return ','.join(str(item) for item in value)"
)
output_lines.append(" except TypeError:")
output_lines.append(" return str(value)")
output_lines.append(" if isinstance(value, dict):")
output_lines.append(" return ','.join(str(key) for key in value)")
output_lines.append(" return str(value)")
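# The emitted helper turns '["eu-west-1","us-east-1"]' or a Python list
# ['eu-west-1', 'us-east-1'] into 'eu-west-1,us-east-1', and passes plain
# comma-separated strings through unchanged.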
output_lines.append("")
output_lines.append("# Initialize MCP server")
output_lines.append('app_mcp_server = FastMCP("prowler-app")')
output_lines.append("")
if self.custom_auth_module:
output_lines.append("# Initialize authentication manager")
output_lines.append("auth_manager = ProwlerAppAuth()")
output_lines.append("")
output_lines.append("# Initialize HTTP client")
output_lines.append("prowler_app_client = httpx.AsyncClient(")
output_lines.append(" timeout=30.0,")
output_lines.append(")")
output_lines.append("")
# Write tools grouped by tag
for tag, tools in tools_by_tag.items():
output_lines.append("")
output_lines.append("# " + "=" * 76)
output_lines.append(f"# {tag.upper()} ENDPOINTS")
output_lines.append("# " + "=" * 76)
output_lines.append("")
for tool in tools:
output_lines.append("")
output_lines.append(tool)
return "\n".join(output_lines)
def save_to_file(self, output_file: str):
"""Save the generated code to a file."""
generated_code = self.generate_tools()
Path(output_file).write_text(generated_code)
def generate_server_file():
# Get the spec file from the API directly (https://api.prowler.com/api/v1/schema)
api_base_url = os.getenv("PROWLER_API_BASE_URL", "https://api.prowler.com")
spec_file = f"{api_base_url}/api/v1/schema"
# Download the spec yaml file
response = requests.get(spec_file)
response.raise_for_status()
spec_data = response.text
# Save the spec data to a file
with open(str(Path(__file__).parent / "schema.yaml"), "w") as f:
f.write(spec_data)
# Example usage
generator = OpenAPIToMCPGenerator(
spec_file=str(Path(__file__).parent / "schema.yaml"),
custom_auth_module="prowler_mcp_server.prowler_app.utils.auth",
include_only_tags=[
"Provider",
"Scan",
"Schedule",
"Finding",
"Processor",
],
config_file=str(
Path(__file__).parent / "mcp_config.json"
), # Use custom naming config
)
# Generate and save the MCP server
generator.save_to_file(str(Path(__file__).parent.parent / "server.py"))

View File

@@ -0,0 +1,79 @@
"""Utility for auto-discovering and loading MCP tools.
This module provides functionality to automatically discover and register
all BaseTool subclasses from the tools package.
"""
import importlib
import pkgutil
from fastmcp import FastMCP
from prowler_mcp_server.lib.logger import logger
from prowler_mcp_server.prowler_app.tools.base import BaseTool
def load_all_tools(mcp: FastMCP) -> None:
"""Auto-discover and load all BaseTool subclasses from the tools package.
This function:
1. Dynamically imports all Python modules in the tools package
2. Discovers all concrete BaseTool subclasses
3. Instantiates each tool class
4. Registers all tools with the provided FastMCP instance
Args:
mcp: The FastMCP instance to register tools with
Note: tools are discovered from the TOOLS_PACKAGE constant defined below (prowler_mcp_server.prowler_app.tools); it is not a function argument.
Example:
from fastmcp import FastMCP
from prowler_mcp_server.prowler_app.utils.tool_loader import load_all_tools
app = FastMCP("prowler-app")
load_all_tools(app)
"""
TOOLS_PACKAGE = "prowler_mcp_server.prowler_app.tools"
logger.info(f"Auto-discovering tools from package: {TOOLS_PACKAGE}")
# Import the tools package
try:
tools_module = importlib.import_module(TOOLS_PACKAGE)
except ImportError as e:
logger.error(f"Failed to import tools package {TOOLS_PACKAGE}: {e}")
return
# Get the package path
if hasattr(tools_module, "__path__"):
package_path = tools_module.__path__
else:
logger.error(f"Package {TOOLS_PACKAGE} has no __path__ attribute")
return
# Import all modules in the package
for _, module_name, _ in pkgutil.iter_modules(package_path):
try:
full_module_name = f"{TOOLS_PACKAGE}.{module_name}"
importlib.import_module(full_module_name)
logger.debug(f"Imported module: {full_module_name}")
except Exception as e:
logger.error(f"Failed to import module {module_name}: {e}")
# Discover all concrete BaseTool subclasses
concrete_tools = [
tool_class
for tool_class in BaseTool.__subclasses__()
if not getattr(tool_class, "__abstractmethods__", None)
]
logger.info(f"Discovered {len(concrete_tools)} tool classes")
# Instantiate and register each tool
for tool_class in concrete_tools:
try:
tool_instance = tool_class()
tool_instance.register_tools(mcp)
logger.info(f"Loaded and registered: {tool_class.__name__}")
except Exception as e:
logger.error(f"Failed to load tool {tool_class.__name__}: {e}")
logger.info("Tool loading complete")

View File

@@ -1,5 +1,4 @@
import asyncio
import os
from fastmcp import FastMCP
from prowler_mcp_server import __version__
@@ -24,17 +23,6 @@ async def setup_main_server():
# Import Prowler App tools with prowler_app_ prefix
try:
logger.info("Importing Prowler App server...")
if not os.path.exists(
os.path.join(os.path.dirname(__file__), "prowler_app", "server.py")
):
from prowler_mcp_server.prowler_app.utils.server_generator import (
generate_server_file,
)
logger.info("Prowler App server not found, generating...")
generate_server_file()
from prowler_mcp_server.prowler_app.server import app_mcp_server
await prowler_mcp_server.import_server(app_mcp_server, prefix="prowler_app")

View File

@@ -4,8 +4,8 @@ requires = ["setuptools>=61.0", "wheel"]
[project]
dependencies = [
"fastmcp>=2.11.3",
"httpx>=0.27.0",
"fastmcp==2.13.1",
"httpx>=0.28.0"
]
description = "MCP server for Prowler ecosystem"
name = "prowler-mcp"

1072
mcp_server/uv.lock generated

File diff suppressed because it is too large