feat(mcp): migrate documentation search from ReadTheDocs to Mintlify API (#8916)

This commit is contained in:
Rubén De la Torre Vico
2025-10-15 17:40:18 +02:00
committed by GitHub
parent db5bab51ae
commit ec75b5d0a3
6 changed files with 101 additions and 86 deletions

View File

@@ -13,3 +13,4 @@ All notable changes to the **Prowler MCP Server** are documented in this file.
- Add new MCP Server for Prowler Documentation [(#8795)](https://github.com/prowler-cloud/prowler/pull/8795)
- API key support for STDIO mode and enhanced HTTP mode authentication [(#8823)](https://github.com/prowler-cloud/prowler/pull/8823)
- Add health check endpoint [(#8905)](https://github.com/prowler-cloud/prowler/pull/8905)
- Update Prowler Documentation MCP Server to use Mintlify API [(#8915)](https://github.com/prowler-cloud/prowler/pull/8915)

View File

@@ -2,16 +2,18 @@
> ⚠️ **Preview Feature**: This MCP server is currently in preview and under active development. Features and functionality may change. We welcome your feedback—please report any issues on [GitHub](https://github.com/prowler-cloud/prowler/issues) or join our [Slack community](https://goto.prowler.com/slack) to discuss and share your thoughts.
Access the entire Prowler ecosystem through the Model Context Protocol (MCP). This server provides two main capabilities:
Access the entire Prowler ecosystem through the Model Context Protocol (MCP). This server provides three main capabilities:
- **Prowler Cloud and Prowler App (Self-Managed)**: Full access to Prowler Cloud platform and Prowler Self-Managed for managing providers, running scans, and analyzing security findings
- **Prowler Hub**: Access to Prowler's security checks, fixers, and compliance frameworks catalog
- **Prowler Documentation**: Search and retrieve official Prowler documentation
## Requirements
- Python 3.12+
- Network access to `https://hub.prowler.com` (for Prowler Hub)
- Network access to `https://prowler.mintlify.app` (for Prowler Documentation)
- Network access to Prowler Cloud and Prowler App (Self-Managed) API (it can be Prowler Cloud API or self-hosted Prowler App API)
- Prowler Cloud account credentials (for Prowler Cloud and Prowler App (Self-Managed) features)
@@ -169,6 +171,13 @@ All tools are exposed under the `prowler_hub` prefix.
- `prowler_hub_list_providers`: List Prowler official providers and their services.
- `prowler_hub_get_artifacts_count`: Return total artifact count (checks + frameworks).
### Prowler Documentation
All tools are exposed under the `prowler_docs` prefix.
- `prowler_docs_search`: Search the official Prowler documentation using fulltext search. Returns relevant documentation pages with highlighted snippets and relevance scores.
- `prowler_docs_get_document`: Retrieve the full markdown content of a specific documentation file using the path from search results.
### Prowler Cloud and Prowler App (Self-Managed)
All tools are exposed under the `prowler_app` prefix.
@@ -218,7 +227,7 @@ All tools are exposed under the `prowler_app` prefix.
### Prowler Cloud and Prowler App (Self-Managed) Authentication
> [!IMPORTANT]
> Authentication is not needed for using Prowler Hub features.
> Authentication is not needed for using Prowler Hub or Prowler Documentation features.
The Prowler MCP server supports different authentication in Prowler Cloud and Prowler App (Self-Managed) methods depending on the transport mode:

View File

@@ -1,7 +1,7 @@
import urllib.parse
from typing import List, Optional
import requests
import httpx
from prowler_mcp_server import __version__
from pydantic import BaseModel, Field
@@ -12,25 +12,51 @@ class SearchResult(BaseModel):
title: str = Field(description="Document title")
url: str = Field(description="Documentation URL")
highlights: List[str] = Field(
description="Highlighted content snippets showing query matches with <span> tags",
description="Highlighted content snippets showing query matches with <mark><b> tags",
default_factory=list,
)
score: float = Field(
description="Relevance score for the search result", default=0.0
)
class ProwlerDocsSearchEngine:
"""Prowler documentation search using ReadTheDocs API."""
"""Prowler documentation search using Mintlify API."""
def __init__(self):
"""Initialize the search engine."""
self.api_base_url = "https://docs.prowler.com/_/api/v3/search/"
self.project_name = "prowler-prowler"
self.github_raw_base = (
"https://raw.githubusercontent.com/prowler-cloud/prowler/master/docs"
self.api_base_url = (
"https://api.mintlifytrieve.com/api/chunk_group/group_oriented_autocomplete"
)
self.dataset_id = "0096ba11-3f72-463b-9d95-b788495ac392"
self.api_key = "tr-T6JLeTkFXeNbNPyhijtI9XhIncydQQ3O"
self.docs_base_url = "https://prowler.mintlify.app"
# HTTP client for Mintlify API
self.mintlify_client = httpx.Client(
timeout=30.0,
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": f"prowler-mcp-server/{__version__}",
"TR-Dataset": self.dataset_id,
"Authorization": self.api_key,
"X-API-Version": "V2",
},
)
# HTTP client for Mintlify documentation
self.docs_client = httpx.Client(
timeout=30.0,
headers={
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"User-Agent": f"prowler-mcp-server/{__version__}",
},
)
def search(self, query: str, page_size: int = 5) -> List[SearchResult]:
"""
Search documentation using ReadTheDocs API.
Search documentation using Mintlify API.
Args:
query: Search query string
@@ -40,53 +66,69 @@ class ProwlerDocsSearchEngine:
List of search results
"""
try:
# Construct the search query with project filter
search_query = f"project:{self.project_name} {query}"
# Construct request body
payload = {
"query": query,
"search_type": "fulltext",
"extend_results": True,
"highlight_options": {
"highlight_window": 10,
"highlight_max_num": 1,
"highlight_max_length": 2,
"highlight_strategy": "exactmatch",
"highlight_delimiters": ["?", ",", ".", "!", "\n"],
},
"score_threshold": 0.2,
"filters": {"must_not": [{"field": "tag_set", "match": ["code"]}]},
"page_size": page_size,
"group_size": 3,
}
# Make request to ReadTheDocs API with page_size to limit results
params = {"q": search_query, "page_size": page_size}
response = requests.get(
# Make request to Mintlify API
response = self.mintlify_client.post(
self.api_base_url,
params=params,
timeout=10,
json=payload,
)
response.raise_for_status()
data = response.json()
# Parse results
results = []
for hit in data.get("results", []):
# Extract relevant fields from API response
blocks = hit.get("blocks", [])
# Get the document path from the hit's path field
hit_path = hit.get("path", "")
doc_path = self._extract_doc_path(hit_path)
for result in data.get("results", []):
group = result.get("group", {})
chunks = result.get("chunks", [])
# Get document path and title from group
doc_path = group.get("name", "")
group_title = group.get("name", "").replace("/", " / ").title()
# If chunks exist, use the first chunk's title from metadata
title = group_title
if chunks:
first_chunk = chunks[0].get("chunk", {})
metadata = first_chunk.get("metadata", {})
title = metadata.get("title", group_title)
# Construct full URL to docs
domain = hit.get("domain", "https://docs.prowler.com")
full_url = f"{domain}{hit_path}" if hit_path else ""
full_url = f"{self.docs_base_url}/{doc_path}"
# Extract highlights from API response
# Extract highlights and scores from chunks
highlights = []
# Add title highlights
page_highlights = hit.get("highlights", {})
if page_highlights.get("title"):
highlights.extend(page_highlights["title"])
# Add block content highlights (up to 3 snippets)
for block in blocks[:3]:
block_highlights = block.get("highlights", {})
if block_highlights.get("content"):
highlights.extend(block_highlights["content"])
max_score = 0.0
for chunk_data in chunks:
chunk_highlights = chunk_data.get("highlights", [])
highlights.extend(chunk_highlights)
# Track the highest score among all chunks in this group
chunk_score = chunk_data.get("score", 0.0)
max_score = max(max_score, chunk_score)
results.append(
SearchResult(
path=doc_path,
title=hit.get("title", ""),
title=title,
url=full_url,
highlights=highlights,
score=max_score,
)
)
@@ -99,7 +141,7 @@ class ProwlerDocsSearchEngine:
def get_document(self, doc_path: str) -> Optional[str]:
"""
Get full document content from GitHub raw API.
Get full document content from Mintlify documentation.
Args:
doc_path: Path to the documentation file (e.g., "getting-started/installation")
@@ -111,15 +153,15 @@ class ProwlerDocsSearchEngine:
# Clean up the path
doc_path = doc_path.rstrip("/")
# Add .md extension if not present
# Add .md extension if not present (Mintlify serves both .md and .mdx)
if not doc_path.endswith(".md"):
doc_path = f"{doc_path}.md"
# Construct GitHub raw URL
url = f"{self.github_raw_base}/{doc_path}"
# Construct Mintlify URL
url = f"{self.docs_base_url}/{doc_path}"
# Fetch the raw markdown
response = requests.get(url, timeout=10)
# Fetch the documentation page
response = self.docs_client.get(url)
response.raise_for_status()
return response.text
@@ -127,34 +169,3 @@ class ProwlerDocsSearchEngine:
except Exception as e:
print(f"Error fetching document: {e}")
return None
def _extract_doc_path(self, url: str) -> str:
"""
Extract the document path from a full URL.
Args:
url: Full documentation URL
Returns:
Document path relative to docs base
"""
if not url:
return ""
# Parse URL and extract path
try:
parsed = urllib.parse.urlparse(url)
path = parsed.path
# Remove the base path prefix if present
base_path = "/projects/prowler-open-source/en/latest/"
if path.startswith(base_path):
path = path[len(base_path) :]
# Remove .html extension
if path.endswith(".html"):
path = path[:-5]
return path.lstrip("/")
except Exception:
return url

View File

@@ -23,18 +23,15 @@ def search(
to find relevant information about security checks, cloud providers,
compliance frameworks, and usage instructions.
Supports advanced search syntax:
- Exact phrases: "custom css"
- Prefix search: test*
- Fuzzy search: doks~1
- Proximity search: "dashboard admin"~2
Uses fulltext search to find the most relevant documentation pages
based on your query.
Args:
query: The search query
page_size: Number of top results to return (default: 5)
Returns:
List of search results with highlights showing matched terms (in <span> tags)
List of search results with highlights showing matched terms (in <mark><b> tags)
"""
return prowler_docs_search_engine.search(query, page_size)

View File

@@ -6,7 +6,6 @@ requires = ["setuptools>=61.0", "wheel"]
dependencies = [
"fastmcp>=2.11.3",
"httpx>=0.27.0",
"requests>=2.31.0"
]
description = "MCP server for Prowler ecosystem"
name = "prowler-mcp"

2
mcp_server/uv.lock generated
View File

@@ -634,14 +634,12 @@ source = { editable = "." }
dependencies = [
{ name = "fastmcp" },
{ name = "httpx" },
{ name = "requests" },
]
[package.metadata]
requires-dist = [
{ name = "fastmcp", specifier = ">=2.11.3" },
{ name = "httpx", specifier = ">=0.27.0" },
{ name = "requests", specifier = ">=2.31.0" },
]
[[package]]