feat(lighthouse): filter out non-compatible OpenAI models (#9523)

Co-authored-by: Chandrapal Badshah <12944530+Chan9390@users.noreply.github.com>
Co-authored-by: Adrián Jesús Peña Rodríguez <adrianjpr@gmail.com>
This commit is contained in:
Chandrapal Badshah
2025-12-15 16:01:04 +05:30
committed by GitHub
parent 4398b00801
commit 2b4b23c719
2 changed files with 60 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ All notable changes to the **Prowler API** are documented in this file.
### Changed
- Endpoint `GET /overviews/attack-surfaces` no longer returns the related check IDs [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
- OpenAI provider to only load chat-compatible models with tool calling support [(#9523)](https://github.com/prowler-cloud/prowler/pull/9523)
---

View File

@@ -11,6 +11,41 @@ from api.models import LighthouseProviderConfiguration, LighthouseProviderModels
logger = get_task_logger(__name__)
# OpenAI model prefixes to exclude from Lighthouse model selection.
# These models don't support text chat completions and tool calling.
EXCLUDED_OPENAI_MODEL_PREFIXES = (
"dall-e", # Image generation
"whisper", # Audio transcription
"tts-", # Text-to-speech (tts-1, tts-1-hd, etc.)
"sora", # Text-to-video (sora-2, sora-2-pro, etc.)
"text-embedding", # Embeddings
"embedding", # Embeddings (alternative naming)
"text-moderation", # Content moderation
"omni-moderation", # Content moderation
"text-davinci", # Legacy completion models
"text-curie", # Legacy completion models
"text-babbage", # Legacy completion models
"text-ada", # Legacy completion models
"davinci", # Legacy completion models
"curie", # Legacy completion models
"babbage", # Legacy completion models
"ada", # Legacy completion models
"computer-use", # Computer control agent
"gpt-image", # Image generation
"gpt-audio", # Audio models
"gpt-realtime", # Realtime voice API
)
# OpenAI model substrings to exclude (patterns that can appear anywhere in model ID).
# These patterns identify non-chat model variants.
EXCLUDED_OPENAI_MODEL_SUBSTRINGS = (
"-audio-", # Audio preview models (gpt-4o-audio-preview, etc.)
"-realtime-", # Realtime preview models (gpt-4o-realtime-preview, etc.)
"-transcribe", # Transcription models (gpt-4o-transcribe, etc.)
"-tts", # TTS models (gpt-4o-mini-tts)
"-instruct", # Legacy instruct models (gpt-3.5-turbo-instruct, etc.)
)
def _extract_error_message(e: Exception) -> str:
"""
@@ -283,20 +318,41 @@ def _fetch_openai_models(api_key: str) -> Dict[str, str]:
"""
Fetch available models from OpenAI API.
Filters out models that don't support text input/output and tool calling,
such as image generation (DALL-E), audio transcription (Whisper),
text-to-speech (TTS), embeddings, and moderation models.
Args:
api_key: OpenAI API key for authentication.
Returns:
Dict mapping model_id to model_name. For OpenAI, both are the same
as the API doesn't provide separate display names.
as the API doesn't provide separate display names. Only includes
models that support text input, text output or tool calling.
Raises:
Exception: If the API call fails.
"""
client = openai.OpenAI(api_key=api_key)
models = client.models.list()
# OpenAI uses model.id for both ID and display name
return {m.id: m.id for m in getattr(models, "data", [])}
# Filter models to only include those supporting chat completions + tool calling
filtered_models = {}
for model in getattr(models, "data", []):
model_id = model.id
# Skip if model ID starts with excluded prefixes
if model_id.startswith(EXCLUDED_OPENAI_MODEL_PREFIXES):
continue
# Skip if model ID contains excluded substrings
if any(substring in model_id for substring in EXCLUDED_OPENAI_MODEL_SUBSTRINGS):
continue
# Include model (supports chat completions + tool calling)
filtered_models[model_id] = model_id
return filtered_models
def _fetch_openai_compatible_models(base_url: str, api_key: str) -> Dict[str, str]: