feat(codebuild): use batched API calls to prevent throttling and false positives (#10639)
Co-authored-by: Daniel Barranquero <danielbo2001@gmail.com>
@@ -15,6 +15,7 @@ All notable changes to the **Prowler SDK** are documented in this file.
 - `route53_dangling_ip_subdomain_takeover` now also flags `CNAME` records pointing to S3 website endpoints whose buckets are missing from the account [(#10920)](https://github.com/prowler-cloud/prowler/pull/10920)
 - Azure Network Watcher flow log checks now require workspace-backed Traffic Analytics for `network_flow_log_captured_sent` and align metadata with VNet-compatible flow log guidance [(#10645)](https://github.com/prowler-cloud/prowler/pull/10645)
 - Azure compliance entries for legacy Network Watcher flow log controls now use retirement-aware guidance and point new deployments to VNet flow logs
+- AWS CodeBuild service now batches `BatchGetProjects` and `BatchGetBuilds` calls per region (up to 100 items per call) to reduce API call volume and prevent throttling-induced false positives in `codebuild_project_not_publicly_accessible` [(#10639)](https://github.com/prowler-cloud/prowler/pull/10639)
 - `display_compliance_table` dispatch switched from substring `in` checks to `startswith` to prevent false matches between similarly named frameworks (e.g. `cisa` vs `cis`) [(#10301)](https://github.com/prowler-cloud/prowler/pull/10301)
 
 ### 🐞 Fixed
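For context on the entry this commit adds: the batching it describes is plain chunked calls to the CodeBuild batch APIs. A minimal standalone sketch of the pattern, not the committed implementation; the boto3 client, region, and project names below are illustrative placeholders:

```python
# Sketch: chunked BatchGetProjects calls, at most 100 names per request.
import boto3

codebuild = boto3.client("codebuild", region_name="eu-west-1")  # placeholder region
project_names = [f"project-{i}" for i in range(150)]  # placeholder names

projects = []
for i in range(0, len(project_names), 100):
    # One API call per chunk of <= 100 names instead of one call per project.
    response = codebuild.batch_get_projects(names=project_names[i : i + 100])
    projects.extend(response.get("projects", []))
```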
@@ -1,4 +1,5 @@
 import datetime
+from concurrent.futures import as_completed
 from typing import List, Optional
 
 from pydantic.v1 import BaseModel
@@ -14,9 +15,9 @@ class Codebuild(AWSService):
         super().__init__(__class__.__name__, provider)
         self.projects = {}
         self.__threading_call__(self._list_projects)
-        self.__threading_call__(self._list_builds_for_project, self.projects.values())
-        self.__threading_call__(self._batch_get_builds, self.projects.values())
-        self.__threading_call__(self._batch_get_projects, self.projects.values())
+        self.__threading_call__(self._list_builds_for_project)
+        self.__threading_call__(self._batch_get_builds)
+self.__threading_call__(self._batch_get_projects)
         self.report_groups = {}
         self.__threading_call__(self._list_report_groups)
         self.__threading_call__(
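The constructor now passes no iterator to `__threading_call__`, which, as I read Prowler's `AWSService` helper, fans out one task per regional client instead of one per project, so each handler can batch every project in its region. A rough sketch of that dispatch difference under this assumption, with hypothetical names:

```python
# Sketch of per-region fan-out; clients, projects, and handler are hypothetical.
from concurrent.futures import ThreadPoolExecutor

regional_clients = {"eu-west-1": "client-a", "us-east-1": "client-b"}

def handle_region(client):
    # One task per region: the handler batches all projects for that region
    # into a few API calls, instead of one task (and one call) per project.
    print(f"batching via {client}")

with ThreadPoolExecutor(max_workers=4) as pool:
    list(pool.map(handle_region, regional_clients.values()))
```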
@@ -44,10 +45,8 @@ class Codebuild(AWSService):
                 f"{regional_client.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
             )
 
-    def _list_builds_for_project(self, project):
-        logger.info("Codebuild - Listing builds...")
+    def _fetch_project_last_build(self, regional_client, project):
         try:
-            regional_client = self.regional_clients[project.region]
             build_ids = regional_client.list_builds_for_project(
                 projectName=project.name
             ).get("ids", [])
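`_fetch_project_last_build` becomes a small per-project worker that receives the regional client from its caller rather than looking it up itself; the next hunk submits it to the shared thread pool once per project in the region. A self-contained sketch of that submit-and-collect pattern, where the worker and data are stand-ins:

```python
# Sketch of the per-project fan-out within one region; fetch_one is a
# stand-in for _fetch_project_last_build.
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_one(client: str, project: str) -> str:
    # Placeholder for the non-batchable list_builds_for_project call.
    return f"{project}: last build fetched via {client}"

projects = [f"project-{i}" for i in range(5)]

with ThreadPoolExecutor(max_workers=4) as pool:
    futures = [pool.submit(fetch_one, "eu-west-1-client", p) for p in projects]
    for future in as_completed(futures):
        print(future.result())  # real code records the result on the project
```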
@@ -58,28 +57,99 @@ class Codebuild(AWSService):
                 f"{project.region}: {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
             )
 
-    def _batch_get_builds(self, project):
-        logger.info("Codebuild - Getting builds...")
+    def _list_builds_for_project(self, regional_client):
+        logger.info("Codebuild - Listing builds...")
         try:
-            if project.last_build and project.last_build.id:
-                regional_client = self.regional_clients[project.region]
-                builds_by_id = regional_client.batch_get_builds(
-                    ids=[project.last_build.id]
-                ).get("builds", [])
-                if len(builds_by_id) > 0:
-                    project.last_invoked_time = builds_by_id[0].get("endTime")
+            regional_projects = [
+                project
+                for project in self.projects.values()
+                if project.region == regional_client.region
+            ]
+
+            # list_builds_for_project has no batch API equivalent, so reuse the
+            # shared thread pool to issue per-project calls in parallel within
+            # this region — preserving the wall-clock performance of the
+            # previous implementation.
+            futures = [
+                self.thread_pool.submit(
+                    self._fetch_project_last_build, regional_client, project
+                )
+                for project in regional_projects
+            ]
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception:
+                    pass
         except Exception as error:
             logger.error(
-                f"{regional_client.region}: {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
+                f"{regional_client.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
             )
 
-    def _batch_get_projects(self, project):
+    def _batch_get_builds(self, regional_client):
+        logger.info("Codebuild - Getting builds...")
+        try:
+            # Collect all build IDs for this region
+            build_id_to_project = {}
+            for project in self.projects.values():
+                if (
+                    project.region == regional_client.region
+                    and project.last_build
+                    and project.last_build.id
+                ):
+                    build_id_to_project[project.last_build.id] = project
+
+            if not build_id_to_project:
+                return
+
+            build_ids = list(build_id_to_project.keys())
+
+            # batch_get_builds supports up to 100 IDs per call
+            for i in range(0, len(build_ids), 100):
+                batch = build_ids[i : i + 100]
+                response = regional_client.batch_get_builds(ids=batch)
+                for build_info in response.get("builds", []):
+                    build_id = build_info.get("id")
+                    if build_id in build_id_to_project:
+                        end_time = build_info.get("endTime")
+                        if end_time:
+                            build_id_to_project[build_id].last_invoked_time = end_time
+        except Exception as error:
+            logger.error(
+                f"{regional_client.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
+            )
+
+    def _batch_get_projects(self, regional_client):
         logger.info("Codebuild - Getting projects...")
         try:
-            regional_client = self.regional_clients[project.region]
-            project_info = regional_client.batch_get_projects(names=[project.name])[
-                "projects"
-            ][0]
+            # Collect all project names for this region
+            regional_projects = {
+                arn: project
+                for arn, project in self.projects.items()
+                if project.region == regional_client.region
+            }
+            if not regional_projects:
+                return
+
+            project_names = [project.name for project in regional_projects.values()]
+
+            # batch_get_projects supports up to 100 names per call
+            for i in range(0, len(project_names), 100):
+                batch = project_names[i : i + 100]
+                response = regional_client.batch_get_projects(names=batch)
+                for project_info in response.get("projects", []):
+                    project_arn = project_info.get("arn")
+                    if project_arn in regional_projects:
+                        self._parse_project_info(
+                            regional_projects[project_arn], project_info
+                        )
+        except Exception as error:
+            logger.error(
+                f"{regional_client.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
+            )
+
+    def _parse_project_info(self, project, project_info):
+        try:
             project.buildspec = project_info["source"].get("buildspec")
             if project_info["source"]["type"] != "NO_SOURCE":
                 project.source = Source(
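A note on the id-keyed dictionaries above: batch responses are matched back to projects by `id` (builds) and `arn` (projects), so correctness does not depend on the service preserving request order. A minimal illustration of that mapping; the `Project` type and reversed response are fabricated for the sketch:

```python
# Sketch: mapping a possibly reordered batch response back by id.
from dataclasses import dataclass
from typing import Optional

@dataclass
class Project:
    name: str
    last_invoked_time: Optional[str] = None

build_id_to_project = {f"build-{i}": Project(f"project-{i}") for i in range(3)}

# Fake response deliberately returned in reverse order.
response = {
    "builds": [{"id": f"build-{i}", "endTime": f"t{i}"} for i in reversed(range(3))]
}

for build_info in response["builds"]:
    project = build_id_to_project.get(build_info["id"])
    if project and build_info.get("endTime"):
        project.last_invoked_time = build_info["endTime"]

assert build_id_to_project["build-0"].last_invoked_time == "t0"
```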
@@ -45,11 +45,12 @@ def mock_make_api_call(self, operation_name, kwarg):
         elif operation_name == "ListBuildsForProject":
             return {"ids": [build_id]}
         elif operation_name == "BatchGetBuilds":
-            return {"builds": [{"endTime": last_invoked_time}]}
+            return {"builds": [{"id": build_id, "endTime": last_invoked_time}]}
         elif operation_name == "BatchGetProjects":
             return {
                 "projects": [
                     {
+                        "arn": project_arn,
                         "source": {
                             "type": source_type,
                             "location": bitbucket_url,
@@ -230,3 +231,97 @@ class Test_Codebuild_Service:
         assert (
             codebuild.report_groups[report_group_arn].tags[0]["value"] == project_name
         )
+
+
+# Module-level state and helpers used by the chunking/out-of-order test below.
+# Kept at module level so the API-call mock is a plain function rather than a
+# closure defined inside the test method.
+TOTAL_PROJECTS = 150
+many_project_names = [f"project-{i}" for i in range(TOTAL_PROJECTS)]
+many_project_arns = [
+    f"arn:{AWS_COMMERCIAL_PARTITION}:codebuild:{AWS_REGION_EU_WEST_1}:{AWS_ACCOUNT_NUMBER}:project/{name}"
+    for name in many_project_names
+]
+many_build_ids_for = {name: f"{name}:build-id" for name in many_project_names}
+many_end_times_for = {
+    name: datetime.now() - timedelta(days=i)
+    for i, name in enumerate(many_project_names)
+}
+many_name_by_build_id = {v: k for k, v in many_build_ids_for.items()}
+many_batch_call_sizes = {"BatchGetProjects": [], "BatchGetBuilds": []}
+
+
+def mock_make_api_call_many_projects(self, operation_name, kwarg):
+    if operation_name == "ListProjects":
+        return {"projects": many_project_names}
+    if operation_name == "ListBuildsForProject":
+        return {"ids": [many_build_ids_for[kwarg["projectName"]]]}
+    if operation_name == "BatchGetBuilds":
+        ids = kwarg["ids"]
+        many_batch_call_sizes["BatchGetBuilds"].append(len(ids))
+        # Reverse the response order to verify id->project mapping does not
+        # depend on response ordering.
+        builds = [
+            {"id": bid, "endTime": many_end_times_for[many_name_by_build_id[bid]]}
+            for bid in reversed(ids)
+        ]
+        return {"builds": builds}
+    if operation_name == "BatchGetProjects":
+        names = kwarg["names"]
+        many_batch_call_sizes["BatchGetProjects"].append(len(names))
+        # Reverse the response order to verify arn->project mapping does not
+        # depend on response ordering.
+        projects = [
+            {
+                "arn": f"arn:{AWS_COMMERCIAL_PARTITION}:codebuild:{AWS_REGION_EU_WEST_1}:{AWS_ACCOUNT_NUMBER}:project/{name}",
+                "source": {"type": "NO_SOURCE"},
+                "logsConfig": {},
+                "tags": [],
+                "projectVisibility": "PRIVATE",
+            }
+            for name in reversed(names)
+        ]
+        return {"projects": projects}
+    if operation_name == "ListReportGroups":
+        return {"reportGroups": []}
+    return make_api_call(self, operation_name, kwarg)
+
+
+class Test_Codebuild_Service_Batching:
+    @patch(
+        "botocore.client.BaseClient._make_api_call",
+        new=mock_make_api_call_many_projects,
+    )
+    @patch(
+        "prowler.providers.aws.aws_provider.AwsProvider.generate_regional_clients",
+        new=mock_generate_regional_clients,
+    )
+    @mock_aws
+    def test_codebuild_batches_chunks_over_100_projects_and_maps_out_of_order_responses(
+        self,
+    ):
+        """Verify _batch_get_projects/_batch_get_builds chunk in groups of 100
+        and correctly map out-of-order batch responses back to the right
+        project using `arn`/`id`.
+        """
+        # Reset the per-test recorder (module-level state survives across runs).
+        many_batch_call_sizes["BatchGetProjects"].clear()
+        many_batch_call_sizes["BatchGetBuilds"].clear()
+
+        codebuild = Codebuild(set_mocked_aws_provider([AWS_REGION_EU_WEST_1]))
+
+        # Verify chunking: 150 items -> two batches of 100 and 50.
+        assert sorted(many_batch_call_sizes["BatchGetProjects"]) == [50, 100]
+        assert sorted(many_batch_call_sizes["BatchGetBuilds"]) == [50, 100]
+
+        # Verify all projects were tracked.
+        assert len(codebuild.projects) == TOTAL_PROJECTS
+
+        # Verify out-of-order responses were correctly mapped back to the
+        # right project by `arn` (projects) and `id` (builds).
+        for name, arn in zip(many_project_names, many_project_arns):
+            project = codebuild.projects[arn]
+            assert project.name == name
+            assert project.project_visibility == "PRIVATE"
+            assert project.last_build == Build(id=many_build_ids_for[name])
+            assert project.last_invoked_time == many_end_times_for[name]