feat(findings): Improve /findings/metadata performance (#6748)

This commit is contained in:
Víctor Fernández Poyatos
2025-01-30 13:31:43 +01:00
committed by GitHub
parent c159a28016
commit 5061da6897
11 changed files with 178 additions and 232 deletions

55
api/poetry.lock generated
View File

@@ -252,6 +252,20 @@ files = [
[package.dependencies]
cryptography = "*"
[[package]]
name = "autopep8"
version = "2.3.2"
description = "A tool that automatically formats Python code to conform to the PEP 8 style guide"
optional = false
python-versions = ">=3.9"
files = [
{file = "autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128"},
{file = "autopep8-2.3.2.tar.gz", hash = "sha256:89440a4f969197b69a995e4ce0661b031f455a9f776d2c5ba3dbd83466931758"},
]
[package.dependencies]
pycodestyle = ">=2.12.0"
[[package]]
name = "awsipranges"
version = "0.3.3"
@@ -1488,6 +1502,23 @@ docs = ["Sphinx (==2.2.0)", "docutils (<0.18)", "sphinx-rtd-theme (==0.4.3)"]
publish = ["build (==0.7.0)", "twine (==3.7.1)"]
test = ["coveralls (==3.3.0)", "dj-database-url (==0.5.0)", "freezegun (==1.1.0)", "psycopg2 (>=2.8.4,<3.0.0)", "pytest (==6.2.5)", "pytest-benchmark (==3.4.1)", "pytest-cov (==3.0.0)", "pytest-django (==4.4.0)", "pytest-freezegun (==0.4.2)", "pytest-lazy-fixture (==0.6.3)", "snapshottest (==0.6.0)", "tox (==3.24.4)"]
[[package]]
name = "django-silk"
version = "5.3.2"
description = "Silky smooth profiling for the Django Framework"
optional = false
python-versions = ">=3.9"
files = [
{file = "django_silk-5.3.2-py3-none-any.whl", hash = "sha256:49f1caebfda28b1707f0cfef524e0476beb82b8c5e40f5ccff7f73a6b4f6d3ac"},
{file = "django_silk-5.3.2.tar.gz", hash = "sha256:b0db54eebedb8d16f572321bd6daccac0bd3f547ae2618bb45d96fe8fc02229d"},
]
[package.dependencies]
autopep8 = "*"
Django = ">=4.2"
gprof2dot = ">=2017.09.19"
sqlparse = "*"
[[package]]
name = "django-timezone-field"
version = "7.1"
@@ -1984,6 +2015,17 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4
[package.extras]
grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
[[package]]
name = "gprof2dot"
version = "2024.6.6"
description = "Generate a dot graph from the output of several profilers."
optional = false
python-versions = ">=3.8"
files = [
{file = "gprof2dot-2024.6.6-py2.py3-none-any.whl", hash = "sha256:45b14ad7ce64e299c8f526881007b9eb2c6b75505d5613e96e66ee4d5ab33696"},
{file = "gprof2dot-2024.6.6.tar.gz", hash = "sha256:fa1420c60025a9eb7734f65225b4da02a10fc6dd741b37fa129bc6b41951e5ab"},
]
[[package]]
name = "grapheme"
version = "0.6.0"
@@ -3603,6 +3645,17 @@ files = [
[package.dependencies]
pyasn1 = ">=0.4.6,<0.7.0"
[[package]]
name = "pycodestyle"
version = "2.12.1"
description = "Python style guide checker"
optional = false
python-versions = ">=3.8"
files = [
{file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"},
{file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"},
]
[[package]]
name = "pycparser"
version = "2.22"
@@ -5070,4 +5123,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.13"
content-hash = "af973deaabe32ea4179c44ae702f6a8cc359aabe6f1641dcf2b05534b0bbb3d5"
content-hash = "6465edb36efd1fa6db06d4103fea8046951acc3f4f8b357facaaa34ae2bc74bd"

View File

@@ -8,7 +8,7 @@ description = "Prowler's API (Django/DRF)"
license = "Apache-2.0"
name = "prowler-api"
package-mode = false
version = "1.3.1"
version = "1.3.2"
[tool.poetry.dependencies]
celery = {extras = ["pytest"], version = "^5.4.0"}
@@ -37,6 +37,7 @@ uuid6 = "2024.7.10"
[tool.poetry.group.dev.dependencies]
bandit = "1.7.9"
coverage = "7.5.4"
django-silk = "5.3.2"
docker = "7.1.0"
freezegun = "1.5.1"
mypy = "1.10.1"

View File

@@ -4,13 +4,17 @@ class MainRouter:
def db_for_read(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if model_table_name.startswith("django_"):
if model_table_name.startswith("django_") or model_table_name.startswith(
"silk_"
):
return self.admin_db
return None
def db_for_write(self, model, **hints): # noqa: F841
model_table_name = model._meta.db_table
if model_table_name.startswith("django_"):
if model_table_name.startswith("django_") or model_table_name.startswith(
"silk_"
):
return self.admin_db
return None

View File

@@ -319,26 +319,27 @@ class FindingFilter(FilterSet):
field_name="resources__type", lookup_expr="icontains"
)
resource_tag_key = CharFilter(field_name="resources__tags__key")
resource_tag_key__in = CharInFilter(
field_name="resources__tags__key", lookup_expr="in"
)
resource_tag_key__icontains = CharFilter(
field_name="resources__tags__key", lookup_expr="icontains"
)
resource_tag_value = CharFilter(field_name="resources__tags__value")
resource_tag_value__in = CharInFilter(
field_name="resources__tags__value", lookup_expr="in"
)
resource_tag_value__icontains = CharFilter(
field_name="resources__tags__value", lookup_expr="icontains"
)
resource_tags = CharInFilter(
method="filter_resource_tag",
lookup_expr="in",
help_text="Filter by resource tags `key:value` pairs.\nMultiple values may be "
"separated by commas.",
)
# Temporarily disabled until we implement tag filtering in the UI
# resource_tag_key = CharFilter(field_name="resources__tags__key")
# resource_tag_key__in = CharInFilter(
# field_name="resources__tags__key", lookup_expr="in"
# )
# resource_tag_key__icontains = CharFilter(
# field_name="resources__tags__key", lookup_expr="icontains"
# )
# resource_tag_value = CharFilter(field_name="resources__tags__value")
# resource_tag_value__in = CharInFilter(
# field_name="resources__tags__value", lookup_expr="in"
# )
# resource_tag_value__icontains = CharFilter(
# field_name="resources__tags__value", lookup_expr="icontains"
# )
# resource_tags = CharInFilter(
# method="filter_resource_tag",
# lookup_expr="in",
# help_text="Filter by resource tags `key:value` pairs.\nMultiple values may be "
# "separated by commas.",
# )
scan = UUIDFilter(method="filter_scan_id")
scan__in = UUIDInFilter(method="filter_scan_id_in")
@@ -374,12 +375,6 @@ class FindingFilter(FilterSet):
},
}
@property
def qs(self):
# Force distinct results to prevent duplicates with many-to-many relationships
parent_qs = super().qs
return parent_qs.distinct()
# Convert filter values to UUIDv7 values for use with partitioning
def filter_scan_id(self, queryset, name, value):
try:

View File

@@ -1,7 +1,7 @@
openapi: 3.0.3
info:
title: Prowler API
version: 1.3.1
version: 1.3.2
description: |-
Prowler API specification.
@@ -478,51 +478,6 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_key]
schema:
type: string
- in: query
name: filter[resource_tag_key__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_key__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_value]
schema:
type: string
- in: query
name: filter[resource_tag_value__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_value__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tags]
schema:
type: array
items:
type: string
description: |-
Filter by resource tags `key:value` pairs.
Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_type]
schema:
@@ -1028,51 +983,6 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_key]
schema:
type: string
- in: query
name: filter[resource_tag_key__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_key__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_value]
schema:
type: string
- in: query
name: filter[resource_tag_value__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_value__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tags]
schema:
type: array
items:
type: string
description: |-
Filter by resource tags `key:value` pairs.
Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_type]
schema:
@@ -1280,7 +1190,6 @@ paths:
- services
- regions
- resource_types
- tags
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
@@ -1498,51 +1407,6 @@ paths:
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_key]
schema:
type: string
- in: query
name: filter[resource_tag_key__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_key__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tag_value]
schema:
type: string
- in: query
name: filter[resource_tag_value__icontains]
schema:
type: string
- in: query
name: filter[resource_tag_value__in]
schema:
type: array
items:
type: string
description: Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_tags]
schema:
type: array
items:
type: string
description: |-
Filter by resource tags `key:value` pairs.
Multiple values may be separated by commas.
explode: false
style: form
- in: query
name: filter[resource_type]
schema:
@@ -6081,13 +5945,10 @@ components:
type: array
items:
type: string
tags:
description: Tags are described as key-value pairs.
required:
- services
- regions
- resource_types
- tags
FindingMetadataResponse:
type: object
properties:

View File

@@ -2454,15 +2454,16 @@ class TestFindingViewSet:
("search", "ec2", 2),
# full text search on finding tags
("search", "value2", 2),
("resource_tag_key", "key", 2),
("resource_tag_key__in", "key,key2", 2),
("resource_tag_key__icontains", "key", 2),
("resource_tag_value", "value", 2),
("resource_tag_value__in", "value,value2", 2),
("resource_tag_value__icontains", "value", 2),
("resource_tags", "key:value", 2),
("resource_tags", "not:exists", 0),
("resource_tags", "not:exists,key:value", 2),
# Temporarily disabled until we implement tag filtering in the UI
# ("resource_tag_key", "key", 2),
# ("resource_tag_key__in", "key,key2", 2),
# ("resource_tag_key__icontains", "key", 2),
# ("resource_tag_value", "value", 2),
# ("resource_tag_value__in", "value,value2", 2),
# ("resource_tag_value__icontains", "value", 2),
# ("resource_tags", "key:value", 2),
# ("resource_tags", "not:exists", 0),
# ("resource_tags", "not:exists,key:value", 2),
]
),
)
@@ -2611,7 +2612,8 @@ class TestFindingViewSet:
expected_services = {"ec2", "s3"}
expected_regions = {"eu-west-1", "us-east-1"}
expected_tags = {"key": ["value"], "key2": ["value2"]}
# Temporarily disabled until we implement tag filtering in the UI
# expected_tags = {"key": ["value"], "key2": ["value2"]}
expected_resource_types = {"prowler-test"}
assert data["data"]["type"] == "findings-metadata"
@@ -2621,7 +2623,7 @@ class TestFindingViewSet:
assert (
set(data["data"]["attributes"]["resource_types"]) == expected_resource_types
)
assert data["data"]["attributes"]["tags"] == expected_tags
# assert data["data"]["attributes"]["tags"] == expected_tags
def test_findings_metadata_severity_retrieve(
self, authenticated_client, findings_fixture
@@ -2638,7 +2640,8 @@ class TestFindingViewSet:
expected_services = {"s3"}
expected_regions = {"eu-west-1"}
expected_tags = {"key": ["value"], "key2": ["value2"]}
# Temporarily disabled until we implement tag filtering in the UI
# expected_tags = {"key": ["value"], "key2": ["value2"]}
expected_resource_types = {"prowler-test"}
assert data["data"]["type"] == "findings-metadata"
@@ -2648,7 +2651,7 @@ class TestFindingViewSet:
assert (
set(data["data"]["attributes"]["resource_types"]) == expected_resource_types
)
assert data["data"]["attributes"]["tags"] == expected_tags
# assert data["data"]["attributes"]["tags"] == expected_tags
def test_findings_metadata_future_date(self, authenticated_client):
response = authenticated_client.get(
@@ -2660,7 +2663,8 @@ class TestFindingViewSet:
assert data["data"]["id"] is None
assert data["data"]["attributes"]["services"] == []
assert data["data"]["attributes"]["regions"] == []
assert data["data"]["attributes"]["tags"] == {}
# Temporarily disabled until we implement tag filtering in the UI
# assert data["data"]["attributes"]["tags"] == {}
assert data["data"]["attributes"]["resource_types"] == []
def test_findings_metadata_invalid_date(self, authenticated_client):

View File

@@ -933,7 +933,8 @@ class FindingMetadataSerializer(serializers.Serializer):
resource_types = serializers.ListField(
child=serializers.CharField(), allow_empty=True
)
tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
# Temporarily disabled until we implement tag filtering in the UI
# tags = serializers.JSONField(help_text="Tags are described as key-value pairs.")
class Meta:
resource_name = "findings-metadata"

View File

@@ -1,30 +1,31 @@
from django.conf import settings
from django.urls import include, path
from drf_spectacular.views import SpectacularRedocView
from rest_framework_nested import routers
from api.v1.views import (
ComplianceOverviewViewSet,
CustomTokenObtainView,
CustomTokenRefreshView,
FindingViewSet,
MembershipViewSet,
ProviderGroupViewSet,
ProviderGroupProvidersRelationshipView,
ProviderSecretViewSet,
InvitationViewSet,
InvitationAcceptViewSet,
RoleViewSet,
RoleProviderGroupRelationshipView,
UserRoleRelationshipView,
InvitationViewSet,
MembershipViewSet,
OverviewViewSet,
ComplianceOverviewViewSet,
ProviderGroupProvidersRelationshipView,
ProviderGroupViewSet,
ProviderSecretViewSet,
ProviderViewSet,
ResourceViewSet,
RoleProviderGroupRelationshipView,
RoleViewSet,
ScanViewSet,
ScheduleViewSet,
SchemaView,
TaskViewSet,
TenantMembersViewSet,
TenantViewSet,
UserRoleRelationshipView,
UserViewSet,
)
@@ -112,3 +113,6 @@ urlpatterns = [
path("schema", SchemaView.as_view(), name="schema"),
path("docs", SpectacularRedocView.as_view(url_name="schema"), name="docs"),
]
if settings.DEBUG:
urlpatterns += [path("silk/", include("silk.urls", namespace="silk"))]

View File

@@ -4,7 +4,7 @@ from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce, JSONObject
from django.db.models.functions import Coalesce
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_control
@@ -193,7 +193,7 @@ class SchemaView(SpectacularAPIView):
def get(self, request, *args, **kwargs):
spectacular_settings.TITLE = "Prowler API"
spectacular_settings.VERSION = "1.3.1"
spectacular_settings.VERSION = "1.3.2"
spectacular_settings.DESCRIPTION = (
"Prowler API specification.\n\nThis file is auto-generated."
)
@@ -1392,48 +1392,59 @@ class FindingViewSet(BaseRLSViewSet):
@action(detail=False, methods=["get"], url_name="metadata")
def metadata(self, request):
tenant_id = self.request.tenant_id
queryset = self.get_queryset()
filtered_queryset = self.filter_queryset(queryset)
result = filtered_queryset.aggregate(
services=ArrayAgg("resources__service", flat=True, distinct=True),
regions=ArrayAgg("resources__region", flat=True, distinct=True),
tags=ArrayAgg(
JSONObject(
key=F("resources__tags__key"), value=F("resources__tags__value")
),
distinct=True,
filter=Q(resources__tags__key__isnull=False),
),
resource_types=ArrayAgg("resources__type", flat=True, distinct=True),
)
if result["services"] is None:
result["services"] = []
if result["regions"] is None:
result["regions"] = []
if result["regions"] is None:
result["regions"] = []
if result["resource_types"] is None:
result["resource_types"] = []
if result["tags"] is None:
result["tags"] = []
relevant_resources = Resource.objects.filter(
tenant_id=tenant_id, findings__in=filtered_queryset
).distinct()
tags_dict = {}
for t in result["tags"]:
key, value = t["key"], t["value"]
if key not in tags_dict:
tags_dict[key] = []
tags_dict[key].append(value)
result["tags"] = tags_dict
serializer = self.get_serializer(
data=result,
services = (
relevant_resources.values_list("service", flat=True)
.distinct()
.order_by("service")
)
regions = (
relevant_resources.exclude(region="")
.values_list("region", flat=True)
.distinct()
.order_by("region")
)
resource_types = (
relevant_resources.values_list("type", flat=True)
.distinct()
.order_by("type")
)
# Temporarily disabled until we implement tag filtering in the UI
# tag_data = (
# relevant_resources
# .filter(tags__key__isnull=False, tags__value__isnull=False)
# .exclude(tags__key="")
# .exclude(tags__value="")
# .values("tags__key", "tags__value")
# .distinct()
# .order_by("tags__key", "tags__value")
# )
#
# tags_dict = {}
# for row in tag_data:
# k, v = row["tags__key"], row["tags__value"]
# tags_dict.setdefault(k, []).append(v)
result = {
"services": list(services),
"regions": list(regions),
"resource_types": list(resource_types),
# "tags": tags_dict
}
serializer = self.get_serializer(data=result)
serializer.is_valid(raise_exception=True)
return Response(data=serializer.data, status=status.HTTP_200_OK)
return Response(serializer.data, status=status.HTTP_200_OK)
@extend_schema_view(

View File

@@ -37,3 +37,9 @@ REST_FRAMEWORK["DEFAULT_FILTER_BACKENDS"] = tuple( # noqa: F405
) + ("api.filters.CustomDjangoFilterBackend",)
SECRETS_ENCRYPTION_KEY = "ZMiYVo7m4Fbe2eXXPyrwxdJss2WSalXSv3xHBcJkPl0="
MIDDLEWARE += [ # noqa: F405
"silk.middleware.SilkyMiddleware",
]
INSTALLED_APPS += ["silk"] # noqa: F405

View File

@@ -152,6 +152,9 @@ def perform_prowler_scan(
for progress, findings in prowler_scan.scan():
for finding in findings:
if finding is None:
logger.error(f"None finding detected on scan {scan_id}.")
continue
for attempt in range(CELERY_DEADLOCK_ATTEMPTS):
try:
with rls_transaction(tenant_id):
@@ -176,7 +179,10 @@ def perform_prowler_scan(
# Update resource fields if necessary
updated_fields = []
if resource_instance.region != finding.region:
if (
finding.region
and resource_instance.region != finding.region
):
resource_instance.region = finding.region
updated_fields.append("region")
if resource_instance.service != finding.service_name: