Skip to content

Commit bae12d0

Browse files
committed
add cloudwatch dashboard
1 parent 3942baa commit bae12d0

File tree

3 files changed

+172
-4
lines changed

3 files changed

+172
-4
lines changed

aws_sra_examples/solutions/genai/bedrock_org/lambda/src/app.py

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import json
23
import os
34
import logging
@@ -57,14 +58,15 @@ def load_kms_key_policies() -> dict:
5758
def load_cloudwatch_oam_sink_policy() -> dict:
    """Load the CloudWatch OAM sink policy template from disk.

    The file name is relative, so it is resolved against the process's
    current working directory.

    Returns:
        dict: parsed contents of sra_cloudwatch_oam_sink_policy.json
    """
    # Pin the encoding: without it open() falls back to the platform locale,
    # which is not guaranteed to be UTF-8.
    with open("sra_cloudwatch_oam_sink_policy.json", "r", encoding="utf-8") as file:
        return json.load(file)
60-
# ["sra-oam-sink-policy"]["Statement"][0]["Condition"]["ForAnyValue:StringEquals"]["aws:PrincipalOrgID"]
6161

6262

6363
def load_sra_cloudwatch_oam_trust_policy() -> dict:
    """Load the CloudWatch OAM trust policy template from disk.

    The file name is relative, so it is resolved against the process's
    current working directory.

    Returns:
        dict: parsed contents of sra_cloudwatch_oam_trust_policy.json
    """
    # Pin the encoding: without it open() falls back to the platform locale,
    # which is not guaranteed to be UTF-8.
    with open("sra_cloudwatch_oam_trust_policy.json", "r", encoding="utf-8") as file:
        return json.load(file)
66-
# ["Statement"][0]["Principal"]["AWS"]
6766

67+
def load_sra_cloudwatch_dashboard() -> dict:
    """Load the CloudWatch dashboard template from disk.

    The file name is relative, so it is resolved against the process's
    current working directory.

    Returns:
        dict: parsed contents of sra_cloudwatch_dashboard.json
    """
    # Pin the encoding: without it open() falls back to the platform locale,
    # which is not guaranteed to be UTF-8.
    with open("sra_cloudwatch_dashboard.json", "r", encoding="utf-8") as file:
        return json.load(file)
6870

6971
# Global vars
7072
RESOURCE_TYPE: str = ""
@@ -84,6 +86,7 @@ def load_sra_cloudwatch_oam_trust_policy() -> dict:
8486
ACCOUNT: str = boto3.client("sts").get_caller_identity().get("Account")
8587
REGION: str = os.environ.get("AWS_REGION")
8688
CFN_RESOURCE_ID: str = "sra-bedrock-org-function"
89+
ALARM_SNS_KEY_ALIAS = "sra-alarm-sns-key"
8790

8891
# CFN_RESPONSE_DATA definition:
8992
# dry_run: bool - type of run
@@ -107,7 +110,7 @@ def load_sra_cloudwatch_oam_trust_policy() -> dict:
107110
KMS_KEY_POLICIES: dict = load_kms_key_policies()
108111
CLOUDWATCH_OAM_SINK_POLICY: dict = load_cloudwatch_oam_sink_policy()
109112
CLOUDWATCH_OAM_TRUST_POLICY: dict = load_sra_cloudwatch_oam_trust_policy()
110-
ALARM_SNS_KEY_ALIAS = "sra-alarm-sns-key"
113+
CLOUDWATCH_DASHBOARD: dict = load_sra_cloudwatch_dashboard()
111114

112115
# Instantiate sra class objects
113116
# todo(liamschn): can these files exist in some central location to be shared with other solutions?
@@ -325,6 +328,27 @@ def build_s3_metric_filter_pattern(bucket_names: list, filter_pattern_template:
325328
s3_filter = s3_filter.replace('&& ($.requestParameters.bucketName = "<BUCKET_NAME_PLACEHOLDER>")', "")
326329
return s3_filter
327330

331+
def build_cloudwatch_dashboard(dashboard_template, bedrock_accounts, regions):
    """Fill in the dashboard template for every bedrock account/region pair.

    The first widget's metric list is expected to hold four entries: two
    management-account metrics (indexes 0 and 1) followed by the
    prompt-injection and sensitive-info metric templates (indexes 2 and 3).
    The two templates are repeated once per (account, region) combination.

    Args:
        dashboard_template (dict): dashboard template keyed by "sra-bedrock-org"
        bedrock_accounts (list): account ids to add bedrock metrics for
        regions (list): regions to add bedrock metrics for

    Returns:
        dict: a populated copy of the template; the argument is left untouched
    """
    # Build on a private copy. The caller passes the module-level
    # CLOUDWATCH_DASHBOARD, so editing it in place would make every later call
    # in the same process append another set of metric rows to the previous ones.
    dashboard = copy.deepcopy(dashboard_template)
    widget_properties = dashboard["sra-bedrock-org"]["widgets"][0]["properties"]
    metrics = widget_properties["metrics"]

    # Indexes 0 and 1 are reported from the management account's home region.
    for management_metric in metrics[:2]:
        management_metric[2]["accountId"] = sts.MANAGEMENT_ACCOUNT
        management_metric[2]["region"] = sts.HOME_REGION
    widget_properties["region"] = sts.HOME_REGION

    injection_template = metrics[2]
    sensitive_info_template = metrics[3]
    per_target_metrics = []
    for bedrock_account in bedrock_accounts:
        for region in regions:
            for metric_template in (injection_template, sensitive_info_template):
                metric = copy.deepcopy(metric_template)
                metric[2]["accountId"] = bedrock_account
                metric[2]["region"] = region
                per_target_metrics.append(metric)

    # With no account/region pairs the two placeholder rows are left as they
    # are, which is what the in-place version did as well.
    if per_target_metrics:
        metrics[2:4] = per_target_metrics
    return dashboard
328352

329353
def create_event(event, context):
330354
global DRY_RUN_DATA
@@ -704,7 +728,7 @@ def create_event(event, context):
704728
"OAMCrossAccountRolePolicyAttach"
705729
] = f"DRY_RUN: Attach {policy_arn} policy to {cloudwatch.CROSS_ACCOUNT_ROLE_NAME} IAM role"
706730

707-
# 5d) OAM link in bedrock account
731+
# 5e) OAM link in bedrock account
708732
cloudwatch.CWOAM_CLIENT = sts.assume_role(bedrock_account, sts.CONFIGURATION_ROLE, "oam", bedrock_region)
709733
search_oam_link = cloudwatch.find_oam_link(oam_sink_arn)
710734
if search_oam_link[0] is False:
@@ -721,6 +745,41 @@ def create_event(event, context):
721745
else:
722746
LOGGER.info("CloudWatch observability access manager link found")
723747

748+
# 6) Cloudwatch dashboard in security account
749+
cloudwatch_dashboard = build_cloudwatch_dashboard(CLOUDWATCH_DASHBOARD, central_observability_params["bedrock_accounts"], central_observability_params["regions"])
750+
cloudwatch.CLOUDWATCH_CLIENT = sts.assume_role(SECURITY_ACCOUNT, sts.CONFIGURATION_ROLE, "cloudwatch", sts.HOME_REGION)
751+
# sra-bedrock-filter-prompt-injection-metric template ["sra-bedrock-org"]["widgets"][0]["properties"]["metrics"][2]
752+
# sra-bedrock-filter-sensitive-info-metric template ["sra-bedrock-org"]["widgets"][0]["properties"]["metrics"][3]
753+
754+
search_dashboard = cloudwatch.find_dashboard(SOLUTION_NAME)
755+
if search_dashboard[0] is False:
756+
if DRY_RUN is False:
757+
LOGGER.info("CloudWatch observability dashboard not found, creating...")
758+
cloudwatch.create_dashboard(cloudwatch.SOLUTION_NAME, cloudwatch_dashboard)
759+
LIVE_RUN_DATA["CloudWatchDashboardCreate"] = "Created CloudWatch observability dashboard"
760+
CFN_RESPONSE_DATA["deployment_info"]["action_count"] += 1
761+
CFN_RESPONSE_DATA["deployment_info"]["resources_deployed"] += 1
762+
LOGGER.info("Created CloudWatch observability dashboard")
763+
else:
764+
LOGGER.info("DRY_RUN: CloudWatch observability dashboard not found, creating...")
765+
DRY_RUN_DATA["CloudWatchDashboardCreate"] = "DRY_RUN: Create CloudWatch observability dashboard"
766+
else:
767+
LOGGER.info(f"Cloudwatch dashboard already exists: {search_dashboard[1]}")
768+
# check_dashboard = cloudwatch.compare_dashboard(search_dashboard[1], cloudwatch_dashboard)
769+
# if check_dashboard is False:
770+
# if DRY_RUN is False:
771+
# LOGGER.info("CloudWatch observability dashboard needs updating...")
772+
# cloudwatch.create_dashboard(cloudwatch.SOLUTION_NAME, cloudwatch_dashboard)
773+
# LIVE_RUN_DATA["OAMDashboardUpdate"] = "Updated CloudWatch observability dashboard"
774+
# CFN_RESPONSE_DATA["deployment_info"]["action_count"] += 1
775+
# CFN_RESPONSE_DATA["deployment_info"]["configuration_changes"] += 1
776+
# LOGGER.info("Updated CloudWatch observability dashboard")
777+
# else:
778+
# LOGGER.info("DRY_RUN: CloudWatch observability dashboard needs updating...")
779+
# DRY_RUN_DATA["OAMDashboardUpdate"] = "DRY_RUN: Update CloudWatch observability dashboard"
780+
# else:
781+
# LOGGER.info("CloudWatch observability dashboard is correct")
782+
724783
# End
725784
# TODO(liamschn): Consider the 256 KB limit for any cloudwatch log message
726785
if DRY_RUN is False:

aws_sra_examples/solutions/genai/bedrock_org/lambda/src/sra_cloudwatch.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,3 +407,69 @@ def delete_oam_link(self, link_arn: str) -> None:
407407
except ClientError as e:
408408
self.LOGGER.info(self.UNEXPECTED)
409409
raise ValueError(f"Unexpected error executing Lambda function. {e}") from None
410+
411+
def find_dashboard(self, dashboard_name: str) -> tuple[bool, str]:
    """Find the CloudWatch dashboard for SRA by its exact name.

    The listing is followed across pages via NextToken, so a dashboard that
    is not on the first page of results is still found.

    Args:
        dashboard_name (str): name of the dashboard

    Raises:
        ValueError: if listing fails with any client error other than
            ResourceNotFoundException

    Returns:
        tuple[bool, str]: (True, dashboard ARN) if found, otherwise (False, "")
    """
    # The prefix only narrows the listing; the exact-name comparison below
    # still decides whether an entry is the one being looked for.
    list_params: dict = {"DashboardNamePrefix": dashboard_name}
    try:
        while True:
            response = self.CLOUDWATCH_CLIENT.list_dashboards(**list_params)
            for dashboard in response.get("DashboardEntries", []):
                if dashboard["DashboardName"] == dashboard_name:
                    self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} found: {dashboard['DashboardArn']}")
                    return True, dashboard["DashboardArn"]
            next_token = response.get("NextToken")
            if not next_token:
                break
            list_params["NextToken"] = next_token
    except ClientError as error:
        if error.response["Error"]["Code"] == "ResourceNotFoundException":
            self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} not found. Error code: {error.response['Error']['Code']}")
            return False, ""
        self.LOGGER.info(self.UNEXPECTED)
        raise ValueError(f"Unexpected error executing Lambda function. {error}") from None
    self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} not found")
    return False, ""
435+
436+
def create_dashboard(self, dashboard_name: str, dashboard_body: dict) -> str:
    """Create the CloudWatch dashboard for SRA in the organization.

    Args:
        dashboard_name (str): name of the dashboard
        dashboard_body (dict): body of the dashboard; serialized to JSON before it is sent

    Raises:
        ValueError: if put_dashboard fails with any client error other than
            ResourceAlreadyExistsException

    Returns:
        str: ARN of the dashboard ("" if it cannot be looked up afterwards)
    """
    try:
        response = self.CLOUDWATCH_CLIENT.put_dashboard(
            DashboardName=dashboard_name,
            DashboardBody=json.dumps(dashboard_body),
        )
    except ClientError as error:
        if error.response["Error"]["Code"] == "ResourceAlreadyExistsException":
            self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} already exists")
            return self.find_dashboard(dashboard_name)[1]
        self.LOGGER.info(self.UNEXPECTED)
        raise ValueError(f"Unexpected error executing Lambda function. {error}") from None

    # Surface anything the service flagged about the body instead of dropping it.
    for validation_message in response.get("DashboardValidationMessages", []):
        self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} validation message: {validation_message}")

    # The put_dashboard response documents only DashboardValidationMessages, so
    # indexing it for "DashboardArn" raised KeyError after the dashboard had
    # already been written. Use the key if it is ever present, else look it up.
    dashboard_arn = response.get("DashboardArn") or self.find_dashboard(dashboard_name)[1]
    self.LOGGER.info(f"CloudWatch dashboard {dashboard_name} created: {dashboard_arn}")
    return dashboard_arn
460+
461+
def delete_dashboard(self, dashboard_arn: str) -> None:
    """Delete the CloudWatch dashboard for SRA in the organization.

    Args:
        dashboard_arn (str): ARN of the dashboard; a plain dashboard name is
            also accepted and passed through unchanged

    Raises:
        ValueError: if delete_dashboards fails with a client error

    Returns:
        None
    """
    # delete_dashboards takes dashboard names, not ARNs. A dashboard ARN ends
    # in ":dashboard/<name>", so strip everything up to and including that.
    if dashboard_arn.startswith("arn:"):
        dashboard_name = dashboard_arn.rsplit(":dashboard/", 1)[-1]
    else:
        dashboard_name = dashboard_arn
    try:
        self.CLOUDWATCH_CLIENT.delete_dashboards(DashboardNames=[dashboard_name])
        self.LOGGER.info(f"CloudWatch dashboard {dashboard_arn} deleted")
    except ClientError as e:
        self.LOGGER.info(self.UNEXPECTED)
        raise ValueError(f"Unexpected error executing Lambda function. {e}") from None
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"sra-bedrock-org": {
3+
"widgets": [
4+
{
5+
"height": 9,
6+
"width": 24,
7+
"y": 0,
8+
"x": 0,
9+
"type": "metric",
10+
"properties": {
11+
"metrics": [
12+
[
13+
".",
14+
"sra-bedrock-filter-bucket-changes-metric",
15+
{ "accountId": "<MANAGEMENT_ACCOUNT>", "region": "<HOME_REGION>" }
16+
],
17+
[
18+
".",
19+
"sra-bedrock-filter-service-changes-metric",
20+
{ "accountId": "<MANAGEMENT_ACCOUNT>", "region": "<HOME_REGION>" }
21+
],
22+
[
23+
"sra-bedrock",
24+
"sra-bedrock-filter-prompt-injection-metric",
25+
{ "accountId": "<BEDROCK_ACCOUNT>", "region": "<REGION>" }
26+
],
27+
[
28+
".",
29+
"sra-bedrock-filter-sensitive-info-metric",
30+
{ "accountId": "<BEDROCK_ACCOUNT>", "region": "<REGION>" }
31+
]
32+
],
33+
"view": "timeSeries",
34+
"stacked": false,
35+
"region": "<HOME_REGION>",
36+
"title": "SRA Bedrock Generative AI Metrics and Alarms",
37+
"period": 1,
38+
"stat": "Sum"
39+
}
40+
}
41+
]
42+
}
43+
}

0 commit comments

Comments
 (0)