Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7009920
Add customMetrics to telemetry validation queries in validate_ai.sh
tokaplan Oct 31, 2025
6340d66
Fix syntax error in queries array declaration in validate_ai.sh
tokaplan Oct 31, 2025
0b0678c
Merge branch 'ai_prod' into alkaplan/extend-validation-pipeline
tokaplan Nov 7, 2025
b4870f3
Extend validation pipeline with OTEL validation
tokaplan Nov 7, 2025
ce66d21
Update applicationInsightsConnectionString in appmonitoring-cr.yaml f…
tokaplan Nov 7, 2025
22240dc
Enhance telemetry validation scripts to include HTTP status codes in …
tokaplan Nov 7, 2025
a57d23b
Add client_id logging to AI and OTEL validation scripts
tokaplan Nov 7, 2025
be56f53
Refactor AI and OTEL telemetry validation scripts; consolidate checks…
tokaplan Nov 7, 2025
c54e725
Add Go instrumented test app and update validation scripts for integr…
tokaplan Nov 7, 2025
8f9840a
Update azure_pipeline_validation_appmonitoring.yaml for improved clar…
tokaplan Nov 8, 2025
68c3f35
Update Helm push command in Chart.yaml to simplify the path
tokaplan Nov 8, 2025
d0cab23
Update telemetry validation script to include OTelLogs and adjust Hel…
tokaplan Nov 8, 2025
62da549
Update telemetry validation script to include OTelLogs and adjust Hel…
tokaplan Nov 8, 2025
32e749a
Enhance telemetry validation scripts to include Go app support; updat…
tokaplan Nov 8, 2025
5486df4
Merge branch 'alkaplan/extend-validation-pipeline' of https://github.…
tokaplan Nov 8, 2025
a074799
Remove .tgz files from repository
tokaplan Nov 8, 2025
c7ba2f2
Enhance error handling in Node.js test server; record exceptions in O…
tokaplan Nov 8, 2025
736ba8e
Update cow type in metrics counter for Node.js test server
tokaplan Nov 8, 2025
0f4f3e7
Adjust error simulation probability in /call-target endpoint to 20%
tokaplan Nov 8, 2025
bbb6ffe
Add AMW metrics validation and update instrumentation for multiple la…
tokaplan Nov 8, 2025
f176ea7
Add OpenTelemetry environment variable for custom metrics in dotnet t…
tokaplan Nov 8, 2025
0c8d814
Add Helm push commands for test applications in Chart.yaml files
tokaplan Nov 8, 2025
9477cb6
Enhance AMW validation script with detailed access token decoding and…
tokaplan Nov 8, 2025
cfdac5f
Update Prometheus query to use service.instance.id label for pod name
tokaplan Nov 8, 2025
39c3dca
Update Prometheus query to use service.instance.id label for pod name
tokaplan Nov 8, 2025
a6c7ab0
Exclude sdk.version in Prometheus query for cows_sold_total metric va…
tokaplan Nov 8, 2025
0faccce
Add OpenTelemetry metrics for cows sold in .NET application
tokaplan Nov 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions .pipelines/azure_pipeline_validation_appmonitoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ variables:
pythonTestAppName: 'python-test-app'
dotnetTestAppImageName: '${{ variables.containerRegistry }}.azurecr.io/demoaks-dotnet-app:latest'
dotnetTestAppName: 'dotnet-test-app'
goTestAppImageName: '${{ variables.containerRegistry }}.azurecr.io/demoaks-go-instrumented-app:latest'
goTestAppName: 'go-instrumented-test-app'
testNamespace: 'test-ns'
aiResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourceGroups/aks-appmonitoring-pipeline/providers/microsoft.insights/components/appmonitoring-pipeline-validation-ai'
aiResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourcegroups/aks-appmonitoring-pipeline/providers/microsoft.insights/components/appmonitoring-pipeline-validation-ai-otel'
lawResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourcegroups/ai_appmonitoring-pipeline-validation-ai-ote_37743a46-5226-447c-842b-35fac54dbd92_managed/providers/microsoft.operationalinsights/workspaces/managed-appmonitoring-pipeline-validation-ai-otel-ws'
amwQueryEndpoint: 'https://managed-appmonitoring-pipeline-validatio-amw-axfudjacdrgbe5ht.eastus.prometheus.monitor.azure.com'
Codeql.Enabled: true
Codeql.BuildIdentifier: 'linuxbuild'
AKSResourceGroup: 'aks-appmonitoring-pipeline'
Expand Down Expand Up @@ -296,11 +300,13 @@ jobs:
export NODEJS_TEST_IMAGE_NAME=${{ variables.nodeTestAppImageName }}
export PYTHON_TEST_IMAGE_NAME=${{ variables.pythonTestAppImageName }}
export DOTNET_TEST_IMAGE_NAME=${{ variables.dotnetTestAppImageName }}
export GO_TEST_IMAGE_NAME=${{ variables.goTestAppImageName }}

export JAVA_TEST_APP_NAME="${{ variables.javaTestAppName }}"
export NODEJS_TEST_APP_NAME="${{ variables.nodeTestAppName }}"
export PYTHON_TEST_APP_NAME="${{ variables.pythonTestAppName }}"
export DOTNET_TEST_APP_NAME="${{ variables.dotnetTestAppName }}"
export GO_TEST_APP_NAME="${{ variables.goTestAppName }}"

export TEST_APP_SOURCE_NAME="nodejs-source-app"
export NODEJS_CALLER_APP_NAME="nodejs-caller-app"
Expand Down Expand Up @@ -328,13 +334,13 @@ jobs:

sudo chmod u+x ./validate-mutation.sh

if ! ./validate-mutation.sh ${{ variables.javaTestAppName }} ${{ variables.nodeTestAppName }} ${{ variables.pythonTestAppName }} ${{ variables.dotnetTestAppName }} ${{ variables.testNamespace }}; then
if ! ./validate-mutation.sh ${{ variables.javaTestAppName }} ${{ variables.nodeTestAppName }} ${{ variables.pythonTestAppName }} ${{ variables.dotnetTestAppName }} ${{ variables.goTestAppName }} ${{ variables.testNamespace }}; then
echo "Mutation validation failed"
exit 1
fi

- task: AzureCLI@2
displayName: "Check test apps are sending telemetry to AI"
displayName: "Check test apps are sending AI and OTEL telemetry"
inputs:
azureSubscription: ${{ variables.armServiceConnectionName }}
scriptType: bash
Expand All @@ -348,17 +354,29 @@ jobs:
export NODEJS_TEST_APP_NAME="${{ variables.nodeTestAppName }}"
export PYTHON_TEST_APP_NAME="${{ variables.pythonTestAppName }}"
export DOTNET_TEST_APP_NAME="${{ variables.dotnetTestAppName }}"
export GO_TEST_APP_NAME="${{ variables.goTestAppName }}"

echo "Wait 30s for telemetry to flow..."
sleep 30

sudo chmod u+x ./validate_ai.sh

if ! ./validate_ai.sh ${{ variables.aiResourceId }} ${{ variables.testNamespace }}; then
echo "Validating AI telemetry..."
if ! ./validate_ai.sh ${{ variables.lawResourceId }} ${{ variables.testNamespace }} "java,nodejs,python,dotnet" "AppRoleInstance" "AppRequests" "AppDependencies" "AppMetrics" "AppExceptions"; then
echo "AI telemetry validation failed"
exit 1
fi

echo "Validating OTEL telemetry..."
if ! ./validate_ai.sh ${{ variables.lawResourceId }} ${{ variables.testNamespace }} "go" "ServiceInstanceId" "OTelSpans" "OTelResources" "OTelLogs"; then
echo "OTEL telemetry validation failed"
exit 1
fi

echo "Validating AMW metrics..."
sudo chmod u+x ./validate_amw.sh
if ! ./validate_amw.sh ${{ variables.amwQueryEndpoint }} ${{ variables.testNamespace }} "java,nodejs,python,dotnet,go"; then
echo "AMW metrics validation failed"
exit 1
fi

- task: AzureCLI@2
displayName: "Validate Housekeeper Cron Job"
inputs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@ variables:
pythonTestAppName: 'python-test-app'
dotnetTestAppImageName: '${{ variables.containerRegistry }}.azurecr.io/demoaks-dotnet-app:latest'
dotnetTestAppName: 'dotnet-test-app'
goTestAppImageName: '${{ variables.containerRegistry }}.azurecr.io/demoaks-go-instrumented-app:latest'
goTestAppName: 'go-instrumented-test-app'
testNamespace: 'test-ns'
aiConnectionString: 'InstrumentationKey=2b453402-fcfb-408f-8495-c551f0e82f46;IngestionEndpoint=https://eastus-8.in.applicationinsights.azure.com/;LiveEndpoint=https://eastus.livediagnostics.monitor.azure.com/'
aiResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourceGroups/aks-appmonitoring-pipeline/providers/microsoft.insights/components/appmonitoring-pipeline-validation-ai'
aiResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourceGroups/aks-appmonitoring-pipeline/providers/microsoft.insights/components/appmonitoring-pipeline-validation-ai-otel'
lawResourceId: '/subscriptions/5a3b3ba4-3a42-42ae-b2cb-f882345803bc/resourcegroups/ai_appmonitoring-pipeline-validation-ai-ote_37743a46-5226-447c-842b-35fac54dbd92_managed/providers/microsoft.operationalinsights/workspaces/managed-appmonitoring-pipeline-validation-ai-otel-ws'
amwQueryEndpoint: 'https://managed-appmonitoring-pipeline-validatio-amw-axfudjacdrgbe5ht.eastus.prometheus.monitor.azure.com'
Codeql.Enabled: true
Codeql.BuildIdentifier: 'linuxbuild'
AKSResourceGroup: 'aks-appmonitoring-pipeline'
Expand Down Expand Up @@ -348,11 +352,13 @@ jobs:
export NODEJS_TEST_IMAGE_NAME=${{ variables.nodeTestAppImageName }}
export PYTHON_TEST_IMAGE_NAME=${{ variables.pythonTestAppImageName }}
export DOTNET_TEST_IMAGE_NAME=${{ variables.dotnetTestAppImageName }}
export GO_TEST_IMAGE_NAME=${{ variables.goTestAppImageName }}

export JAVA_TEST_APP_NAME="${{ variables.javaTestAppName }}"
export NODEJS_TEST_APP_NAME="${{ variables.nodeTestAppName }}"
export PYTHON_TEST_APP_NAME="${{ variables.pythonTestAppName }}"
export DOTNET_TEST_APP_NAME="${{ variables.dotnetTestAppName }}"
export GO_TEST_APP_NAME="${{ variables.goTestAppName }}"
export TEST_APP_SOURCE_NAME="nodejs-source-app"
export NODEJS_CALLER_APP_NAME="nodejs-caller-app"

Expand All @@ -377,13 +383,13 @@ jobs:

sudo chmod u+x ./validate-mutation.sh

if ! ./validate-mutation.sh ${{ variables.javaTestAppName }} ${{ variables.nodeTestAppName }} ${{ variables.pythonTestAppName }} ${{ variables.dotnetTestAppName }} ${{ variables.testNamespace }}; then
if ! ./validate-mutation.sh ${{ variables.javaTestAppName }} ${{ variables.nodeTestAppName }} ${{ variables.pythonTestAppName }} ${{ variables.dotnetTestAppName }} ${{ variables.goTestAppName }} ${{ variables.testNamespace }}; then
echo "Mutation validation failed"
exit 1
fi

- task: AzureCLI@2
displayName: "Check test apps are sending telemetry to AI"
displayName: "Check test apps are sending AI and OTEL telemetry"
inputs:
azureSubscription: ${{ variables.armServiceConnectionName }}
scriptType: bash
Expand All @@ -397,17 +403,29 @@ jobs:
export NODEJS_TEST_APP_NAME="${{ variables.nodeTestAppName }}"
export PYTHON_TEST_APP_NAME="${{ variables.pythonTestAppName }}"
export DOTNET_TEST_APP_NAME="${{ variables.dotnetTestAppName }}"
export GO_TEST_APP_NAME="${{ variables.goTestAppName }}"

echo "Wait 30s for telemetry to flow..."
sleep 30

sudo chmod u+x ./validate_ai.sh

if ! ./validate_ai.sh ${{ variables.aiResourceId }} ${{ variables.testNamespace }}; then
echo "Validating AI telemetry..."
if ! ./validate_ai.sh ${{ variables.lawResourceId }} ${{ variables.testNamespace }} "java,nodejs,python,dotnet" "AppRoleInstance" "AppRequests" "AppDependencies" "AppMetrics" "AppExceptions"; then
echo "AI telemetry validation failed"
exit 1
fi

echo "Validating OTEL telemetry..."
if ! ./validate_ai.sh ${{ variables.lawResourceId }} ${{ variables.testNamespace }} "go" "ServiceInstanceId" "OTelSpans" "OTelResources" "OTelLogs"; then
echo "OTEL telemetry validation failed"
exit 1
fi

echo "Validating AMW metrics..."
sudo chmod u+x ./validate_amw.sh
if ! ./validate_amw.sh ${{ variables.amwQueryEndpoint }} ${{ variables.testNamespace }} "java,nodejs,python,dotnet,go"; then
echo "AMW metrics validation failed"
exit 1
fi

- task: AzureCLI@2
displayName: "Validate Housekeeper Cron Job"
inputs:
Expand Down
19 changes: 18 additions & 1 deletion appmonitoring/scripts/install-test-apps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ require_env JAVA_TEST_APP_NAME
require_env NODEJS_TEST_APP_NAME
require_env PYTHON_TEST_APP_NAME
require_env DOTNET_TEST_APP_NAME
require_env GO_TEST_APP_NAME
require_env NODEJS_CALLER_APP_NAME
require_env JAVA_TEST_IMAGE_NAME
require_env NODEJS_TEST_IMAGE_NAME
require_env PYTHON_TEST_IMAGE_NAME
require_env DOTNET_TEST_IMAGE_NAME
require_env GO_TEST_IMAGE_NAME

if ! command -v envsubst >/dev/null 2>&1; then
echo "Error: envsubst command not found"
Expand All @@ -36,12 +38,14 @@ JAVA_RELEASE_NAME=${JAVA_TEST_APP_NAME}
NODEJS_RELEASE_NAME=${NODEJS_TEST_APP_NAME}
PYTHON_RELEASE_NAME=${PYTHON_TEST_APP_NAME}
DOTNET_RELEASE_NAME=${DOTNET_TEST_APP_NAME}
GO_RELEASE_NAME=${GO_TEST_APP_NAME}
CALLER_RELEASE_NAME=${NODEJS_CALLER_APP_NAME}

JAVA_SERVICE_HOST="${JAVA_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local"
NODEJS_SERVICE_HOST="${NODEJS_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local"
PYTHON_SERVICE_HOST="${PYTHON_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local"
DOTNET_SERVICE_HOST="${DOTNET_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local"
GO_SERVICE_HOST="${GO_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local"
SOURCE_SERVICE_URL="http://${SOURCE_RELEASE_NAME}-service.${TEST_NS}.svc.cluster.local:3001"

# Delete existing test apps if they exist - TEMPORARY - WILL BE REMOVED LATER
Expand All @@ -50,6 +54,7 @@ cat ../validation-helm/test-apps/java/chart.yaml | envsubst | kubectl delete -f
cat ../validation-helm/test-apps/nodejs/chart.yaml | envsubst | kubectl delete -f - --ignore-not-found
cat ../validation-helm/test-apps/python/chart.yaml | envsubst | kubectl delete -f - --ignore-not-found
cat ../validation-helm/test-apps/dotnet/chart.yaml | envsubst | kubectl delete -f - --ignore-not-found
cat ../validation-helm/test-apps/go-instrumented/chart.yaml | envsubst | kubectl delete -f - --ignore-not-found
cat ../validation-helm/test-apps/testappcaller/chart.yaml | envsubst | kubectl delete -f - --ignore-not-found


Expand All @@ -59,6 +64,7 @@ helm uninstall -n ${TEST_NS} "${JAVA_RELEASE_NAME}" --ignore-not-found 2>/dev/nu
helm uninstall -n ${TEST_NS} "${NODEJS_RELEASE_NAME}" --ignore-not-found 2>/dev/null || true
helm uninstall -n ${TEST_NS} "${PYTHON_RELEASE_NAME}" --ignore-not-found 2>/dev/null || true
helm uninstall -n ${TEST_NS} "${DOTNET_RELEASE_NAME}" --ignore-not-found 2>/dev/null || true
helm uninstall -n ${TEST_NS} "${GO_RELEASE_NAME}" --ignore-not-found 2>/dev/null || true
helm uninstall -n ${TEST_NS} "${CALLER_RELEASE_NAME}" --ignore-not-found 2>/dev/null || true


Expand Down Expand Up @@ -124,14 +130,25 @@ if ! helm install "${DOTNET_RELEASE_NAME}" oci://${ACR_NAME}/helm/testapps/dotne
exit 1
fi

# this is the instrumented go app
echo "Installing ${GO_RELEASE_NAME}..."
if ! helm install "${GO_RELEASE_NAME}" oci://${ACR_NAME}/helm/testapps/go-instrumented-test-app --version "${CHART_VERSION}" -n "${TEST_NS}" \
--set-string appName="${GO_RELEASE_NAME}" \
--set-string image="${GO_TEST_IMAGE_NAME}" \
--set-string targetUrl="${SOURCE_SERVICE_URL}"; then
echo "Error: ${GO_RELEASE_NAME} installation failed"
exit 1
fi

# this is the app that will periodically call the instrumented apps to generate request telemetry
echo "Installing ${CALLER_RELEASE_NAME}..."
if ! helm install "${CALLER_RELEASE_NAME}" oci://${ACR_NAME}/helm/testapps/testappcaller --version "${CHART_VERSION}" -n "${TEST_NS}" \
--set-string appName="${CALLER_RELEASE_NAME}" \
--set-string javaHost="${JAVA_SERVICE_HOST}" \
--set-string nodejsHost="${NODEJS_SERVICE_HOST}" \
--set-string pythonHost="${PYTHON_SERVICE_HOST}" \
--set-string dotnetHost="${DOTNET_SERVICE_HOST}"; then
--set-string dotnetHost="${DOTNET_SERVICE_HOST}" \
--set-string goHost="${GO_SERVICE_HOST}"; then
echo "Error: ${CALLER_RELEASE_NAME} installation failed"
exit 1
fi
Expand Down
13 changes: 12 additions & 1 deletion appmonitoring/scripts/validate-mutation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ DEPLOYMENT_JAVA_NAME=$1
DEPLOYMENT_NODEJS_NAME=$2
DEPLOYMENT_PYTHON_NAME=$3
DEPLOYMENT_DOTNET_NAME=$4
NAMESPACE=$5
DEPLOYMENT_GO_NAME=$5
NAMESPACE=$6

# Define the property to check for
PROPERTY="APPLICATIONINSIGHTS_CONNECTION_STRING"
Expand All @@ -14,6 +15,7 @@ JAVA_DEPLOYMENT_NAME=$(kubectl get deployment -n "$NAMESPACE" -o custom-columns=
NODEJS_DEPLOYMENT_NAME=$(kubectl get deployment -n "$NAMESPACE" -o custom-columns=NAME:.metadata.name | grep "$DEPLOYMENT_NODEJS_NAME")
PYTHON_DEPLOYMENT_NAME=$(kubectl get deployment -n "$NAMESPACE" -o custom-columns=NAME:.metadata.name | grep "$DEPLOYMENT_PYTHON_NAME")
DOTNET_DEPLOYMENT_NAME=$(kubectl get deployment -n "$NAMESPACE" -o custom-columns=NAME:.metadata.name | grep "$DEPLOYMENT_DOTNET_NAME")
GO_DEPLOYMENT_NAME=$(kubectl get deployment -n "$NAMESPACE" -o custom-columns=NAME:.metadata.name | grep "$DEPLOYMENT_GO_NAME")

EXPECTED_ENV_VARS=(
"NODE_NAME"
Expand Down Expand Up @@ -41,6 +43,8 @@ DOTNET_ENV_VARS=(
"OTEL_DOTNET_AUTO_PLUGINS"
"OTEL_DOTNET_AUTO_LOGS_ENABLED"
)
GO_ENV_VARS=(
)

EXPECTED_INIT_CONTAINERS=(
"azure-monitor-auto-instrumentation-java"
Expand All @@ -52,6 +56,8 @@ PYTHON_EXPECTED_INIT_CONTAINERS=(
DOTNET_EXPECTED_INIT_CONTAINERS=(
"azure-monitor-auto-instrumentation-dotnet"
)
GO_EXPECTED_INIT_CONTAINERS=(
)

checkMutation() {
local deploymentName="$1"
Expand Down Expand Up @@ -126,3 +132,8 @@ if ! checkMutation "$DEPLOYMENT_DOTNET_NAME" DOTNET_ENV_VARS[@] DOTNET_EXPECTED_
echo "FATAL ERROR: checkMutation failed for $DEPLOYMENT_DOTNET_NAME"
exit 1
fi

if ! checkMutation "$DEPLOYMENT_GO_NAME" GO_ENV_VARS[@] GO_EXPECTED_INIT_CONTAINERS[@]; then
echo "FATAL ERROR: checkMutation failed for $DEPLOYMENT_GO_NAME"
exit 1
fi
60 changes: 45 additions & 15 deletions appmonitoring/scripts/validate_ai.sh
Original file line number Diff line number Diff line change
@@ -1,34 +1,50 @@
#!/bin/bash

AI_RES_ID=$1
WS_RES_ID=$1
NAMESPACE=$2
APPS_TO_VALIDATE=$3 # Comma-separated list of apps (e.g., "java,nodejs,python,dotnet" or "go")
ROLE_INSTANCE_FIELD=$4
shift 4 # Remove first 4 arguments
QUERIES=("$@") # Remaining arguments are the queries

# Validate that apps parameter is provided
if [[ -z "$APPS_TO_VALIDATE" ]]; then
echo "Error: APPS_TO_VALIDATE parameter is required (3rd argument)" >&2
echo "Usage: $0 <WS_RES_ID> <NAMESPACE> <APPS_TO_VALIDATE> <ROLE_INSTANCE_FIELD> <QUERIES...>" >&2
exit 1
fi

echo "Finding pods in namespace: $NAMESPACE for Java App $JAVA_TEST_APP_NAME, NodeJS App $NODEJS_TEST_APP_NAME, Python App $PYTHON_TEST_APP_NAME, and Dotnet App $DOTNET_TEST_APP_NAME"
echo "Finding pods in namespace: $NAMESPACE for Java App $JAVA_TEST_APP_NAME, NodeJS App $NODEJS_TEST_APP_NAME, Python App $PYTHON_TEST_APP_NAME, Dotnet App $DOTNET_TEST_APP_NAME, and Go App $GO_TEST_APP_NAME"
POD_JAVA_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=$JAVA_TEST_APP_NAME --no-headers -o custom-columns=":metadata.name" | head -n 1)
POD_NODEJS_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=$NODEJS_TEST_APP_NAME --no-headers -o custom-columns=":metadata.name" | head -n 1)
POD_PYTHON_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=$PYTHON_TEST_APP_NAME --no-headers -o custom-columns=":metadata.name" | head -n 1)
POD_DOTNET_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=$DOTNET_TEST_APP_NAME --no-headers -o custom-columns=":metadata.name" | head -n 1)
POD_GO_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=$GO_TEST_APP_NAME --no-headers -o custom-columns=":metadata.name" | head -n 1)


# Get an access token
result_rsp=$(curl 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://api.applicationinsights.io&mi_res_id=/subscriptions/66010356-d8a5-42d3-8593-6aaa3aeb1c11/resourceGroups/rambhatt-rnd-v2/providers/Microsoft.ManagedIdentity/userAssignedIdentities/rambhatt-agentpool-es-identity' -H Metadata:true -s)
# echo "Result: $result_rsp"
access_token=$(echo $result_rsp | jq -r '.access_token')
client_id=$(echo $result_rsp | jq -r '.client_id')

echo "$AI_RES_ID"
echo "Using identity with client_id: $client_id"
echo "Workspace: $WS_RES_ID"
echo "Role instance field: $ROLE_INSTANCE_FIELD"

# Define your variables
url="https://api.loganalytics.io/v1$AI_RES_ID/query"
url="https://api.loganalytics.io/v1$WS_RES_ID/query"

verify_AI_telemetry() {
local pod_name="$1"
local app_type="$2"
local skip_exceptions="$3"
local queries=("requests" "dependencies")
local tables=("${QUERIES[@]}")
local found_any=0

if [[ "$skip_exceptions" != "true" ]]; then
queries+=("exceptions")
# Remove AppExceptions from tables if skip_exceptions is true
if [[ "$skip_exceptions" == "true" ]]; then
tables=("${tables[@]/AppExceptions/}")
fi

echo "Validating telemetry for $pod_name ($app_type)..."
Expand All @@ -37,9 +53,14 @@ verify_AI_telemetry() {
exit 1
fi

for table in "${queries[@]}"; do
for table in "${tables[@]}"; do
# Skip empty entries (from removed AppExceptions)
[[ -z "$table" ]] && continue

query="$table | where TimeGenerated > ago(15m) | where $ROLE_INSTANCE_FIELD == '$pod_name' | count"

json_body="{
\"query\": \"$table | where timestamp > ago(15m) | where cloud_RoleInstance == '$pod_name' | count\",
\"query\": \"$query\",
\"options\": {
\"truncationMaxSize\": 67108864
},
Expand All @@ -50,28 +71,35 @@ verify_AI_telemetry() {
}"

echo "Validating $table telemetry for $pod_name ($app_type)..."
response=$(curl -s -X POST $url \
response=$(curl -s -w "\n%{http_code}" -X POST $url \
-H "Authorization: Bearer $access_token" \
-H "Content-Type: application/json" \
-d "$json_body")

count_val=$(echo $response | jq '.tables[0].rows[0][0]')
http_code=$(echo "$response" | tail -n 1)
response_body=$(echo "$response" | sed '$d')

count_val=$(echo $response_body | jq '.tables[0].rows[0][0]')

if (( count_val > 0 )); then
echo "$table telemetry found: $count_val"

found_any=1
else
echo "No $table telemetry found for $pod_name ($app_type)" >&2
echo "No $table telemetry found for $pod_name ($app_type) [HTTP $http_code]" >&2
echo "Validation for $app_type pods failed: No $table telemetry found" >&2
return 1
fi
done
}

max_retries=10
retry_interval=30
max_retries=30
retry_interval=10

# Convert comma-separated list to array
IFS=',' read -ra APPS_ARRAY <<< "$APPS_TO_VALIDATE"

for app in "java" "nodejs" "python" "dotnet"; do
for app in "${APPS_ARRAY[@]}"; do
skip_exceptions="false"
if [ "$app" = "java" ]; then
pod_name="$POD_JAVA_NAME"
Expand All @@ -82,6 +110,8 @@ for app in "java" "nodejs" "python" "dotnet"; do
elif [ "$app" = "dotnet" ]; then
pod_name="$POD_DOTNET_NAME"
skip_exceptions="true"
elif [ "$app" = "go" ]; then
pod_name="$POD_GO_NAME"
else
echo "Unsupported application type: $app"
exit 1
Expand Down
Loading