From 25ea3455262889ea0a338d9b95ab2d51cd200d5f Mon Sep 17 00:00:00 2001 From: "Behzad.Mirkhanzadeh" Date: Mon, 14 Jul 2025 09:13:09 -0700 Subject: [PATCH 1/5] fix: Fixing CNI Telemtry Service run by CNS --- cns/service/main.go | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index df1fc67453..4b36e0a8fb 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -94,6 +94,7 @@ const ( // Service name. name = "azure-cns" pluginName = "azure-vnet" + aiPluginName = "AzureCNI" endpointStoreName = "azure-endpoints" endpointStoreLocationLinux = "/var/run/azure-cns/" endpointStoreLocationWindows = "/k/azurecns/" @@ -470,11 +471,36 @@ func sendRegisterNodeRequest(ctx context.Context, httpClient httpDoer, httpRestS return nil } -func startTelemetryService(ctx context.Context) { - var config aitelemetry.AIConfig +func startTelemetryService(ctx context.Context, cnsconfig *configuration.CNSConfig) { + // Use the same telemetry settings as the main CNS service + ts := cnsconfig.TelemetrySettings + + // Check if telemetry is disabled + if ts.DisableAll { + logger.Printf("Telemetry is disabled, skipping CNI telemetry service") + return + } + + aiConfig := aitelemetry.AIConfig{ + AppName: aiPluginName, + AppVersion: version, + BatchSize: ts.TelemetryBatchSizeBytes, + BatchInterval: ts.TelemetryBatchIntervalInSecs, + RefreshTimeout: ts.RefreshIntervalInSecs, + DisableMetadataRefreshThread: ts.DisableMetadataRefreshThread, + DebugMode: ts.DebugMode, + } tb := telemetry.NewTelemetryBuffer(nil) - err := tb.CreateAITelemetryHandle(config, false, false, false) + + var err error + if aiKey := cnsconfig.TelemetrySettings.AppInsightsInstrumentationKey; aiKey != "" { + err = tb.CreateAITelemetryHandle(aiConfig, ts.DisableTrace, ts.DisableMetric, ts.DisableEvent) + } else { + logger.Printf("No Application Insights key provided for CNI telemetry service") + return + } + if err != nil { logger.Errorf("AI telemetry handle creation failed: %v", err) return @@ -705,7 +731,7 @@ func main() { if telemetryDaemonEnabled { logger.Printf("CNI Telemetry is enabled") - go startTelemetryService(rootCtx) + go startTelemetryService(rootCtx, cnsconfig) } // Log platform information. From af43d45935a408dd2d907dbad3c9ee6451faf227 Mon Sep 17 00:00:00 2001 From: "Behzad.Mirkhanzadeh" Date: Mon, 21 Jul 2025 22:09:35 -0700 Subject: [PATCH 2/5] feat: Add CNI-telemtry sidecar in CNS pod --- .pipelines/pipeline.yaml | 12 ++ Makefile | 52 ++++++ cns/cni-telemetry-sidecar/Dockerfile | 59 +++++++ cns/cni-telemetry-sidecar/configmanager.go | 107 ++++++++++++ cns/cni-telemetry-sidecar/main.go | 51 ++++++ cns/cni-telemetry-sidecar/sidecar.go | 180 +++++++++++++++++++++ cns/service/main.go | 34 +--- 7 files changed, 465 insertions(+), 30 deletions(-) create mode 100644 cns/cni-telemetry-sidecar/Dockerfile create mode 100644 cns/cni-telemetry-sidecar/configmanager.go create mode 100644 cns/cni-telemetry-sidecar/main.go create mode 100644 cns/cni-telemetry-sidecar/sidecar.go diff --git a/.pipelines/pipeline.yaml b/.pipelines/pipeline.yaml index 8f03b1c3a4..536bd6cf3f 100644 --- a/.pipelines/pipeline.yaml +++ b/.pipelines/pipeline.yaml @@ -158,6 +158,14 @@ stages: arch: amd64 name: npm os: windows + cni_telemetry_sidecar_linux_amd64: + arch: amd64 + name: cni-telemetry-sidecar + os: linux + cni_telemetry_sidecar_windows_amd64: + arch: amd64 + name: cni-telemetry-sidecar + os: windows steps: - template: containers/container-template.yaml parameters: @@ -199,6 +207,10 @@ stages: arch: arm64 name: npm os: linux + cni_telemetry_sidecar_linux_arm64: + arch: arm64 + name: cni-telemetry-sidecar + os: linux steps: - template: containers/container-template.yaml parameters: diff --git a/Makefile b/Makefile index 0bcf4bde88..884e8a8c4a 100644 --- a/Makefile +++ b/Makefile @@ -88,6 +88,10 @@ ACN_PACKAGE_PATH = github.com/Azure/azure-container-networking CNI_AI_PATH=$(ACN_PACKAGE_PATH)/telemetry.aiMetadata CNS_AI_PATH=$(ACN_PACKAGE_PATH)/cns/logger.aiMetadata NPM_AI_PATH=$(ACN_PACKAGE_PATH)/npm.aiMetadata +CNI_TELEMETRY_SIDECAR_DIR = $(REPO_ROOT)/cns/cni-telemetry-sidecar +CNI_TELEMETRY_SIDECAR_BUILD_DIR = $(BUILD_DIR)/cni-telemetry-sidecar +CNI_TELEMETRY_SIDECAR_AI_ID = $(CNI_AI_ID) # Reuse CNI AI ID +CNI_TELEMETRY_SIDECAR_VERSION = $(CNS_VERSION) # Version follows CNS # Tool paths CONTROLLER_GEN := $(TOOLS_BIN_DIR)/controller-gen @@ -235,6 +239,10 @@ azure-vnet-ipamv6-binary: azure-vnet-telemetry-binary: cd $(CNI_TELEMETRY_DIR) && CGO_ENABLED=0 go build -v -o $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) -ldflags "-X main.version=$(CNI_VERSION) -X $(CNI_AI_PATH)=$(CNI_AI_ID) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" +# Build the Azure CNI Telemetry Sidecar binary. +cni-telemetry-sidecar-binary: + cd $(CNI_TELEMETRY_SIDECAR_DIR) && CGO_ENABLED=0 go build -v -o $(CNI_TELEMETRY_SIDECAR_BUILD_DIR)/azure-cni-telemetry-sidecar$(EXE_EXT) -ldflags "-X main.version=$(CNI_TELEMETRY_SIDECAR_VERSION) -X $(CNI_AI_PATH)=$(CNI_TELEMETRY_SIDECAR_AI_ID)" -gcflags="-dwarflocationlists=true" + # Build the Azure CLI network binary. acncli-binary: cd $(ACNCLI_DIR) && CGO_ENABLED=0 go build -v -o $(ACNCLI_BUILD_DIR)/acn$(EXE_EXT) -ldflags "-X main.version=$(ACN_VERSION) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" @@ -596,6 +604,35 @@ npm-image-pull: ## pull cns container image. IMAGE=$(NPM_IMAGE) \ TAG=$(NPM_PLATFORM_TAG) +# cni-telemetry-sidecar + +cni-telemetry-sidecar-image-name: # util target to print the CNI telemetry sidecar image name + @echo $(CNI_TELEMETRY_SIDECAR_IMAGE) + +cni-telemetry-sidecar-image-name-and-tag: # util target to print the CNI telemetry sidecar image name and tag. + @echo $(IMAGE_REGISTRY)/$(CNI_TELEMETRY_SIDECAR_IMAGE):$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) + +cni-telemetry-sidecar-image: ## build cni-telemetry-sidecar container image. + $(MAKE) container \ + DOCKERFILE=cns/cni-telemetry-sidecar/Dockerfile \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_TELEMETRY_SIDECAR_AI_ID)' \ + PLATFORM=$(PLATFORM) \ + TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) \ + TARGET=$(OS) \ + OS=$(OS) \ + ARCH=$(ARCH) + +cni-telemetry-sidecar-image-push: ## push cni-telemetry-sidecar container image. + $(MAKE) container-push \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) + +cni-telemetry-sidecar-image-pull: ## pull cni-telemetry-sidecar container image. + $(MAKE) container-pull \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) + ## Reusable targets for building multiplat container image manifests. IMAGE_ARCHIVE_DIR ?= $(shell pwd) @@ -751,6 +788,21 @@ npm-skopeo-archive: ## export tar archive of multiplat container manifest. IMAGE=$(NPM_IMAGE) \ TAG=$(NPM_VERSION) +cni-telemetry-sidecar-manifest-build: ## build cni-telemetry-sidecar multiplat container manifest. + $(MAKE) manifest-build \ + PLATFORMS="$(PLATFORMS)" \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) + +cni-telemetry-sidecar-manifest-push: ## push cni-telemetry-sidecar multiplat container manifest + $(MAKE) manifest-push \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) + +cni-telemetry-sidecar-skopeo-archive: ## export tar archive of cni-telemetry-sidecar multiplat container manifest. + $(MAKE) manifest-skopeo-archive \ + IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ + TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) ########################### Archives ################################ diff --git a/cns/cni-telemetry-sidecar/Dockerfile b/cns/cni-telemetry-sidecar/Dockerfile new file mode 100644 index 0000000000..4c2fef0040 --- /dev/null +++ b/cns/cni-telemetry-sidecar/Dockerfile @@ -0,0 +1,59 @@ +# Azure CNI Telemetry Sidecar Dockerfile +# Based on proven CNI Dockerfile pattern from azure-container-networking +ARG ARCH +ARG OS_VERSION +ARG OS + +# Use the same proven base images as CNI +# mcr.microsoft.com/oss/go/microsoft/golang:1.23-azurelinux3.0 +FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang@sha256:8f60e85f4b2f567c888d0b3a4cd12dc74bee534d94c528655546452912d90c74 AS go + +# mcr.microsoft.com/azurelinux/base/core:3.0 +FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:9948138108a3d69f1dae62104599ac03132225c3b7a5ac57b85a214629c8567d AS mariner-core + +FROM go AS azure-cni-telemetry-sidecar +ARG OS +ARG VERSION +ARG CNI_AI_PATH +ARG CNI_AI_ID + +WORKDIR /azure-container-networking +COPY . . + +# Debug: Check if the source file exists and show Go version +RUN ls -la cns/cni-telemetry-sidecar/ && go version + +# Build the Azure CNI telemetry sidecar binary for Linux +RUN CGO_ENABLED=0 GOOS=linux go build \ + -a \ + -o /go/bin/azure-cni-telemetry-sidecar \ + -trimpath \ + -ldflags "-X main.version=${VERSION:-unknown} -X ${CNI_AI_PATH:-main.aiMetadata}=${CNI_AI_ID:-unknown}" \ + -gcflags="-dwarflocationlists=true" \ + ./cns/cni-telemetry-sidecar/ + +# Build the Azure CNI telemetry sidecar binary for Windows +RUN CGO_ENABLED=0 GOOS=windows go build \ + -a \ + -o /go/bin/azure-cni-telemetry-sidecar.exe \ + -trimpath \ + -ldflags "-X main.version=${VERSION:-unknown} -X ${CNI_AI_PATH:-main.aiMetadata}=${CNI_AI_ID:-unknown}" \ + -gcflags="-dwarflocationlists=true" \ + ./cns/cni-telemetry-sidecar/ + +# Verify both binaries were built +RUN ls -la /go/bin/azure-cni-telemetry-sidecar* + +FROM scratch AS bins +COPY --from=azure-cni-telemetry-sidecar /go/bin/* / + +FROM scratch AS linux +COPY --from=azure-cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar /azure-cni-telemetry-sidecar +ENTRYPOINT [ "/azure-cni-telemetry-sidecar" ] + +# Windows support following CNI pattern +FROM --platform=windows/${ARCH} mcr.microsoft.com/oss/kubernetes/windows-host-process-containers-base-image@sha256:b4c9637e032f667c52d1eccfa31ad8c63f1b035e8639f3f48a510536bf34032b AS hpc + +FROM hpc AS windows +COPY --from=azure-cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar.exe /azure-cni-telemetry-sidecar.exe +ENTRYPOINT [ "/azure-cni-telemetry-sidecar.exe" ] \ No newline at end of file diff --git a/cns/cni-telemetry-sidecar/configmanager.go b/cns/cni-telemetry-sidecar/configmanager.go new file mode 100644 index 0000000000..12a3855fff --- /dev/null +++ b/cns/cni-telemetry-sidecar/configmanager.go @@ -0,0 +1,107 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "time" + + "github.com/Azure/azure-container-networking/cns/configuration" + "github.com/Azure/azure-container-networking/cns/logger" +) + +// ConfigManager handles Azure CNS configuration loading and validation +type ConfigManager struct { + configPath string +} + +// NewConfigManager creates a new configuration manager for Azure CNS +func NewConfigManager(configPath string) *ConfigManager { + return &ConfigManager{ + configPath: configPath, + } +} + +// LoadConfig loads and validates the Azure CNS configuration +func (cm *ConfigManager) LoadConfig() (*configuration.CNSConfig, error) { + logger.Printf("Loading Azure CNS configuration from: %s", cm.configPath) + + // Wait for configuration file to become available (Kubernetes ConfigMap mount) + if err := cm.waitForConfigFile(); err != nil { + return nil, fmt.Errorf("Azure CNS configuration file not available: %w", err) + } + + // Read the configuration file from mounted volume + data, err := os.ReadFile(cm.configPath) + if err != nil { + return nil, fmt.Errorf("failed to read Azure CNS configuration file: %w", err) + } + + // Parse JSON configuration into CNS config structure + var config configuration.CNSConfig + if err := json.Unmarshal(data, &config); err != nil { + return nil, fmt.Errorf("failed to parse Azure CNS configuration: %w", err) + } + + // Validate configuration for Azure telemetry requirements + if err := cm.validateConfig(&config); err != nil { + return nil, fmt.Errorf("Azure CNS configuration validation failed: %w", err) + } + + logger.Printf("Azure CNS configuration loaded and validated successfully") + return &config, nil +} + +// waitForConfigFile waits for the configuration file to become available +// This is important in Kubernetes environments where ConfigMaps are mounted asynchronously +func (cm *ConfigManager) waitForConfigFile() error { + const maxRetries = 30 + const retryInterval = 2 * time.Second + + for i := 0; i < maxRetries; i++ { + if _, err := os.Stat(cm.configPath); err == nil { + return nil + } + + if i == 0 { + logger.Printf("Waiting for Azure CNS configuration file to become available...") + } + + time.Sleep(retryInterval) + } + + return fmt.Errorf("Azure CNS configuration file not available after %d attempts (%v total wait time)", + maxRetries, time.Duration(maxRetries)*retryInterval) +} + +// validateConfig performs Azure-specific validation of the CNS configuration +func (cm *ConfigManager) validateConfig(config *configuration.CNSConfig) error { + // Validate that telemetry settings are properly configured + if config.TelemetrySettings.AppInsightsInstrumentationKey == "" && !config.TelemetrySettings.DisableAll { + logger.Printf("Warning: No Application Insights instrumentation key configured and telemetry not disabled") + } + + // Validate batch size settings for Azure Application Insights + if config.TelemetrySettings.TelemetryBatchSizeBytes <= 0 { + logger.Printf("Warning: Invalid telemetry batch size, using default") + } + + // Validate batch interval for optimal Azure ingestion + if config.TelemetrySettings.TelemetryBatchIntervalInSecs <= 0 { + logger.Printf("Warning: Invalid telemetry batch interval, using default") + } + + // Log configuration summary for Azure monitoring and debugging + logger.Printf("Azure CNS Configuration Summary:") + logger.Printf(" - Telemetry DisableAll: %t", config.TelemetrySettings.DisableAll) + logger.Printf(" - Application Insights Key Present: %t", + config.TelemetrySettings.AppInsightsInstrumentationKey != "") + logger.Printf(" - Batch Size: %d bytes", config.TelemetrySettings.TelemetryBatchSizeBytes) + logger.Printf(" - Batch Interval: %d seconds", config.TelemetrySettings.TelemetryBatchIntervalInSecs) + logger.Printf(" - Disable Trace: %t", config.TelemetrySettings.DisableTrace) + logger.Printf(" - Disable Metric: %t", config.TelemetrySettings.DisableMetric) + logger.Printf(" - Disable Event: %t", config.TelemetrySettings.DisableEvent) + logger.Printf(" - Debug Mode: %t", config.TelemetrySettings.DebugMode) + + return nil +} diff --git a/cns/cni-telemetry-sidecar/main.go b/cns/cni-telemetry-sidecar/main.go new file mode 100644 index 0000000000..3e5bcb2a30 --- /dev/null +++ b/cns/cni-telemetry-sidecar/main.go @@ -0,0 +1,51 @@ +package main + +import ( + "context" + "flag" + "os" + "os/signal" + "syscall" + + "github.com/Azure/azure-container-networking/cns/logger" +) + +var ( + version = "unknown" + configPath = flag.String("config", "/etc/cns/cns-config.json", "Path to CNS configuration file") +) + +func main() { + flag.Parse() + + // Initialize logging for the CNI telemetry sidecar + logger.InitLogger("azure-cns-cni-telemetry-sidecar", 1, 1, "/var/log/azure-cns-telemetry") + defer logger.Close() + + logger.Printf("Starting Azure CNI Telemetry Sidecar v%s", version) + + // Create telemetry sidecar service + sidecar := NewTelemetrySidecar(*configPath) + + // Setup graceful shutdown context + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle OS signals for graceful shutdown + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) + + go func() { + sig := <-sigCh + logger.Printf("Received shutdown signal %v, initiating graceful shutdown", sig) + cancel() + }() + + // Run the telemetry sidecar + if err := sidecar.Run(ctx); err != nil { + logger.Errorf("Azure CNI Telemetry Sidecar failed: %v", err) + os.Exit(1) + } + + logger.Printf("Azure CNI Telemetry Sidecar stopped gracefully") +} diff --git a/cns/cni-telemetry-sidecar/sidecar.go b/cns/cni-telemetry-sidecar/sidecar.go new file mode 100644 index 0000000000..4661ac5e0b --- /dev/null +++ b/cns/cni-telemetry-sidecar/sidecar.go @@ -0,0 +1,180 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/Azure/azure-container-networking/aitelemetry" + "github.com/Azure/azure-container-networking/cns/configuration" + "github.com/Azure/azure-container-networking/cns/logger" + "github.com/Azure/azure-container-networking/telemetry" +) + +const ( + // CNI telemetry constants aligned with azure-vnet-telemetry + cniTelemetryAppName = "azure-vnet-telemetry" + cniTelemetryVersion = "1.0.0" + telemetrySocket = "/var/run/azure-vnet-telemetry.sock" +) + +// TelemetrySidecar manages the lifecycle of the CNI telemetry service +type TelemetrySidecar struct { + configPath string + configManager *ConfigManager + telemetryBuffer *telemetry.TelemetryBuffer +} + +// NewTelemetrySidecar creates a new telemetry sidecar instance +func NewTelemetrySidecar(configPath string) *TelemetrySidecar { + return &TelemetrySidecar{ + configPath: configPath, + configManager: NewConfigManager(configPath), + } +} + +// Run starts the telemetry sidecar and manages its lifecycle +func (s *TelemetrySidecar) Run(ctx context.Context) error { + logger.Printf("Initializing Azure CNI Telemetry Sidecar for azure-vnet-telemetry") + + // Load CNS configuration from shared mount + config, err := s.configManager.LoadConfig() + if err != nil { + return fmt.Errorf("failed to load CNS configuration: %w", err) + } + + // Determine if telemetry should run based on configuration and environment + if !s.shouldRunTelemetry(config) { + logger.Printf("CNI Telemetry disabled, entering sleep mode") + return s.sleepUntilShutdown(ctx) + } + + // Initialize CNI telemetry using existing Azure packages + if err := s.initializeCNITelemetry(config); err != nil { + return fmt.Errorf("failed to initialize CNI telemetry: %w", err) + } + + // Start telemetry service mimicking azure-vnet-telemetry behavior + logger.Printf("Starting Azure VNet Telemetry service (CNI mode)") + return s.runCNITelemetryService(ctx) +} + +// initializeCNITelemetry sets up telemetry specifically for CNI operations +func (s *TelemetrySidecar) initializeCNITelemetry(config *configuration.CNSConfig) error { + ts := config.TelemetrySettings + + // Create AI configuration matching azure-vnet-telemetry behavior + aiConfig := aitelemetry.AIConfig{ + AppName: cniTelemetryAppName, + AppVersion: cniTelemetryVersion, + BatchSize: ts.TelemetryBatchSizeBytes, + BatchInterval: ts.TelemetryBatchIntervalInSecs, + RefreshTimeout: ts.RefreshIntervalInSecs, + DisableMetadataRefreshThread: ts.DisableMetadataRefreshThread, + DebugMode: ts.DebugMode, + } + + // Create telemetry buffer for CNI-specific data collection + s.telemetryBuffer = telemetry.NewTelemetryBuffer(nil) + + // Validate Application Insights instrumentation key + if config.TelemetrySettings.AppInsightsInstrumentationKey == "" { + return fmt.Errorf("Application Insights instrumentation key is required for CNI telemetry") + } + + // Initialize AI telemetry handle with Azure-specific configuration + err := s.telemetryBuffer.CreateAITelemetryHandle( + aiConfig, + ts.DisableTrace, + ts.DisableMetric, + ts.DisableEvent, + ) + if err != nil { + return fmt.Errorf("failed to create AI telemetry handle for CNI: %w", err) + } + + logger.Printf("CNI Telemetry initialized with Application Insights (App: %s, Version: %s, BatchSize: %d)", + cniTelemetryAppName, cniTelemetryVersion, ts.TelemetryBatchSizeBytes) + return nil +} + +// runCNITelemetryService runs the telemetry service mimicking azure-vnet-telemetry +func (s *TelemetrySidecar) runCNITelemetryService(ctx context.Context) error { + // Cleanup any existing CNI telemetry instances + s.cleanupExistingInstances() + + // Start telemetry server on the expected socket for CNI integration + if err := s.telemetryBuffer.StartServer(); err != nil { + return fmt.Errorf("failed to start CNI telemetry server: %w", err) + } + + logger.Printf("Azure VNet Telemetry server started successfully on socket: %s", telemetrySocket) + + // Start telemetry data collection in background (non-blocking) + go s.telemetryBuffer.PushData(ctx) + + // Log readiness for CNI network event collection + logger.Printf("CNI Telemetry sidecar ready to collect Azure network interface events") + + // Wait for context cancellation (graceful shutdown signal) + <-ctx.Done() + + // Perform cleanup and graceful shutdown + logger.Printf("Shutting down Azure CNI Telemetry service") + return s.shutdownTelemetry() +} + +// cleanupExistingInstances cleans up any leftover telemetry instances +func (s *TelemetrySidecar) cleanupExistingInstances() { + // Create temporary buffer for cleanup operations + tempBuffer := telemetry.NewTelemetryBuffer(nil) + + // Use the same FdName that azure-vnet-telemetry uses for consistency + if err := tempBuffer.Cleanup(telemetry.FdName); err != nil { + logger.Printf("Warning: Failed to cleanup existing CNI telemetry instances: %v", err) + } else { + logger.Printf("Successfully cleaned up existing CNI telemetry instances") + } +} + +// shutdownTelemetry handles graceful shutdown of telemetry resources +func (s *TelemetrySidecar) shutdownTelemetry() error { + if s.telemetryBuffer != nil { + // Close telemetry buffer (ensures data is flushed to Azure) + s.telemetryBuffer.Close() + logger.Printf("CNI Telemetry buffer closed and remaining data flushed to Azure") + } + return nil +} + +// shouldRunTelemetry determines if CNI telemetry should be enabled +func (s *TelemetrySidecar) shouldRunTelemetry(config *configuration.CNSConfig) bool { + // Check global telemetry disable flag in CNS configuration + if config.TelemetrySettings.DisableAll { + logger.Printf("CNI Telemetry disabled globally in CNS configuration") + return false + } + + // Check CNI telemetry specific enable flag (replaces old "ts" option) + cniTelemetryEnabled := os.Getenv("CNI_TELEMETRY_ENABLED") + if cniTelemetryEnabled != "true" { + logger.Printf("CNI Telemetry not enabled via CNI_TELEMETRY_ENABLED environment variable") + return false + } + + // Validate Application Insights key availability + if config.TelemetrySettings.AppInsightsInstrumentationKey == "" { + logger.Printf("No Application Insights instrumentation key configured for CNI telemetry") + return false + } + + logger.Printf("CNI Telemetry enabled - will collect Azure network interface events") + return true +} + +// sleepUntilShutdown keeps the container running when telemetry is disabled +func (s *TelemetrySidecar) sleepUntilShutdown(ctx context.Context) error { + logger.Printf("CNI Telemetry sidecar sleeping until shutdown signal received") + <-ctx.Done() + return ctx.Err() +} diff --git a/cns/service/main.go b/cns/service/main.go index 4b36e0a8fb..df1fc67453 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -94,7 +94,6 @@ const ( // Service name. name = "azure-cns" pluginName = "azure-vnet" - aiPluginName = "AzureCNI" endpointStoreName = "azure-endpoints" endpointStoreLocationLinux = "/var/run/azure-cns/" endpointStoreLocationWindows = "/k/azurecns/" @@ -471,36 +470,11 @@ func sendRegisterNodeRequest(ctx context.Context, httpClient httpDoer, httpRestS return nil } -func startTelemetryService(ctx context.Context, cnsconfig *configuration.CNSConfig) { - // Use the same telemetry settings as the main CNS service - ts := cnsconfig.TelemetrySettings - - // Check if telemetry is disabled - if ts.DisableAll { - logger.Printf("Telemetry is disabled, skipping CNI telemetry service") - return - } - - aiConfig := aitelemetry.AIConfig{ - AppName: aiPluginName, - AppVersion: version, - BatchSize: ts.TelemetryBatchSizeBytes, - BatchInterval: ts.TelemetryBatchIntervalInSecs, - RefreshTimeout: ts.RefreshIntervalInSecs, - DisableMetadataRefreshThread: ts.DisableMetadataRefreshThread, - DebugMode: ts.DebugMode, - } +func startTelemetryService(ctx context.Context) { + var config aitelemetry.AIConfig tb := telemetry.NewTelemetryBuffer(nil) - - var err error - if aiKey := cnsconfig.TelemetrySettings.AppInsightsInstrumentationKey; aiKey != "" { - err = tb.CreateAITelemetryHandle(aiConfig, ts.DisableTrace, ts.DisableMetric, ts.DisableEvent) - } else { - logger.Printf("No Application Insights key provided for CNI telemetry service") - return - } - + err := tb.CreateAITelemetryHandle(config, false, false, false) if err != nil { logger.Errorf("AI telemetry handle creation failed: %v", err) return @@ -731,7 +705,7 @@ func main() { if telemetryDaemonEnabled { logger.Printf("CNI Telemetry is enabled") - go startTelemetryService(rootCtx, cnsconfig) + go startTelemetryService(rootCtx) } // Log platform information. From 0455d4c6ef1bb9b2944d0a492b2b83a4d451b803 Mon Sep 17 00:00:00 2001 From: "Behzad.Mirkhanzadeh" Date: Tue, 22 Jul 2025 15:21:25 -0700 Subject: [PATCH 3/5] improve the logging --- cns/cni-telemetry-sidecar/configmanager.go | 130 ++++++------ cns/cni-telemetry-sidecar/main.go | 42 +++- cns/cni-telemetry-sidecar/sidecar.go | 236 +++++++++++++-------- 3 files changed, 238 insertions(+), 170 deletions(-) diff --git a/cns/cni-telemetry-sidecar/configmanager.go b/cns/cni-telemetry-sidecar/configmanager.go index 12a3855fff..1e3c9c8fa3 100644 --- a/cns/cni-telemetry-sidecar/configmanager.go +++ b/cns/cni-telemetry-sidecar/configmanager.go @@ -4,104 +4,98 @@ import ( "encoding/json" "fmt" "os" - "time" "github.com/Azure/azure-container-networking/cns/configuration" - "github.com/Azure/azure-container-networking/cns/logger" + "go.uber.org/zap" ) -// ConfigManager handles Azure CNS configuration loading and validation +// ConfigManager handles CNS configuration loading type ConfigManager struct { configPath string + logger *zap.Logger } -// NewConfigManager creates a new configuration manager for Azure CNS +// NewConfigManager creates a new config manager func NewConfigManager(configPath string) *ConfigManager { return &ConfigManager{ configPath: configPath, } } -// LoadConfig loads and validates the Azure CNS configuration +// SetLogger sets the zap logger for the config manager +func (cm *ConfigManager) SetLogger(logger *zap.Logger) { + cm.logger = logger +} + +// LoadConfig loads the CNS configuration from file func (cm *ConfigManager) LoadConfig() (*configuration.CNSConfig, error) { - logger.Printf("Loading Azure CNS configuration from: %s", cm.configPath) + // Use zap logger if available, otherwise create a default config + if cm.logger != nil { + cm.logger.Debug("Loading CNS configuration", zap.String("path", cm.configPath)) + } - // Wait for configuration file to become available (Kubernetes ConfigMap mount) - if err := cm.waitForConfigFile(); err != nil { - return nil, fmt.Errorf("Azure CNS configuration file not available: %w", err) + // Check if config file exists + if _, err := os.Stat(cm.configPath); os.IsNotExist(err) { + if cm.logger != nil { + cm.logger.Info("CNS config file not found, using default configuration", + zap.String("path", cm.configPath)) + } + + // Return default configuration + return &configuration.CNSConfig{ + TelemetrySettings: configuration.TelemetrySettings{ + DisableAll: false, + TelemetryBatchSizeBytes: 16384, + TelemetryBatchIntervalInSecs: 15, + RefreshIntervalInSecs: 15, + DisableMetadataRefreshThread: false, + DebugMode: false, + DisableTrace: false, + DisableMetric: false, + DisableEvent: false, + AppInsightsInstrumentationKey: "", // Will be set by environment or config + }, + }, nil } - // Read the configuration file from mounted volume + // Read the config file data, err := os.ReadFile(cm.configPath) if err != nil { - return nil, fmt.Errorf("failed to read Azure CNS configuration file: %w", err) + if cm.logger != nil { + cm.logger.Error("Failed to read CNS config file", + zap.String("path", cm.configPath), + zap.Error(err)) + } + return nil, fmt.Errorf("failed to read config file %s: %w", cm.configPath, err) } - // Parse JSON configuration into CNS config structure + // Parse the JSON configuration var config configuration.CNSConfig if err := json.Unmarshal(data, &config); err != nil { - return nil, fmt.Errorf("failed to parse Azure CNS configuration: %w", err) - } - - // Validate configuration for Azure telemetry requirements - if err := cm.validateConfig(&config); err != nil { - return nil, fmt.Errorf("Azure CNS configuration validation failed: %w", err) - } - - logger.Printf("Azure CNS configuration loaded and validated successfully") - return &config, nil -} - -// waitForConfigFile waits for the configuration file to become available -// This is important in Kubernetes environments where ConfigMaps are mounted asynchronously -func (cm *ConfigManager) waitForConfigFile() error { - const maxRetries = 30 - const retryInterval = 2 * time.Second - - for i := 0; i < maxRetries; i++ { - if _, err := os.Stat(cm.configPath); err == nil { - return nil - } - - if i == 0 { - logger.Printf("Waiting for Azure CNS configuration file to become available...") + if cm.logger != nil { + cm.logger.Error("Failed to parse CNS config file", + zap.String("path", cm.configPath), + zap.Error(err)) } - - time.Sleep(retryInterval) + return nil, fmt.Errorf("failed to parse config file: %w", err) } - return fmt.Errorf("Azure CNS configuration file not available after %d attempts (%v total wait time)", - maxRetries, time.Duration(maxRetries)*retryInterval) -} - -// validateConfig performs Azure-specific validation of the CNS configuration -func (cm *ConfigManager) validateConfig(config *configuration.CNSConfig) error { - // Validate that telemetry settings are properly configured - if config.TelemetrySettings.AppInsightsInstrumentationKey == "" && !config.TelemetrySettings.DisableAll { - logger.Printf("Warning: No Application Insights instrumentation key configured and telemetry not disabled") + // Set default values for telemetry settings if not specified + if config.TelemetrySettings.TelemetryBatchSizeBytes == 0 { + config.TelemetrySettings.TelemetryBatchSizeBytes = 16384 } - - // Validate batch size settings for Azure Application Insights - if config.TelemetrySettings.TelemetryBatchSizeBytes <= 0 { - logger.Printf("Warning: Invalid telemetry batch size, using default") + if config.TelemetrySettings.TelemetryBatchIntervalInSecs == 0 { + config.TelemetrySettings.TelemetryBatchIntervalInSecs = 15 } - - // Validate batch interval for optimal Azure ingestion - if config.TelemetrySettings.TelemetryBatchIntervalInSecs <= 0 { - logger.Printf("Warning: Invalid telemetry batch interval, using default") + if config.TelemetrySettings.RefreshIntervalInSecs == 0 { + config.TelemetrySettings.RefreshIntervalInSecs = 15 } - // Log configuration summary for Azure monitoring and debugging - logger.Printf("Azure CNS Configuration Summary:") - logger.Printf(" - Telemetry DisableAll: %t", config.TelemetrySettings.DisableAll) - logger.Printf(" - Application Insights Key Present: %t", - config.TelemetrySettings.AppInsightsInstrumentationKey != "") - logger.Printf(" - Batch Size: %d bytes", config.TelemetrySettings.TelemetryBatchSizeBytes) - logger.Printf(" - Batch Interval: %d seconds", config.TelemetrySettings.TelemetryBatchIntervalInSecs) - logger.Printf(" - Disable Trace: %t", config.TelemetrySettings.DisableTrace) - logger.Printf(" - Disable Metric: %t", config.TelemetrySettings.DisableMetric) - logger.Printf(" - Disable Event: %t", config.TelemetrySettings.DisableEvent) - logger.Printf(" - Debug Mode: %t", config.TelemetrySettings.DebugMode) + if cm.logger != nil { + cm.logger.Info("Successfully loaded CNS configuration", + zap.String("path", cm.configPath), + zap.Bool("telemetryDisabled", config.TelemetrySettings.DisableAll)) + } - return nil + return &config, nil } diff --git a/cns/cni-telemetry-sidecar/main.go b/cns/cni-telemetry-sidecar/main.go index 3e5bcb2a30..9ba1acedec 100644 --- a/cns/cni-telemetry-sidecar/main.go +++ b/cns/cni-telemetry-sidecar/main.go @@ -7,26 +7,46 @@ import ( "os/signal" "syscall" - "github.com/Azure/azure-container-networking/cns/logger" + "github.com/Azure/azure-container-networking/cns/logger/v2" + cores "github.com/Azure/azure-container-networking/cns/logger/v2/cores" + "go.uber.org/zap" ) var ( version = "unknown" configPath = flag.String("config", "/etc/cns/cns-config.json", "Path to CNS configuration file") + logLevel = flag.String("log-level", "info", "Log level (debug, info, warn, error)") ) func main() { flag.Parse() - // Initialize logging for the CNI telemetry sidecar - logger.InitLogger("azure-cns-cni-telemetry-sidecar", 1, 1, "/var/log/azure-cns-telemetry") - defer logger.Close() + // Initialize main logger with correct path for shared volume + zapLogger, cleanup, err := logger.New(&logger.Config{ + Level: *logLevel, + File: &cores.FileConfig{ + Filepath: "/var/log/azure-cni-telemetry-sidecar.log", // This will write to host's /var/log/azure-cns/ + }, + }) + if err != nil { + panic("Failed to initialize logger: " + err.Error()) + } + defer cleanup() - logger.Printf("Starting Azure CNI Telemetry Sidecar v%s", version) + zapLogger.Info("Starting Azure CNI Telemetry Sidecar", + zap.String("version", version), + zap.String("configPath", *configPath), + zap.String("logLevel", *logLevel)) - // Create telemetry sidecar service + // Create telemetry sidecar service and pass the logger sidecar := NewTelemetrySidecar(*configPath) + // Set the logger for the sidecar to avoid nil pointer + if err := sidecar.SetLogger(zapLogger); err != nil { + zapLogger.Error("Failed to set logger for sidecar", zap.Error(err)) + os.Exit(1) + } + // Setup graceful shutdown context ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -37,15 +57,17 @@ func main() { go func() { sig := <-sigCh - logger.Printf("Received shutdown signal %v, initiating graceful shutdown", sig) + zapLogger.Info("Received shutdown signal, initiating graceful shutdown", + zap.String("signal", sig.String())) cancel() }() - // Run the telemetry sidecar + // Run the telemetry sidecar (using the Run method from sidecar.go) if err := sidecar.Run(ctx); err != nil { - logger.Errorf("Azure CNI Telemetry Sidecar failed: %v", err) + zapLogger.Error("Azure CNI Telemetry Sidecar failed", + zap.Error(err)) os.Exit(1) } - logger.Printf("Azure CNI Telemetry Sidecar stopped gracefully") + zapLogger.Info("Azure CNI Telemetry Sidecar stopped gracefully") } diff --git a/cns/cni-telemetry-sidecar/sidecar.go b/cns/cni-telemetry-sidecar/sidecar.go index 4661ac5e0b..a81fec4173 100644 --- a/cns/cni-telemetry-sidecar/sidecar.go +++ b/cns/cni-telemetry-sidecar/sidecar.go @@ -3,26 +3,27 @@ package main import ( "context" "fmt" + "net" "os" + "time" - "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/cns/configuration" - "github.com/Azure/azure-container-networking/cns/logger" - "github.com/Azure/azure-container-networking/telemetry" + "go.uber.org/zap" ) const ( - // CNI telemetry constants aligned with azure-vnet-telemetry + // CNI telemetry constants cniTelemetryAppName = "azure-vnet-telemetry" cniTelemetryVersion = "1.0.0" - telemetrySocket = "/var/run/azure-vnet-telemetry.sock" + telemetrySocketPath = "/var/run/azure-vnet-telemetry.sock" // Socket path that azure-vnet expects ) // TelemetrySidecar manages the lifecycle of the CNI telemetry service type TelemetrySidecar struct { - configPath string - configManager *ConfigManager - telemetryBuffer *telemetry.TelemetryBuffer + configPath string + configManager *ConfigManager + logger *zap.Logger + socketListener net.Listener } // NewTelemetrySidecar creates a new telemetry sidecar instance @@ -33,9 +34,26 @@ func NewTelemetrySidecar(configPath string) *TelemetrySidecar { } } +// SetLogger sets the zap logger for the sidecar +func (s *TelemetrySidecar) SetLogger(logger *zap.Logger) error { + if logger == nil { + return fmt.Errorf("logger cannot be nil") + } + s.logger = logger + + // Also set the logger for the config manager + s.configManager.SetLogger(logger) + + return nil +} + // Run starts the telemetry sidecar and manages its lifecycle func (s *TelemetrySidecar) Run(ctx context.Context) error { - logger.Printf("Initializing Azure CNI Telemetry Sidecar for azure-vnet-telemetry") + if s.logger == nil { + return fmt.Errorf("logger not initialized - call SetLogger() first") + } + + s.logger.Info("Initializing Azure CNI Telemetry Sidecar") // Load CNS configuration from shared mount config, err := s.configManager.LoadConfig() @@ -45,136 +63,170 @@ func (s *TelemetrySidecar) Run(ctx context.Context) error { // Determine if telemetry should run based on configuration and environment if !s.shouldRunTelemetry(config) { - logger.Printf("CNI Telemetry disabled, entering sleep mode") + s.logger.Info("CNI Telemetry disabled, entering sleep mode") return s.sleepUntilShutdown(ctx) } - // Initialize CNI telemetry using existing Azure packages - if err := s.initializeCNITelemetry(config); err != nil { - return fmt.Errorf("failed to initialize CNI telemetry: %w", err) + // Create the telemetry socket that azure-vnet CNI expects + if err := s.createTelemetrySocket(); err != nil { + return fmt.Errorf("failed to create telemetry socket: %w", err) } + defer s.cleanupSocket() - // Start telemetry service mimicking azure-vnet-telemetry behavior - logger.Printf("Starting Azure VNet Telemetry service (CNI mode)") - return s.runCNITelemetryService(ctx) + s.logger.Info("Starting Azure CNI Telemetry collection with socket server") + return s.runTelemetryService(ctx) } -// initializeCNITelemetry sets up telemetry specifically for CNI operations -func (s *TelemetrySidecar) initializeCNITelemetry(config *configuration.CNSConfig) error { - ts := config.TelemetrySettings - - // Create AI configuration matching azure-vnet-telemetry behavior - aiConfig := aitelemetry.AIConfig{ - AppName: cniTelemetryAppName, - AppVersion: cniTelemetryVersion, - BatchSize: ts.TelemetryBatchSizeBytes, - BatchInterval: ts.TelemetryBatchIntervalInSecs, - RefreshTimeout: ts.RefreshIntervalInSecs, - DisableMetadataRefreshThread: ts.DisableMetadataRefreshThread, - DebugMode: ts.DebugMode, +// createTelemetrySocket creates the Unix socket that azure-vnet CNI connects to +func (s *TelemetrySidecar) createTelemetrySocket() error { + // Remove any existing socket file + if err := os.RemoveAll(telemetrySocketPath); err != nil { + s.logger.Warn("Failed to remove existing socket file", zap.Error(err)) } - // Create telemetry buffer for CNI-specific data collection - s.telemetryBuffer = telemetry.NewTelemetryBuffer(nil) - - // Validate Application Insights instrumentation key - if config.TelemetrySettings.AppInsightsInstrumentationKey == "" { - return fmt.Errorf("Application Insights instrumentation key is required for CNI telemetry") + // Create the directory if it doesn't exist + if err := os.MkdirAll("/var/run", 0755); err != nil { + return fmt.Errorf("failed to create /var/run directory: %w", err) } - // Initialize AI telemetry handle with Azure-specific configuration - err := s.telemetryBuffer.CreateAITelemetryHandle( - aiConfig, - ts.DisableTrace, - ts.DisableMetric, - ts.DisableEvent, - ) + // Create Unix socket listener + listener, err := net.Listen("unix", telemetrySocketPath) if err != nil { - return fmt.Errorf("failed to create AI telemetry handle for CNI: %w", err) + return fmt.Errorf("failed to create Unix socket at %s: %w", telemetrySocketPath, err) } - logger.Printf("CNI Telemetry initialized with Application Insights (App: %s, Version: %s, BatchSize: %d)", - cniTelemetryAppName, cniTelemetryVersion, ts.TelemetryBatchSizeBytes) - return nil -} + s.socketListener = listener + s.logger.Info("Created telemetry socket", zap.String("path", telemetrySocketPath)) -// runCNITelemetryService runs the telemetry service mimicking azure-vnet-telemetry -func (s *TelemetrySidecar) runCNITelemetryService(ctx context.Context) error { - // Cleanup any existing CNI telemetry instances - s.cleanupExistingInstances() - - // Start telemetry server on the expected socket for CNI integration - if err := s.telemetryBuffer.StartServer(); err != nil { - return fmt.Errorf("failed to start CNI telemetry server: %w", err) + // Set socket permissions so azure-vnet can access it + if err := os.Chmod(telemetrySocketPath, 0666); err != nil { + s.logger.Warn("Failed to set socket permissions", zap.Error(err)) } - logger.Printf("Azure VNet Telemetry server started successfully on socket: %s", telemetrySocket) + return nil +} - // Start telemetry data collection in background (non-blocking) - go s.telemetryBuffer.PushData(ctx) +// runTelemetryService runs both the socket server and telemetry collection +func (s *TelemetrySidecar) runTelemetryService(ctx context.Context) error { + // Start socket server in background + go s.handleSocketConnections(ctx) - // Log readiness for CNI network event collection - logger.Printf("CNI Telemetry sidecar ready to collect Azure network interface events") + // Start telemetry collection loop + return s.runTelemetryLoop(ctx) +} - // Wait for context cancellation (graceful shutdown signal) - <-ctx.Done() +// handleSocketConnections handles incoming connections from azure-vnet CNI +func (s *TelemetrySidecar) handleSocketConnections(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + default: + // Accept connection with timeout + if conn, err := s.socketListener.Accept(); err == nil { + go s.handleConnection(conn) + } + } + } +} - // Perform cleanup and graceful shutdown - logger.Printf("Shutting down Azure CNI Telemetry service") - return s.shutdownTelemetry() +// handleConnection handles a single connection from azure-vnet CNI +func (s *TelemetrySidecar) handleConnection(conn net.Conn) { + defer conn.Close() + + s.logger.Debug("Azure CNI telemetry connection established") + + // Read telemetry data from azure-vnet CNI + buffer := make([]byte, 4096) + for { + n, err := conn.Read(buffer) + if err != nil { + s.logger.Debug("Connection closed", zap.Error(err)) + break + } + + if n > 0 { + // Process telemetry data received from azure-vnet + s.processTelemetryData(buffer[:n]) + } + } } -// cleanupExistingInstances cleans up any leftover telemetry instances -func (s *TelemetrySidecar) cleanupExistingInstances() { - // Create temporary buffer for cleanup operations - tempBuffer := telemetry.NewTelemetryBuffer(nil) +// processTelemetryData processes telemetry data received from azure-vnet CNI +func (s *TelemetrySidecar) processTelemetryData(data []byte) { + s.logger.Debug("Received CNI telemetry data", + zap.Int("bytes", len(data)), + zap.String("data", string(data))) + + // TODO: Parse and process the actual telemetry data + // This could include: + // - JSON parsing of CNI events + // - Metrics extraction + // - Forwarding to Azure Monitor/Application Insights +} - // Use the same FdName that azure-vnet-telemetry uses for consistency - if err := tempBuffer.Cleanup(telemetry.FdName); err != nil { - logger.Printf("Warning: Failed to cleanup existing CNI telemetry instances: %v", err) - } else { - logger.Printf("Successfully cleaned up existing CNI telemetry instances") +// runTelemetryLoop runs the main telemetry collection loop +func (s *TelemetrySidecar) runTelemetryLoop(ctx context.Context) error { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + s.logger.Info("CNI Telemetry collection started with socket server") + + for { + select { + case <-ctx.Done(): + s.logger.Info("Shutting down Azure CNI Telemetry service") + return nil + case <-ticker.C: + s.collectTelemetry() + } } } -// shutdownTelemetry handles graceful shutdown of telemetry resources -func (s *TelemetrySidecar) shutdownTelemetry() error { - if s.telemetryBuffer != nil { - // Close telemetry buffer (ensures data is flushed to Azure) - s.telemetryBuffer.Close() - logger.Printf("CNI Telemetry buffer closed and remaining data flushed to Azure") +// collectTelemetry performs telemetry collection +func (s *TelemetrySidecar) collectTelemetry() { + s.logger.Debug("Collecting CNI telemetry data") + // TODO: Implement actual telemetry collection logic here + // This could include: + // - Reading CNI metrics + // - Collecting network statistics + // - Gathering Azure CNI specific data +} + +// cleanupSocket removes the telemetry socket file +func (s *TelemetrySidecar) cleanupSocket() { + if s.socketListener != nil { + s.socketListener.Close() + } + if err := os.RemoveAll(telemetrySocketPath); err != nil { + s.logger.Warn("Failed to cleanup socket file", zap.Error(err)) + } else { + s.logger.Info("Telemetry socket cleaned up") } - return nil } // shouldRunTelemetry determines if CNI telemetry should be enabled func (s *TelemetrySidecar) shouldRunTelemetry(config *configuration.CNSConfig) bool { // Check global telemetry disable flag in CNS configuration if config.TelemetrySettings.DisableAll { - logger.Printf("CNI Telemetry disabled globally in CNS configuration") + s.logger.Info("CNI Telemetry disabled globally in CNS configuration") return false } - // Check CNI telemetry specific enable flag (replaces old "ts" option) + // Check CNI telemetry specific enable flag cniTelemetryEnabled := os.Getenv("CNI_TELEMETRY_ENABLED") if cniTelemetryEnabled != "true" { - logger.Printf("CNI Telemetry not enabled via CNI_TELEMETRY_ENABLED environment variable") - return false - } - - // Validate Application Insights key availability - if config.TelemetrySettings.AppInsightsInstrumentationKey == "" { - logger.Printf("No Application Insights instrumentation key configured for CNI telemetry") + s.logger.Info("CNI Telemetry not enabled via CNI_TELEMETRY_ENABLED environment variable") return false } - logger.Printf("CNI Telemetry enabled - will collect Azure network interface events") + s.logger.Info("CNI Telemetry enabled - will collect Azure network interface events") return true } // sleepUntilShutdown keeps the container running when telemetry is disabled func (s *TelemetrySidecar) sleepUntilShutdown(ctx context.Context) error { - logger.Printf("CNI Telemetry sidecar sleeping until shutdown signal received") + s.logger.Info("CNI Telemetry sidecar sleeping until shutdown signal received") <-ctx.Done() return ctx.Err() } From fbbbe8b806cd9d157cede3775261932848ad0a60 Mon Sep 17 00:00:00 2001 From: "Behzad.Mirkhanzadeh" Date: Wed, 30 Jul 2025 20:37:31 -0700 Subject: [PATCH 4/5] chaning sidecar code --- .pipelines/pipeline.yaml | 12 - Makefile | 18 +- cns/cni-telemetry-sidecar/Dockerfile | 59 ++-- cns/cni-telemetry-sidecar/configmanager.go | 132 ++++++-- cns/cni-telemetry-sidecar/main.go | 93 ++++-- cns/cni-telemetry-sidecar/sidecar.go | 344 +++++++++++++-------- cns/configuration/configuration.go | 4 + telemetry/aiwrapper.go | 10 + 8 files changed, 432 insertions(+), 240 deletions(-) diff --git a/.pipelines/pipeline.yaml b/.pipelines/pipeline.yaml index 536bd6cf3f..8f03b1c3a4 100644 --- a/.pipelines/pipeline.yaml +++ b/.pipelines/pipeline.yaml @@ -158,14 +158,6 @@ stages: arch: amd64 name: npm os: windows - cni_telemetry_sidecar_linux_amd64: - arch: amd64 - name: cni-telemetry-sidecar - os: linux - cni_telemetry_sidecar_windows_amd64: - arch: amd64 - name: cni-telemetry-sidecar - os: windows steps: - template: containers/container-template.yaml parameters: @@ -207,10 +199,6 @@ stages: arch: arm64 name: npm os: linux - cni_telemetry_sidecar_linux_arm64: - arch: arm64 - name: cni-telemetry-sidecar - os: linux steps: - template: containers/container-template.yaml parameters: diff --git a/Makefile b/Makefile index 884e8a8c4a..683973ced9 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,7 @@ ipv6-hp-bpf: ipv6-hp-bpf-binary ipv6-hp-bpf-archive azure-block-iptables: azure-block-iptables-binary azure-block-iptables-archive azure-ip-masq-merger: azure-ip-masq-merger-binary azure-ip-masq-merger-archive azure-iptables-monitor: azure-iptables-monitor-binary azure-iptables-monitor-archive - +cni-telemetry-sidecar: cni-telemetry-sidecar-binary cni-telemetry-sidecar-archive ##@ Versioning @@ -240,9 +240,14 @@ azure-vnet-telemetry-binary: cd $(CNI_TELEMETRY_DIR) && CGO_ENABLED=0 go build -v -o $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) -ldflags "-X main.version=$(CNI_VERSION) -X $(CNI_AI_PATH)=$(CNI_AI_ID) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" # Build the Azure CNI Telemetry Sidecar binary. -cni-telemetry-sidecar-binary: - cd $(CNI_TELEMETRY_SIDECAR_DIR) && CGO_ENABLED=0 go build -v -o $(CNI_TELEMETRY_SIDECAR_BUILD_DIR)/azure-cni-telemetry-sidecar$(EXE_EXT) -ldflags "-X main.version=$(CNI_TELEMETRY_SIDECAR_VERSION) -X $(CNI_AI_PATH)=$(CNI_TELEMETRY_SIDECAR_AI_ID)" -gcflags="-dwarflocationlists=true" - +cni-telemetry-sidecar-binary: ## build cni-telemetry-sidecar binary. + $(MKDIR) $(CNI_TELEMETRY_SIDECAR_BUILD_DIR) + cd $(CNI_TELEMETRY_SIDECAR_DIR) && CGO_ENABLED=0 go build \ + -v \ + -o $(CNI_TELEMETRY_SIDECAR_BUILD_DIR)/azure-cni-telemetry-sidecar$(EXE_EXT) \ + -ldflags "-X main.version=$(CNI_TELEMETRY_SIDECAR_VERSION) -X $(CNI_AI_PATH)=$(CNI_TELEMETRY_SIDECAR_AI_ID)" \ + -gcflags="-dwarflocationlists=true" \ + . # Build the Azure CLI network binary. acncli-binary: cd $(ACNCLI_DIR) && CGO_ENABLED=0 go build -v -o $(ACNCLI_BUILD_DIR)/acn$(EXE_EXT) -ldflags "-X main.version=$(ACN_VERSION) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" @@ -310,6 +315,7 @@ CNS_IMAGE = azure-cns NPM_IMAGE = azure-npm AZURE_IP_MASQ_MERGER_IMAGE = azure-ip-masq-merger AZURE_IPTABLES_MONITOR_IMAGE = azure-iptables-monitor +CNI_TELEMETRY_SIDECAR_IMAGE = azure-cni-telemetry-sidecar ## Image platform tags. ACNCLI_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(ACN_VERSION) @@ -323,7 +329,7 @@ CNS_WINDOWS_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(CNS_VERSION)-$(OS_SKU_W NPM_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(NPM_VERSION) AZURE_IP_MASQ_MERGER_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(AZURE_IP_MASQ_MERGER_VERSION) AZURE_IPTABLES_MONITOR_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(AZURE_IPTABLES_MONITOR_VERSION) - +CNI_TELEMETRY_SIDECAR_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(CNI_TELEMETRY_SIDECAR_VERSION) qemu-user-static: ## Set up the host to run qemu multiplatform container builds. sudo $(CONTAINER_RUNTIME) run --rm --privileged multiarch/qemu-user-static --reset -p yes @@ -616,7 +622,7 @@ cni-telemetry-sidecar-image: ## build cni-telemetry-sidecar container image. $(MAKE) container \ DOCKERFILE=cns/cni-telemetry-sidecar/Dockerfile \ IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_TELEMETRY_SIDECAR_AI_ID)' \ + EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_TELEMETRY_SIDECAR_AI_ID) --build-arg VERSION=$(CNI_TELEMETRY_SIDECAR_VERSION)' \ PLATFORM=$(PLATFORM) \ TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) \ TARGET=$(OS) \ diff --git a/cns/cni-telemetry-sidecar/Dockerfile b/cns/cni-telemetry-sidecar/Dockerfile index 4c2fef0040..c1b3db6188 100644 --- a/cns/cni-telemetry-sidecar/Dockerfile +++ b/cns/cni-telemetry-sidecar/Dockerfile @@ -1,59 +1,44 @@ -# Azure CNI Telemetry Sidecar Dockerfile -# Based on proven CNI Dockerfile pattern from azure-container-networking ARG ARCH ARG OS_VERSION ARG OS -# Use the same proven base images as CNI -# mcr.microsoft.com/oss/go/microsoft/golang:1.23-azurelinux3.0 FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang@sha256:8f60e85f4b2f567c888d0b3a4cd12dc74bee534d94c528655546452912d90c74 AS go -# mcr.microsoft.com/azurelinux/base/core:3.0 -FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:9948138108a3d69f1dae62104599ac03132225c3b7a5ac57b85a214629c8567d AS mariner-core - -FROM go AS azure-cni-telemetry-sidecar +FROM go AS cni-telemetry-sidecar ARG OS ARG VERSION ARG CNI_AI_PATH ARG CNI_AI_ID +# Add debug output to verify build args are received +RUN echo "=== Dockerfile Build Debug ===" +RUN echo "VERSION = $VERSION" +RUN echo "CNI_AI_ID = $CNI_AI_ID" +RUN echo "CNI_AI_PATH = $CNI_AI_PATH" + WORKDIR /azure-container-networking COPY . . -# Debug: Check if the source file exists and show Go version -RUN ls -la cns/cni-telemetry-sidecar/ && go version - -# Build the Azure CNI telemetry sidecar binary for Linux -RUN CGO_ENABLED=0 GOOS=linux go build \ - -a \ - -o /go/bin/azure-cni-telemetry-sidecar \ - -trimpath \ - -ldflags "-X main.version=${VERSION:-unknown} -X ${CNI_AI_PATH:-main.aiMetadata}=${CNI_AI_ID:-unknown}" \ - -gcflags="-dwarflocationlists=true" \ - ./cns/cni-telemetry-sidecar/ - -# Build the Azure CNI telemetry sidecar binary for Windows -RUN CGO_ENABLED=0 GOOS=windows go build \ - -a \ - -o /go/bin/azure-cni-telemetry-sidecar.exe \ +# Build the telemetry sidecar with embedded AppInsights key using CNI_AI_PATH +RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-cni-telemetry-sidecar \ -trimpath \ - -ldflags "-X main.version=${VERSION:-unknown} -X ${CNI_AI_PATH:-main.aiMetadata}=${CNI_AI_ID:-unknown}" \ + -ldflags "-X main.version=$VERSION -X $CNI_AI_PATH=$CNI_AI_ID" \ -gcflags="-dwarflocationlists=true" \ - ./cns/cni-telemetry-sidecar/ + ./cns/cni-telemetry-sidecar -# Verify both binaries were built -RUN ls -la /go/bin/azure-cni-telemetry-sidecar* +# Verify the key is embedded in the telemetry.aiMetadata variable +RUN echo "=== Binary Debug ===" +RUN strings /go/bin/azure-cni-telemetry-sidecar | grep "5515a1eb" || echo "AI key NOT found in binary!" +RUN ls -la /go/bin/azure-cni-telemetry-sidecar FROM scratch AS bins -COPY --from=azure-cni-telemetry-sidecar /go/bin/* / +COPY --from=cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar / -FROM scratch AS linux -COPY --from=azure-cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar /azure-cni-telemetry-sidecar -ENTRYPOINT [ "/azure-cni-telemetry-sidecar" ] +FROM mcr.microsoft.com/azurelinux/base/core:3.0 AS linux +COPY --from=cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar /usr/local/bin/ -# Windows support following CNI pattern -FROM --platform=windows/${ARCH} mcr.microsoft.com/oss/kubernetes/windows-host-process-containers-base-image@sha256:b4c9637e032f667c52d1eccfa31ad8c63f1b035e8639f3f48a510536bf34032b AS hpc +# Create directories for logs and config +RUN mkdir -p /var/log /etc/azure-cns /var/run -FROM hpc AS windows -COPY --from=azure-cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar.exe /azure-cni-telemetry-sidecar.exe -ENTRYPOINT [ "/azure-cni-telemetry-sidecar.exe" ] \ No newline at end of file +ENTRYPOINT ["/usr/local/bin/azure-cni-telemetry-sidecar"] +CMD ["--config=/etc/azure-cns/cns_config.json", "--log-level=info"] \ No newline at end of file diff --git a/cns/cni-telemetry-sidecar/configmanager.go b/cns/cni-telemetry-sidecar/configmanager.go index 1e3c9c8fa3..0edf94de4c 100644 --- a/cns/cni-telemetry-sidecar/configmanager.go +++ b/cns/cni-telemetry-sidecar/configmanager.go @@ -6,6 +6,7 @@ import ( "os" "github.com/Azure/azure-container-networking/cns/configuration" + "github.com/Azure/azure-container-networking/telemetry" "go.uber.org/zap" ) @@ -29,7 +30,6 @@ func (cm *ConfigManager) SetLogger(logger *zap.Logger) { // LoadConfig loads the CNS configuration from file func (cm *ConfigManager) LoadConfig() (*configuration.CNSConfig, error) { - // Use zap logger if available, otherwise create a default config if cm.logger != nil { cm.logger.Debug("Loading CNS configuration", zap.String("path", cm.configPath)) } @@ -40,22 +40,7 @@ func (cm *ConfigManager) LoadConfig() (*configuration.CNSConfig, error) { cm.logger.Info("CNS config file not found, using default configuration", zap.String("path", cm.configPath)) } - - // Return default configuration - return &configuration.CNSConfig{ - TelemetrySettings: configuration.TelemetrySettings{ - DisableAll: false, - TelemetryBatchSizeBytes: 16384, - TelemetryBatchIntervalInSecs: 15, - RefreshIntervalInSecs: 15, - DisableMetadataRefreshThread: false, - DebugMode: false, - DisableTrace: false, - DisableMetric: false, - DisableEvent: false, - AppInsightsInstrumentationKey: "", // Will be set by environment or config - }, - }, nil + return cm.createDefaultConfig(), nil } // Read the config file @@ -80,22 +65,119 @@ func (cm *ConfigManager) LoadConfig() (*configuration.CNSConfig, error) { return nil, fmt.Errorf("failed to parse config file: %w", err) } + // Apply defaults and environment variable overrides + cm.setConfigDefaults(&config) + + // Check for AppInsights key from all sources (build-time, config, env) + hasAppInsightsKey := cm.hasEffectiveAppInsightsKey(&config.TelemetrySettings) + + if cm.logger != nil { + cm.logger.Info("Successfully loaded CNS configuration", + zap.String("path", cm.configPath), + zap.Bool("telemetryDisabled", config.TelemetrySettings.DisableAll), + zap.Bool("cniTelemetryEnabled", config.TelemetrySettings.EnableCNITelemetry), + zap.String("socketPath", config.TelemetrySettings.CNITelemetrySocketPath), + zap.Bool("hasAppInsightsKey", hasAppInsightsKey)) + } + + return &config, nil +} + +// createDefaultConfig creates a default configuration +func (cm *ConfigManager) createDefaultConfig() *configuration.CNSConfig { + config := &configuration.CNSConfig{ + TelemetrySettings: configuration.TelemetrySettings{ + DisableAll: false, + TelemetryBatchSizeBytes: defaultBatchSizeInBytes, + TelemetryBatchIntervalInSecs: defaultBatchIntervalInSecs, + RefreshIntervalInSecs: defaultRefreshTimeoutInSecs, + DisableMetadataRefreshThread: false, + DebugMode: false, + DisableTrace: false, + DisableMetric: false, + DisableEvent: false, + EnableCNITelemetry: false, // Default to false + CNITelemetrySocketPath: "/var/run/azure-vnet-telemetry.sock", + }, + } + + // Set AppInsights key from environment variables (if any) + cm.setAppInsightsKeyFromEnv(&config.TelemetrySettings) + + return config +} + +// setConfigDefaults applies default values and environment variable overrides +func (cm *ConfigManager) setConfigDefaults(config *configuration.CNSConfig) { // Set default values for telemetry settings if not specified if config.TelemetrySettings.TelemetryBatchSizeBytes == 0 { - config.TelemetrySettings.TelemetryBatchSizeBytes = 16384 + config.TelemetrySettings.TelemetryBatchSizeBytes = defaultBatchSizeInBytes } if config.TelemetrySettings.TelemetryBatchIntervalInSecs == 0 { - config.TelemetrySettings.TelemetryBatchIntervalInSecs = 15 + config.TelemetrySettings.TelemetryBatchIntervalInSecs = defaultBatchIntervalInSecs } if config.TelemetrySettings.RefreshIntervalInSecs == 0 { - config.TelemetrySettings.RefreshIntervalInSecs = 15 + config.TelemetrySettings.RefreshIntervalInSecs = defaultRefreshTimeoutInSecs } - if cm.logger != nil { - cm.logger.Info("Successfully loaded CNS configuration", - zap.String("path", cm.configPath), - zap.Bool("telemetryDisabled", config.TelemetrySettings.DisableAll)) + // Set default CNI telemetry socket path + if config.TelemetrySettings.CNITelemetrySocketPath == "" { + config.TelemetrySettings.CNITelemetrySocketPath = "/var/run/azure-vnet-telemetry.sock" } - return &config, nil + // Handle AppInsights instrumentation key from environment variables + cm.setAppInsightsKeyFromEnv(&config.TelemetrySettings) +} + +// setAppInsightsKeyFromEnv sets the AppInsights instrumentation key from environment variables +func (cm *ConfigManager) setAppInsightsKeyFromEnv(ts *configuration.TelemetrySettings) { + // Try multiple environment variable names + envKeys := []string{ + "APPINSIGHTS_INSTRUMENTATIONKEY", + "APPLICATIONINSIGHTS_CONNECTION_STRING", + "AI_INSTRUMENTATION_KEY", + } + + // If no key is set in config, try environment variables + if ts.AppInsightsInstrumentationKey == "" { + for _, envKey := range envKeys { + if key := os.Getenv(envKey); key != "" { + ts.AppInsightsInstrumentationKey = key + if cm.logger != nil { + cm.logger.Debug("Found AppInsights key in environment variable", + zap.String("envVar", envKey)) + } + break + } + } + } +} + +// hasEffectiveAppInsightsKey checks if AppInsights key is available from any source +// (build-time aiMetadata, config file, or environment variables) +func (cm *ConfigManager) hasEffectiveAppInsightsKey(ts *configuration.TelemetrySettings) bool { + // Priority 1: Build-time embedded key via telemetry.aiMetadata + if buildTimeKey := telemetry.GetAIMetadata(); buildTimeKey != "" { + return true + } + + // Priority 2: Config file + if ts.AppInsightsInstrumentationKey != "" { + return true + } + + // Priority 3: Environment variables + envKeys := []string{ + "APPINSIGHTS_INSTRUMENTATIONKEY", + "APPLICATIONINSIGHTS_CONNECTION_STRING", + "AI_INSTRUMENTATION_KEY", + } + + for _, envKey := range envKeys { + if key := os.Getenv(envKey); key != "" { + return true + } + } + + return false } diff --git a/cns/cni-telemetry-sidecar/main.go b/cns/cni-telemetry-sidecar/main.go index 9ba1acedec..baa84b9ba5 100644 --- a/cns/cni-telemetry-sidecar/main.go +++ b/cns/cni-telemetry-sidecar/main.go @@ -7,67 +7,94 @@ import ( "os/signal" "syscall" - "github.com/Azure/azure-container-networking/cns/logger/v2" - cores "github.com/Azure/azure-container-networking/cns/logger/v2/cores" + "github.com/Azure/azure-container-networking/telemetry" "go.uber.org/zap" ) var ( - version = "unknown" - configPath = flag.String("config", "/etc/cns/cns-config.json", "Path to CNS configuration file") + configPath = flag.String("config", "/etc/azure-cns/cns_config.json", "Path to CNS configuration file") logLevel = flag.String("log-level", "info", "Log level (debug, info, warn, error)") + + // This variable is set at build time via ldflags from Makefile + version = "1.0.0" // -X main.version=$(CNI_TELEMETRY_SIDECAR_VERSION) ) func main() { flag.Parse() - // Initialize main logger with correct path for shared volume - zapLogger, cleanup, err := logger.New(&logger.Config{ - Level: *logLevel, - File: &cores.FileConfig{ - Filepath: "/var/log/azure-cni-telemetry-sidecar.log", // This will write to host's /var/log/azure-cns/ - }, - }) + // Initialize logger + logger, err := initializeLogger(*logLevel) if err != nil { - panic("Failed to initialize logger: " + err.Error()) + panic(err) } - defer cleanup() + defer logger.Sync() - zapLogger.Info("Starting Azure CNI Telemetry Sidecar", + // DEBUG: Check if aiMetadata was set at build time via ldflags + currentAIMetadata := telemetry.GetAIMetadata() + logger.Info("Starting Azure CNI Telemetry Sidecar", zap.String("version", version), zap.String("configPath", *configPath), - zap.String("logLevel", *logLevel)) + zap.String("logLevel", *logLevel), + zap.Bool("hasBuiltInAIKey", currentAIMetadata != ""), + zap.String("aiKeyPrefix", maskAIKey(currentAIMetadata))) - // Create telemetry sidecar service and pass the logger + // Create and configure telemetry sidecar + // Pass the configPath to NewTelemetrySidecar (it expects a string parameter) sidecar := NewTelemetrySidecar(*configPath) - - // Set the logger for the sidecar to avoid nil pointer - if err := sidecar.SetLogger(zapLogger); err != nil { - zapLogger.Error("Failed to set logger for sidecar", zap.Error(err)) + if err := sidecar.SetLogger(logger); err != nil { + logger.Error("Failed to set logger", zap.Error(err)) os.Exit(1) } - // Setup graceful shutdown context + // Log which AI key source we're using + if currentAIMetadata != "" { + logger.Info("Using build-time embedded AppInsights key (from Makefile)") + } else { + logger.Info("No build-time AppInsights key found - will check config/environment") + } + + // Create context with cancellation for graceful shutdown ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Handle OS signals for graceful shutdown - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) - + // Handle shutdown signals + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) go func() { - sig := <-sigCh - zapLogger.Info("Received shutdown signal, initiating graceful shutdown", - zap.String("signal", sig.String())) + sig := <-sigChan + logger.Info("Received shutdown signal", zap.String("signal", sig.String())) cancel() }() - // Run the telemetry sidecar (using the Run method from sidecar.go) - if err := sidecar.Run(ctx); err != nil { - zapLogger.Error("Azure CNI Telemetry Sidecar failed", - zap.Error(err)) + // Run the sidecar + if err := sidecar.Run(ctx); err != nil && err != context.Canceled { + logger.Error("Sidecar execution failed", zap.Error(err)) os.Exit(1) } - zapLogger.Info("Azure CNI Telemetry Sidecar stopped gracefully") + logger.Info("Azure CNI Telemetry Sidecar shutdown complete") +} + +// initializeLogger creates a zap logger with the specified level +func initializeLogger(level string) (*zap.Logger, error) { + var zapLevel zap.AtomicLevel + switch level { + case "debug": + zapLevel = zap.NewAtomicLevelAt(zap.DebugLevel) + case "info": + zapLevel = zap.NewAtomicLevelAt(zap.InfoLevel) + case "warn": + zapLevel = zap.NewAtomicLevelAt(zap.WarnLevel) + case "error": + zapLevel = zap.NewAtomicLevelAt(zap.ErrorLevel) + default: + zapLevel = zap.NewAtomicLevelAt(zap.InfoLevel) + } + + config := zap.NewProductionConfig() + config.Level = zapLevel + config.DisableStacktrace = true + config.DisableCaller = false + + return config.Build() } diff --git a/cns/cni-telemetry-sidecar/sidecar.go b/cns/cni-telemetry-sidecar/sidecar.go index a81fec4173..bbad1704be 100644 --- a/cns/cni-telemetry-sidecar/sidecar.go +++ b/cns/cni-telemetry-sidecar/sidecar.go @@ -3,27 +3,34 @@ package main import ( "context" "fmt" - "net" "os" "time" + "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/cns/configuration" + "github.com/Azure/azure-container-networking/telemetry" "go.uber.org/zap" ) const ( - // CNI telemetry constants - cniTelemetryAppName = "azure-vnet-telemetry" - cniTelemetryVersion = "1.0.0" - telemetrySocketPath = "/var/run/azure-vnet-telemetry.sock" // Socket path that azure-vnet expects + // Constants matching telemetrymain.go + defaultReportToHostIntervalInSecs = 30 + defaultRefreshTimeoutInSecs = 15 + defaultBatchSizeInBytes = 16384 + defaultBatchIntervalInSecs = 15 + defaultGetEnvRetryCount = 2 + defaultGetEnvRetryWaitTimeInSecs = 3 + pluginName = "AzureCNI" + cniTelemetryVersion = "1.0.0" ) -// TelemetrySidecar manages the lifecycle of the CNI telemetry service +// TelemetrySidecar replaces the azure-vnet-telemetry binary fork process type TelemetrySidecar struct { - configPath string - configManager *ConfigManager - logger *zap.Logger - socketListener net.Listener + configPath string + configManager *ConfigManager + logger *zap.Logger + telemetryBuffer *telemetry.TelemetryBuffer + builtInAIKey string // AppInsights key embedded at build time (like azure-vnet-telemetry) } // NewTelemetrySidecar creates a new telemetry sidecar instance @@ -40,190 +47,273 @@ func (s *TelemetrySidecar) SetLogger(logger *zap.Logger) error { return fmt.Errorf("logger cannot be nil") } s.logger = logger - - // Also set the logger for the config manager s.configManager.SetLogger(logger) - return nil } -// Run starts the telemetry sidecar and manages its lifecycle +// SetBuiltInAIKey sets the build-time embedded AppInsights key +func (s *TelemetrySidecar) SetBuiltInAIKey(key string) { + s.builtInAIKey = key +} + +// Run starts the telemetry sidecar (replaces main() in telemetrymain.go) func (s *TelemetrySidecar) Run(ctx context.Context) error { if s.logger == nil { return fmt.Errorf("logger not initialized - call SetLogger() first") } - s.logger.Info("Initializing Azure CNI Telemetry Sidecar") + s.logger.Info("Starting Azure CNI Telemetry Sidecar (replacing azure-vnet-telemetry binary)") - // Load CNS configuration from shared mount - config, err := s.configManager.LoadConfig() + // Load CNS configuration + cnsConfig, err := s.configManager.LoadConfig() if err != nil { return fmt.Errorf("failed to load CNS configuration: %w", err) } - // Determine if telemetry should run based on configuration and environment - if !s.shouldRunTelemetry(config) { + // Check if telemetry should run + if !s.shouldRunTelemetry(cnsConfig) { s.logger.Info("CNI Telemetry disabled, entering sleep mode") return s.sleepUntilShutdown(ctx) } - // Create the telemetry socket that azure-vnet CNI expects - if err := s.createTelemetrySocket(); err != nil { - return fmt.Errorf("failed to create telemetry socket: %w", err) + // Convert CNS config to telemetry config (like telemetrymain.go does) + telemetryConfig := s.convertToTelemetryConfig(cnsConfig) + s.setTelemetryDefaults(&telemetryConfig) + + s.logger.Info("Telemetry configuration", zap.Any("config", telemetryConfig)) + + // Initialize and start telemetry service with both configs (like telemetrymain.go does) + if err := s.startTelemetryService(ctx, telemetryConfig, cnsConfig); err != nil { + return fmt.Errorf("failed to start telemetry service: %w", err) } - defer s.cleanupSocket() - s.logger.Info("Starting Azure CNI Telemetry collection with socket server") - return s.runTelemetryService(ctx) + // Keep running until context is cancelled + <-ctx.Done() + return s.cleanup() +} + +// convertToTelemetryConfig converts CNS config to telemetry config +func (s *TelemetrySidecar) convertToTelemetryConfig(cnsConfig *configuration.CNSConfig) telemetry.TelemetryConfig { + ts := cnsConfig.TelemetrySettings + + return telemetry.TelemetryConfig{ + ReportToHostIntervalInSeconds: time.Duration(defaultReportToHostIntervalInSecs) * time.Second, + DisableAll: ts.DisableAll, + DisableTrace: ts.DisableTrace, + DisableMetric: ts.DisableMetric, + BatchSizeInBytes: ts.TelemetryBatchSizeBytes, + BatchIntervalInSecs: ts.TelemetryBatchIntervalInSecs, + RefreshTimeoutInSecs: ts.RefreshIntervalInSecs, + DisableMetadataThread: ts.DisableMetadataRefreshThread, + DebugMode: ts.DebugMode, + GetEnvRetryCount: defaultGetEnvRetryCount, + GetEnvRetryWaitTimeInSecs: defaultGetEnvRetryWaitTimeInSecs, + } } -// createTelemetrySocket creates the Unix socket that azure-vnet CNI connects to -func (s *TelemetrySidecar) createTelemetrySocket() error { - // Remove any existing socket file - if err := os.RemoveAll(telemetrySocketPath); err != nil { - s.logger.Warn("Failed to remove existing socket file", zap.Error(err)) +// setTelemetryDefaults sets default values (same as telemetrymain.go) +func (s *TelemetrySidecar) setTelemetryDefaults(config *telemetry.TelemetryConfig) { + if config.ReportToHostIntervalInSeconds == 0 { + config.ReportToHostIntervalInSeconds = time.Duration(defaultReportToHostIntervalInSecs) * time.Second } - // Create the directory if it doesn't exist - if err := os.MkdirAll("/var/run", 0755); err != nil { - return fmt.Errorf("failed to create /var/run directory: %w", err) + if config.RefreshTimeoutInSecs == 0 { + config.RefreshTimeoutInSecs = defaultRefreshTimeoutInSecs } - // Create Unix socket listener - listener, err := net.Listen("unix", telemetrySocketPath) - if err != nil { - return fmt.Errorf("failed to create Unix socket at %s: %w", telemetrySocketPath, err) + if config.BatchIntervalInSecs == 0 { + config.BatchIntervalInSecs = defaultBatchIntervalInSecs } - s.socketListener = listener - s.logger.Info("Created telemetry socket", zap.String("path", telemetrySocketPath)) + if config.BatchSizeInBytes == 0 { + config.BatchSizeInBytes = defaultBatchSizeInBytes + } - // Set socket permissions so azure-vnet can access it - if err := os.Chmod(telemetrySocketPath, 0666); err != nil { - s.logger.Warn("Failed to set socket permissions", zap.Error(err)) + if config.GetEnvRetryCount == 0 { + config.GetEnvRetryCount = defaultGetEnvRetryCount } - return nil + if config.GetEnvRetryWaitTimeInSecs == 0 { + config.GetEnvRetryWaitTimeInSecs = defaultGetEnvRetryWaitTimeInSecs + } } -// runTelemetryService runs both the socket server and telemetry collection -func (s *TelemetrySidecar) runTelemetryService(ctx context.Context) error { - // Start socket server in background - go s.handleSocketConnections(ctx) +// getAppInsightsKey gets the AppInsights key with priority: build-time > config > env vars +func (s *TelemetrySidecar) getAppInsightsKey(cnsConfig *configuration.CNSConfig) string { + // Priority 1: Build-time embedded key via telemetry.aiMetadata (like azure-vnet-telemetry) + if buildTimeKey := telemetry.GetAIMetadata(); buildTimeKey != "" { + s.logger.Debug("Using build-time embedded AppInsights key") + return buildTimeKey + } - // Start telemetry collection loop - return s.runTelemetryLoop(ctx) -} + // Priority 2: CNS configuration + if cnsConfig != nil && cnsConfig.TelemetrySettings.AppInsightsInstrumentationKey != "" { + s.logger.Debug("Using AppInsights key from CNS configuration") + return cnsConfig.TelemetrySettings.AppInsightsInstrumentationKey + } -// handleSocketConnections handles incoming connections from azure-vnet CNI -func (s *TelemetrySidecar) handleSocketConnections(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - default: - // Accept connection with timeout - if conn, err := s.socketListener.Accept(); err == nil { - go s.handleConnection(conn) - } + // Priority 3: Environment variables (fallback) + envKeys := []string{ + "APPINSIGHTS_INSTRUMENTATIONKEY", + "APPLICATIONINSIGHTS_CONNECTION_STRING", + "AI_INSTRUMENTATION_KEY", + } + + for _, envKey := range envKeys { + if envKey := os.Getenv(envKey); envKey != "" { + s.logger.Debug("Using AppInsights key from environment variable", zap.String("envVar", envKey)) + return envKey } } + + // Only log warning if no key found from any source + s.logger.Warn("No AppInsights instrumentation key found from any source (build-time, config, or environment)") + return "" } -// handleConnection handles a single connection from azure-vnet CNI -func (s *TelemetrySidecar) handleConnection(conn net.Conn) { - defer conn.Close() +// startTelemetryService starts the telemetry service (replicates telemetrymain.go logic) +func (s *TelemetrySidecar) startTelemetryService(ctx context.Context, config telemetry.TelemetryConfig, cnsConfig *configuration.CNSConfig) error { + s.logger.Info("Initializing telemetry service") - s.logger.Debug("Azure CNI telemetry connection established") + // Get AppInsights key with priority order (build-time aiMetadata has highest priority) + aiKey := s.getAppInsightsKey(cnsConfig) + + // DEBUG: Only show detailed debug info in debug mode + if s.logger.Level() == zap.DebugLevel { + currentAIMetadata := telemetry.GetAIMetadata() + s.logger.Debug("AI telemetry status", + zap.String("buildTimeAIMetadata", maskAIKey(currentAIMetadata)), + zap.String("resolvedAIKey", maskAIKey(aiKey)), + zap.Bool("aiMetadataSet", currentAIMetadata != "")) + } + + if aiKey != "" { + // Set environment variable for compatibility with other telemetry components + os.Setenv("APPINSIGHTS_INSTRUMENTATIONKEY", aiKey) + + // If aiMetadata wasn't set at build time, set it now for runtime scenarios + if currentAIMetadata := telemetry.GetAIMetadata(); currentAIMetadata == "" { + telemetry.SetAIMetadata(aiKey) + s.logger.Debug("Set aiMetadata at runtime") + } + } - // Read telemetry data from azure-vnet CNI - buffer := make([]byte, 4096) + // Clean up any orphan socket (same as telemetrymain.go) + tbtemp := telemetry.NewTelemetryBuffer(s.logger) + tbtemp.Cleanup(telemetry.FdName) + + // Create telemetry buffer (same as telemetrymain.go) + s.telemetryBuffer = telemetry.NewTelemetryBuffer(s.logger) + + // Start telemetry server (same as telemetrymain.go) for { - n, err := conn.Read(buffer) - if err != nil { - s.logger.Debug("Connection closed", zap.Error(err)) + s.logger.Info("Starting telemetry server") + err := s.telemetryBuffer.StartServer() + if err == nil || s.telemetryBuffer.FdExists { break } - if n > 0 { - // Process telemetry data received from azure-vnet - s.processTelemetryData(buffer[:n]) - } + s.logger.Error("Telemetry service starting failed", zap.Error(err)) + s.telemetryBuffer.Cleanup(telemetry.FdName) + time.Sleep(time.Millisecond * 200) } -} -// processTelemetryData processes telemetry data received from azure-vnet CNI -func (s *TelemetrySidecar) processTelemetryData(data []byte) { - s.logger.Debug("Received CNI telemetry data", - zap.Int("bytes", len(data)), - zap.String("data", string(data))) - - // TODO: Parse and process the actual telemetry data - // This could include: - // - JSON parsing of CNI events - // - Metrics extraction - // - Forwarding to Azure Monitor/Application Insights -} - -// runTelemetryLoop runs the main telemetry collection loop -func (s *TelemetrySidecar) runTelemetryLoop(ctx context.Context) error { - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() + // Only create AI telemetry handle if we have an AI key or aiMetadata is set + finalAIMetadata := telemetry.GetAIMetadata() + if finalAIMetadata != "" { + // Configure AI settings (same as telemetrymain.go) + aiConfig := aitelemetry.AIConfig{ + AppName: pluginName, + AppVersion: cniTelemetryVersion, + BatchSize: config.BatchSizeInBytes, + BatchInterval: config.BatchIntervalInSecs, + RefreshTimeout: config.RefreshTimeoutInSecs, + DisableMetadataRefreshThread: config.DisableMetadataThread, + DebugMode: config.DebugMode, + GetEnvRetryCount: config.GetEnvRetryCount, + GetEnvRetryWaitTimeInSecs: config.GetEnvRetryWaitTimeInSecs, + } - s.logger.Info("CNI Telemetry collection started with socket server") + s.logger.Info("Initializing Azure Application Insights telemetry") - for { - select { - case <-ctx.Done(): - s.logger.Info("Shutting down Azure CNI Telemetry service") - return nil - case <-ticker.C: - s.collectTelemetry() + // Create AI telemetry handle (same as telemetrymain.go) + if err := s.telemetryBuffer.CreateAITelemetryHandle(aiConfig, config.DisableAll, config.DisableTrace, config.DisableMetric); err != nil { + s.logger.Error("Failed to initialize Azure Application Insights", zap.Error(err)) + s.logger.Info("Continuing with local telemetry only") + } else { + s.logger.Info("Azure Application Insights telemetry initialized successfully") } + } else { + s.logger.Info("Running with local telemetry only (no Azure Application Insights key)") } -} -// collectTelemetry performs telemetry collection -func (s *TelemetrySidecar) collectTelemetry() { - s.logger.Debug("Collecting CNI telemetry data") - // TODO: Implement actual telemetry collection logic here - // This could include: - // - Reading CNI metrics - // - Collecting network statistics - // - Gathering Azure CNI specific data + s.logger.Info("Telemetry service started successfully", + zap.Duration("reportInterval", config.ReportToHostIntervalInSeconds), + zap.String("pluginName", pluginName), + zap.String("version", cniTelemetryVersion), + zap.Bool("azureIntegration", finalAIMetadata != "")) + + // Start the data push routine in background (same as telemetrymain.go) + go s.telemetryBuffer.PushData(ctx) + + return nil } -// cleanupSocket removes the telemetry socket file -func (s *TelemetrySidecar) cleanupSocket() { - if s.socketListener != nil { - s.socketListener.Close() - } - if err := os.RemoveAll(telemetrySocketPath); err != nil { - s.logger.Warn("Failed to cleanup socket file", zap.Error(err)) - } else { - s.logger.Info("Telemetry socket cleaned up") +// Helper function to mask AI key for logging +func maskAIKey(aiKey string) string { + if len(aiKey) <= 8 { + return aiKey } + return aiKey[:8] + "..." } // shouldRunTelemetry determines if CNI telemetry should be enabled -func (s *TelemetrySidecar) shouldRunTelemetry(config *configuration.CNSConfig) bool { - // Check global telemetry disable flag in CNS configuration - if config.TelemetrySettings.DisableAll { - s.logger.Info("CNI Telemetry disabled globally in CNS configuration") +func (s *TelemetrySidecar) shouldRunTelemetry(cnsConfig *configuration.CNSConfig) bool { + // Check if telemetry is disabled globally + if cnsConfig.TelemetrySettings.DisableAll { + s.logger.Info("Telemetry disabled via CNS configuration") return false } - // Check CNI telemetry specific enable flag - cniTelemetryEnabled := os.Getenv("CNI_TELEMETRY_ENABLED") - if cniTelemetryEnabled != "true" { - s.logger.Info("CNI Telemetry not enabled via CNI_TELEMETRY_ENABLED environment variable") + // Check if CNI telemetry is specifically enabled + if !cnsConfig.TelemetrySettings.EnableCNITelemetry { + s.logger.Info("CNI Telemetry disabled via CNS configuration") return false } - s.logger.Info("CNI Telemetry enabled - will collect Azure network interface events") + // Check if we have an AI key from any source + aiKey := s.getAppInsightsKey(cnsConfig) + hasAIKey := aiKey != "" + + if hasAIKey { + s.logger.Info("CNI Telemetry enabled with AppInsights integration", + zap.Bool("enableCNITelemetry", cnsConfig.TelemetrySettings.EnableCNITelemetry), + zap.Bool("hasAppInsightsKey", true)) + } else { + s.logger.Info("CNI Telemetry enabled with local-only mode", + zap.Bool("enableCNITelemetry", cnsConfig.TelemetrySettings.EnableCNITelemetry), + zap.Bool("hasAppInsightsKey", false)) + } + return true } +// cleanup handles graceful shutdown (like telemetrymain.go) +func (s *TelemetrySidecar) cleanup() error { + s.logger.Info("Shutting down CNI Telemetry service") + + if s.telemetryBuffer != nil { + // Close AI telemetry handle (same as telemetrymain.go) + telemetry.CloseAITelemetryHandle() + + // Cleanup socket and resources + s.telemetryBuffer.Cleanup(telemetry.FdName) + s.logger.Info("Telemetry service cleaned up successfully") + } + + return nil +} + // sleepUntilShutdown keeps the container running when telemetry is disabled func (s *TelemetrySidecar) sleepUntilShutdown(ctx context.Context) error { s.logger.Info("CNI Telemetry sidecar sleeping until shutdown signal received") diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go index 9ec5f8664f..b7deb115d8 100644 --- a/cns/configuration/configuration.go +++ b/cns/configuration/configuration.go @@ -88,6 +88,10 @@ type TelemetrySettings struct { ConfigSnapshotIntervalInMins int // AppInsightsInstrumentationKey allows the user to override the default appinsights ikey AppInsightsInstrumentationKey string + // Flag to enable CNI telemetry collection via sidecar + EnableCNITelemetry bool + // Path to the CNI telemetry socket file that azure-vnet CNI connects to + CNITelemetrySocketPath string } type ManagedSettings struct { diff --git a/telemetry/aiwrapper.go b/telemetry/aiwrapper.go index 95622adb69..bafa15b16b 100644 --- a/telemetry/aiwrapper.go +++ b/telemetry/aiwrapper.go @@ -86,3 +86,13 @@ func CloseAITelemetryHandle() { th.Close(waitTimeInSecs) } } + +// GetAIMetadata returns the current aiMetadata value +func GetAIMetadata() string { + return aiMetadata +} + +// SetAIMetadata sets the aiMetadata value (for runtime configuration) +func SetAIMetadata(metadata string) { + aiMetadata = metadata +} From e9e0c116d5070e570b99ca147ce1d756211b7bb9 Mon Sep 17 00:00:00 2001 From: "Behzad.Mirkhanzadeh" Date: Thu, 14 Aug 2025 12:24:34 -0700 Subject: [PATCH 5/5] Make the telemtry a binary in Azure-cni image --- Makefile | 62 +--------------------------- cni/Dockerfile | 1 + cns/cni-telemetry-sidecar/Dockerfile | 44 -------------------- cns/cni-telemetry-sidecar/main.go | 2 +- cns/cni-telemetry-sidecar/sidecar.go | 6 +-- 5 files changed, 7 insertions(+), 108 deletions(-) delete mode 100644 cns/cni-telemetry-sidecar/Dockerfile diff --git a/Makefile b/Makefile index 683973ced9..0bcf4bde88 100644 --- a/Makefile +++ b/Makefile @@ -88,10 +88,6 @@ ACN_PACKAGE_PATH = github.com/Azure/azure-container-networking CNI_AI_PATH=$(ACN_PACKAGE_PATH)/telemetry.aiMetadata CNS_AI_PATH=$(ACN_PACKAGE_PATH)/cns/logger.aiMetadata NPM_AI_PATH=$(ACN_PACKAGE_PATH)/npm.aiMetadata -CNI_TELEMETRY_SIDECAR_DIR = $(REPO_ROOT)/cns/cni-telemetry-sidecar -CNI_TELEMETRY_SIDECAR_BUILD_DIR = $(BUILD_DIR)/cni-telemetry-sidecar -CNI_TELEMETRY_SIDECAR_AI_ID = $(CNI_AI_ID) # Reuse CNI AI ID -CNI_TELEMETRY_SIDECAR_VERSION = $(CNS_VERSION) # Version follows CNS # Tool paths CONTROLLER_GEN := $(TOOLS_BIN_DIR)/controller-gen @@ -154,7 +150,7 @@ ipv6-hp-bpf: ipv6-hp-bpf-binary ipv6-hp-bpf-archive azure-block-iptables: azure-block-iptables-binary azure-block-iptables-archive azure-ip-masq-merger: azure-ip-masq-merger-binary azure-ip-masq-merger-archive azure-iptables-monitor: azure-iptables-monitor-binary azure-iptables-monitor-archive -cni-telemetry-sidecar: cni-telemetry-sidecar-binary cni-telemetry-sidecar-archive + ##@ Versioning @@ -239,15 +235,6 @@ azure-vnet-ipamv6-binary: azure-vnet-telemetry-binary: cd $(CNI_TELEMETRY_DIR) && CGO_ENABLED=0 go build -v -o $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) -ldflags "-X main.version=$(CNI_VERSION) -X $(CNI_AI_PATH)=$(CNI_AI_ID) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" -# Build the Azure CNI Telemetry Sidecar binary. -cni-telemetry-sidecar-binary: ## build cni-telemetry-sidecar binary. - $(MKDIR) $(CNI_TELEMETRY_SIDECAR_BUILD_DIR) - cd $(CNI_TELEMETRY_SIDECAR_DIR) && CGO_ENABLED=0 go build \ - -v \ - -o $(CNI_TELEMETRY_SIDECAR_BUILD_DIR)/azure-cni-telemetry-sidecar$(EXE_EXT) \ - -ldflags "-X main.version=$(CNI_TELEMETRY_SIDECAR_VERSION) -X $(CNI_AI_PATH)=$(CNI_TELEMETRY_SIDECAR_AI_ID)" \ - -gcflags="-dwarflocationlists=true" \ - . # Build the Azure CLI network binary. acncli-binary: cd $(ACNCLI_DIR) && CGO_ENABLED=0 go build -v -o $(ACNCLI_BUILD_DIR)/acn$(EXE_EXT) -ldflags "-X main.version=$(ACN_VERSION) $(LD_BUILD_FLAGS)" -gcflags="-dwarflocationlists=true" @@ -315,7 +302,6 @@ CNS_IMAGE = azure-cns NPM_IMAGE = azure-npm AZURE_IP_MASQ_MERGER_IMAGE = azure-ip-masq-merger AZURE_IPTABLES_MONITOR_IMAGE = azure-iptables-monitor -CNI_TELEMETRY_SIDECAR_IMAGE = azure-cni-telemetry-sidecar ## Image platform tags. ACNCLI_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(ACN_VERSION) @@ -329,7 +315,7 @@ CNS_WINDOWS_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(CNS_VERSION)-$(OS_SKU_W NPM_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(NPM_VERSION) AZURE_IP_MASQ_MERGER_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(AZURE_IP_MASQ_MERGER_VERSION) AZURE_IPTABLES_MONITOR_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(AZURE_IPTABLES_MONITOR_VERSION) -CNI_TELEMETRY_SIDECAR_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))-$(CNI_TELEMETRY_SIDECAR_VERSION) + qemu-user-static: ## Set up the host to run qemu multiplatform container builds. sudo $(CONTAINER_RUNTIME) run --rm --privileged multiarch/qemu-user-static --reset -p yes @@ -610,35 +596,6 @@ npm-image-pull: ## pull cns container image. IMAGE=$(NPM_IMAGE) \ TAG=$(NPM_PLATFORM_TAG) -# cni-telemetry-sidecar - -cni-telemetry-sidecar-image-name: # util target to print the CNI telemetry sidecar image name - @echo $(CNI_TELEMETRY_SIDECAR_IMAGE) - -cni-telemetry-sidecar-image-name-and-tag: # util target to print the CNI telemetry sidecar image name and tag. - @echo $(IMAGE_REGISTRY)/$(CNI_TELEMETRY_SIDECAR_IMAGE):$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) - -cni-telemetry-sidecar-image: ## build cni-telemetry-sidecar container image. - $(MAKE) container \ - DOCKERFILE=cns/cni-telemetry-sidecar/Dockerfile \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_TELEMETRY_SIDECAR_AI_ID) --build-arg VERSION=$(CNI_TELEMETRY_SIDECAR_VERSION)' \ - PLATFORM=$(PLATFORM) \ - TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) \ - TARGET=$(OS) \ - OS=$(OS) \ - ARCH=$(ARCH) - -cni-telemetry-sidecar-image-push: ## push cni-telemetry-sidecar container image. - $(MAKE) container-push \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) - -cni-telemetry-sidecar-image-pull: ## pull cni-telemetry-sidecar container image. - $(MAKE) container-pull \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - TAG=$(CNI_TELEMETRY_SIDECAR_PLATFORM_TAG) - ## Reusable targets for building multiplat container image manifests. IMAGE_ARCHIVE_DIR ?= $(shell pwd) @@ -794,21 +751,6 @@ npm-skopeo-archive: ## export tar archive of multiplat container manifest. IMAGE=$(NPM_IMAGE) \ TAG=$(NPM_VERSION) -cni-telemetry-sidecar-manifest-build: ## build cni-telemetry-sidecar multiplat container manifest. - $(MAKE) manifest-build \ - PLATFORMS="$(PLATFORMS)" \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) - -cni-telemetry-sidecar-manifest-push: ## push cni-telemetry-sidecar multiplat container manifest - $(MAKE) manifest-push \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) - -cni-telemetry-sidecar-skopeo-archive: ## export tar archive of cni-telemetry-sidecar multiplat container manifest. - $(MAKE) manifest-skopeo-archive \ - IMAGE=$(CNI_TELEMETRY_SIDECAR_IMAGE) \ - TAG=$(CNI_TELEMETRY_SIDECAR_VERSION) ########################### Archives ################################ diff --git a/cni/Dockerfile b/cni/Dockerfile index e67b453456..471831d678 100644 --- a/cni/Dockerfile +++ b/cni/Dockerfile @@ -22,6 +22,7 @@ RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet -trimpath -ldflags RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-telemetry -trimpath -ldflags "-s -w -X main.version="$VERSION" -X "$CNI_AI_PATH"="$CNI_AI_ID"" -gcflags="-dwarflocationlists=true" cni/telemetry/service/telemetrymain.go RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-ipam -trimpath -ldflags "-s -w -X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-stateless -trimpath -ldflags "-s -w -X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/stateless/main.go +RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-cni-telemetry-sidecar -trimpath -ldflags "-X main.version=$VERSION" -gcflags="-dwarflocationlists=true" ./cns/cni-telemetry-sidecar FROM mariner-core AS compressor ARG OS diff --git a/cns/cni-telemetry-sidecar/Dockerfile b/cns/cni-telemetry-sidecar/Dockerfile deleted file mode 100644 index c1b3db6188..0000000000 --- a/cns/cni-telemetry-sidecar/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -ARG ARCH -ARG OS_VERSION -ARG OS - -FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang@sha256:8f60e85f4b2f567c888d0b3a4cd12dc74bee534d94c528655546452912d90c74 AS go - -FROM go AS cni-telemetry-sidecar -ARG OS -ARG VERSION -ARG CNI_AI_PATH -ARG CNI_AI_ID - -# Add debug output to verify build args are received -RUN echo "=== Dockerfile Build Debug ===" -RUN echo "VERSION = $VERSION" -RUN echo "CNI_AI_ID = $CNI_AI_ID" -RUN echo "CNI_AI_PATH = $CNI_AI_PATH" - -WORKDIR /azure-container-networking -COPY . . - -# Build the telemetry sidecar with embedded AppInsights key using CNI_AI_PATH -RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-cni-telemetry-sidecar \ - -trimpath \ - -ldflags "-X main.version=$VERSION -X $CNI_AI_PATH=$CNI_AI_ID" \ - -gcflags="-dwarflocationlists=true" \ - ./cns/cni-telemetry-sidecar - -# Verify the key is embedded in the telemetry.aiMetadata variable -RUN echo "=== Binary Debug ===" -RUN strings /go/bin/azure-cni-telemetry-sidecar | grep "5515a1eb" || echo "AI key NOT found in binary!" -RUN ls -la /go/bin/azure-cni-telemetry-sidecar - -FROM scratch AS bins -COPY --from=cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar / - -FROM mcr.microsoft.com/azurelinux/base/core:3.0 AS linux -COPY --from=cni-telemetry-sidecar /go/bin/azure-cni-telemetry-sidecar /usr/local/bin/ - -# Create directories for logs and config -RUN mkdir -p /var/log /etc/azure-cns /var/run - -ENTRYPOINT ["/usr/local/bin/azure-cni-telemetry-sidecar"] -CMD ["--config=/etc/azure-cns/cns_config.json", "--log-level=info"] \ No newline at end of file diff --git a/cns/cni-telemetry-sidecar/main.go b/cns/cni-telemetry-sidecar/main.go index baa84b9ba5..0e96772ee8 100644 --- a/cns/cni-telemetry-sidecar/main.go +++ b/cns/cni-telemetry-sidecar/main.go @@ -36,7 +36,7 @@ func main() { zap.String("configPath", *configPath), zap.String("logLevel", *logLevel), zap.Bool("hasBuiltInAIKey", currentAIMetadata != ""), - zap.String("aiKeyPrefix", maskAIKey(currentAIMetadata))) + zap.String("aiKeyPrefix", MaskAIKey(currentAIMetadata))) // Create and configure telemetry sidecar // Pass the configPath to NewTelemetrySidecar (it expects a string parameter) diff --git a/cns/cni-telemetry-sidecar/sidecar.go b/cns/cni-telemetry-sidecar/sidecar.go index bbad1704be..35e89ff5b2 100644 --- a/cns/cni-telemetry-sidecar/sidecar.go +++ b/cns/cni-telemetry-sidecar/sidecar.go @@ -182,8 +182,8 @@ func (s *TelemetrySidecar) startTelemetryService(ctx context.Context, config tel if s.logger.Level() == zap.DebugLevel { currentAIMetadata := telemetry.GetAIMetadata() s.logger.Debug("AI telemetry status", - zap.String("buildTimeAIMetadata", maskAIKey(currentAIMetadata)), - zap.String("resolvedAIKey", maskAIKey(aiKey)), + zap.String("buildTimeAIMetadata", MaskAIKey(currentAIMetadata)), + zap.String("resolvedAIKey", MaskAIKey(aiKey)), zap.Bool("aiMetadataSet", currentAIMetadata != "")) } @@ -260,7 +260,7 @@ func (s *TelemetrySidecar) startTelemetryService(ctx context.Context, config tel } // Helper function to mask AI key for logging -func maskAIKey(aiKey string) string { +func MaskAIKey(aiKey string) string { if len(aiKey) <= 8 { return aiKey }