diff --git a/charts/openobserve-standalone/Chart.yaml b/charts/openobserve-standalone/Chart.yaml
index ab36518..c102581 100644
--- a/charts/openobserve-standalone/Chart.yaml
+++ b/charts/openobserve-standalone/Chart.yaml
@@ -15,13 +15,13 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.16.1
+version: 0.16.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "v0.16.1"
+appVersion: "v0.16.2"
 
 dependencies:
   - name: minio
diff --git a/charts/openobserve-standalone/values.yaml b/charts/openobserve-standalone/values.yaml
index 27c6115..7210a2c 100644
--- a/charts/openobserve-standalone/values.yaml
+++ b/charts/openobserve-standalone/values.yaml
@@ -6,7 +6,7 @@ image:
   repository: o2cr.ai/openobserve/openobserve
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "v0.16.1"
+  tag: "v0.16.2"
   busybox:
     repository: busybox
     tag: 1.37.0
diff --git a/charts/openobserve/Chart.yaml b/charts/openobserve/Chart.yaml
index 56bc4c2..177659f 100644
--- a/charts/openobserve/Chart.yaml
+++ b/charts/openobserve/Chart.yaml
@@ -15,13 +15,13 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.16.1
+version: 0.16.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "v0.16.1"
+appVersion: "v0.16.2"
 
 dependencies:
   - name: etcd
diff --git a/charts/openobserve/README.md b/charts/openobserve/README.md
index 5ec97a4..61677be 100644
--- a/charts/openobserve/README.md
+++ b/charts/openobserve/README.md
@@ -1,4 +1,4 @@
-# OpenObserve helm chart
+# OpenObserve Helm Chart
 
 ## Amazon EKS
 
@@ -11,7 +11,7 @@ You must set a minimum of 2 values:
 1. IAM role for the serviceAccount to gain AWS IAM credentials to access s3
     - serviceAccount.annotations."eks.amazonaws.com/role-arn"
 
-## Install
+## Installation
 
 Install the Cloud Native PostgreSQL Operator. This is a prerequisite for openobserve helm chart. This helm chart sets up a postgres database cluster (1 primary + 1 replica) and uses it as metadata store of OpenObserve.
 ```shell
diff --git a/charts/openobserve/templates/compactor-hpa.yaml b/charts/openobserve/templates/compactor-hpa.yaml
index f9412fe..25ed6b2 100644
--- a/charts/openobserve/templates/compactor-hpa.yaml
+++ b/charts/openobserve/templates/compactor-hpa.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.autoscaling.compactor.enabled }}
+{{- if and .Values.autoscaling.compactor.enabled .Values.enterprise.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/charts/openobserve/templates/ingester-hpa.yaml b/charts/openobserve/templates/ingester-hpa.yaml
index 182839d..de10275 100644
--- a/charts/openobserve/templates/ingester-hpa.yaml
+++ b/charts/openobserve/templates/ingester-hpa.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.autoscaling.ingester.enabled }}
+{{- if and .Values.autoscaling.ingester.enabled .Values.enterprise.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/charts/openobserve/templates/ingester-statefulset.yaml b/charts/openobserve/templates/ingester-statefulset.yaml
index 25e6d06..8ec4594 100644
--- a/charts/openobserve/templates/ingester-statefulset.yaml
+++ b/charts/openobserve/templates/ingester-statefulset.yaml
@@ -121,6 +121,104 @@ spec:
             successThreshold: {{ .Values.probes.ingester.config.readinessProbe.successThreshold | default 1 }}
             failureThreshold: {{ .Values.probes.ingester.config.readinessProbe.failureThreshold | default 3 }}
           {{- end }}
+          {{- if .Values.autoscaling.ingester.enabled }}
+          # Drain the ingester before shutdown: disable the node, then wait until
+          # it reports readyForShutdown (or MAX_WAIT elapses).
+          lifecycle:
+            preStop:
+              exec:
+                command:
+                  - /bin/sh
+                  - -c
+                  - |
+                    echo "=========================================="
+                    echo "PreStop Hook Started: $(date)"
+                    echo "Pod: $HOSTNAME"
+                    echo "=========================================="
+
+                    # Get credentials from environment
+                    USER_EMAIL="$ZO_ROOT_USER_EMAIL"
+                    USER_PASSWORD="$ZO_ROOT_USER_PASSWORD"
+                    # printf replaces non-portable `echo -n`; tr removes the line wraps that
+                    # base64 inserts for long input, which would corrupt the header.
+                    AUTH_HEADER=$(printf '%s' "${USER_EMAIL}:${USER_PASSWORD}" | base64 | tr -d '\n')
+                    PORT="${ZO_HTTP_PORT:-5080}"
+
+                    # Step 1: Disable the node (triggers drain mode)
+                    echo "[$(date)] Step 1: Calling PUT /node/enable?value=false to disable node..."
+                    DISABLE_RESPONSE=$(curl -s --max-time 10 -w "\nHTTP_CODE:%{http_code}" \
+                      -X PUT "http://localhost:${PORT}/node/enable?value=false" \
+                      -H "Authorization: Basic ${AUTH_HEADER}")
+
+                    HTTP_CODE=$(echo "$DISABLE_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
+                    BODY=$(echo "$DISABLE_RESPONSE" | grep -v "HTTP_CODE:")
+
+                    echo "[$(date)] Response (HTTP $HTTP_CODE): $BODY"
+
+                    if [ "$HTTP_CODE" != "200" ]; then
+                      echo "[$(date)] ERROR: Failed to disable node"
+                      exit 1
+                    fi
+
+                    echo "[$(date)] ✓ Node disabled - drain mode activated"
+                    echo ""
+
+                    # Step 2: Poll drain status until ready for shutdown
+                    echo "[$(date)] Step 2: Monitoring drain status via GET /node/drain_status..."
+
+                    START_TIME=$(date +%s)
+                    MAX_WAIT=1000  # ~16.7 minutes; keep below terminationGracePeriodSeconds (1200s in values.yaml)
+                    POLL_INTERVAL=5
+
+                    while true; do
+                      CURRENT_TIME=$(date +%s)
+                      ELAPSED=$((CURRENT_TIME - START_TIME))
+
+                      if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
+                        echo "[$(date)] WARNING: Drain timeout after ${ELAPSED}s"
+                        echo "[$(date)] Exiting to allow Kubernetes to terminate pod"
+                        break
+                      fi
+
+                      # Call drain_status API
+                      # -f makes curl exit non-zero on HTTP errors so the check below catches them
+                      STATUS=$(curl -sf --max-time 10 "http://localhost:${PORT}/node/drain_status" \
+                        -H "Authorization: Basic ${AUTH_HEADER}")
+
+                      if [ $? -ne 0 ]; then
+                        echo "[$(date)] ERROR: Failed to get drain status"
+                        sleep "$POLL_INTERVAL"
+                        continue
+                      fi
+
+                      # Parse JSON response (without jq dependency)
+                      READY=$(echo "$STATUS" | grep -o '"readyForShutdown":[^,}]*' | cut -d: -f2 | tr -d ' ')
+                      PENDING=$(echo "$STATUS" | grep -o '"pendingParquetFiles":[^,}]*' | cut -d: -f2 | tr -d ' ')
+                      IS_DRAINING=$(echo "$STATUS" | grep -o '"isDraining":[^,}]*' | cut -d: -f2 | tr -d ' ')
+                      MEMORY_FLUSHED=$(echo "$STATUS" | grep -o '"memoryFlushed":[^,}]*' | cut -d: -f2 | tr -d ' ')
+
+                      echo "[$(date)] [${ELAPSED}s] Status:"
+                      echo " - isDraining: $IS_DRAINING"
+                      echo " - memoryFlushed: $MEMORY_FLUSHED"
+                      echo " - pendingParquetFiles: $PENDING"
+                      echo " - readyForShutdown: $READY"
+
+                      # Check if ready for shutdown
+                      if [ "$READY" = "true" ]; then
+                        echo ""
+                        echo "=========================================="
+                        echo "[$(date)] ✓ DRAIN COMPLETED in ${ELAPSED}s"
+                        echo "=========================================="
+                        echo "All parquet files uploaded to S3"
+                        echo "Pod is safe to terminate"
+                        break
+                      fi
+
+                      sleep "$POLL_INTERVAL"
+                    done
+
+                    echo "[$(date)] PreStop hook completed. Pod will now terminate."
+          {{- end }}
           resources:
             {{- toYaml .Values.resources.ingester | nindent 12 }}
           envFrom:
diff --git a/charts/openobserve/templates/router-hpa.yaml b/charts/openobserve/templates/router-hpa.yaml
index 15faf5c..22676ad 100644
--- a/charts/openobserve/templates/router-hpa.yaml
+++ b/charts/openobserve/templates/router-hpa.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.autoscaling.router.enabled }}
+{{- if and .Values.autoscaling.router.enabled .Values.enterprise.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/charts/openobserve/values.yaml b/charts/openobserve/values.yaml
index b845ac3..6097f95 100644
--- a/charts/openobserve/values.yaml
+++ b/charts/openobserve/values.yaml
@@ -10,7 +10,7 @@ image:
   enterprise:
     repository: o2cr.ai/openobserve/openobserve-enterprise
     # Overrides the image tag whose default is the chart appVersion.
-    tag: "v0.16.1"
+    tag: "v0.16.2"
   reportserver:
     repository: o2cr.ai/openobserve/report-server
     tag: "v0.11.0-70baf7a"
@@ -1028,14 +1028,14 @@ probes:
         timeoutSeconds: 5
         successThreshold: 1
         failureThreshold: 3
-        terminationGracePeriodSeconds: 30
+        terminationGracePeriodSeconds: 1200  # 20 minutes: the ingester preStop hook flushes data to S3 (can take up to 10 minutes) and waits at most 1000s
       livenessProbe:
         initialDelaySeconds: 10
         periodSeconds: 10
         timeoutSeconds: 5
         successThreshold: 1
         failureThreshold: 3
-        terminationGracePeriodSeconds: 30
+        terminationGracePeriodSeconds: 1200  # 20 minutes: the ingester preStop hook flushes data to S3 (can take up to 10 minutes) and waits at most 1000s
   querier:
     enabled: false
     config: