Skip to content

Commit eaa0a47

Browse files
committed
updated autoscaling for enterprise
1 parent 1c5ce0d commit eaa0a47

File tree

4 files changed

+81
-32
lines changed

4 files changed

+81
-32
lines changed

charts/openobserve/templates/compactor-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.compactor.enabled }}
1+
{{- if and .Values.autoscaling.compactor.enabled .Values.enterprise.enabled }}
2 2
apiVersion: autoscaling/v2
3 3
kind: HorizontalPodAutoscaler
4 4
metadata:

charts/openobserve/templates/ingester-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.ingester.enabled }}
1+
{{- if and .Values.autoscaling.ingester.enabled .Values.enterprise.enabled }}
2 2
apiVersion: autoscaling/v2
3 3
kind: HorizontalPodAutoscaler
4 4
metadata:

charts/openobserve/templates/ingester-statefulset.yaml

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -129,41 +129,90 @@ spec:
129 129
- /bin/sh
130 130
- -c
131 131
- |
132-
# Get credentials from environment variables
132+
echo "=========================================="
133+
echo "PreStop Hook Started: $(date)"
134+
echo "Pod: $HOSTNAME"
135+
echo "=========================================="
136+
137+
# Get credentials from environment
133 138
USER_EMAIL="$ZO_ROOT_USER_EMAIL"
134 139
USER_PASSWORD="$ZO_ROOT_USER_PASSWORD"
135-
136-
# Create base64 encoded credentials for Authorization header
137 140
AUTH_HEADER=$(echo -n "${USER_EMAIL}:${USER_PASSWORD}" | base64)
138-
139-
# Disable the node first
140-
echo "Disabling ingester node..."
141-
curl -X PUT "http://localhost:{{ .Values.config.ZO_HTTP_PORT }}/node/enable?value=false" \
142-
-H "Authorization: Basic ${AUTH_HEADER}"
143-
144-
# returns 200 if successful and "true" if the node is disabled
145-
146-
# Flush all data from memory to WAL. This does not flush data from ingester to s3.
147-
echo "Flushing data from ingester..."
148-
RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -X PUT "http://localhost:{{ .Values.config.ZO_HTTP_PORT }}/node/flush" \
141+
PORT="${ZO_HTTP_PORT:-5080}"
142+
143+
# Step 1: Disable the node (triggers drain mode)
144+
echo "[$(date)] Step 1: Calling PUT /node/enable?value=false to disable node..."
145+
DISABLE_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" \
146+
-X PUT "http://localhost:${PORT}/node/enable?value=false" \
149 147
-H "Authorization: Basic ${AUTH_HEADER}")
150-
if [ "$RESPONSE" -ne 200 ]; then
151-
echo "Error: Failed to flush data from ingester. HTTP response code: $RESPONSE"
148+
149+
HTTP_CODE=$(echo "$DISABLE_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
150+
BODY=$(echo "$DISABLE_RESPONSE" | grep -v "HTTP_CODE:")
151+
152+
echo "[$(date)] Response (HTTP $HTTP_CODE): $BODY"
153+
154+
if [ "$HTTP_CODE" != "200" ]; then
155+
echo "[$(date)] ERROR: Failed to disable node"
152 156
exit 1
153 157
fi
154-
155-
# returns 200 if successful and "true" if the node is flushed
156-
157-
# We need another API to check if all the data has been moved to s3 or /flush should become async and move files to s3 as well
158-
# e.g /node/wal_status
159-
# Need to build this API. Until then, we will wait for 900 seconds.
160-
161-
# Wait for 900 seconds after flush to ensure data is moved to s3
162-
# 15 minutes for now, since file movement to s3 may take up to 10 minutes
163-
echo "Waiting 900 seconds to flush data..."
164-
sleep 900
165-
166-
echo "Pre-stop hook completed"
158+
159+
echo "[$(date)] ✓ Node disabled - drain mode activated"
160+
echo ""
161+
162+
# Step 2: Poll drain status until ready for shutdown
163+
echo "[$(date)] Step 2: Monitoring drain status via GET /node/drain_status..."
164+
165+
START_TIME=$(date +%s)
166+
MAX_WAIT=1000 # ~16 minutes (leave buffer for k8s)
167+
POLL_INTERVAL=5
168+
169+
while true; do
170+
CURRENT_TIME=$(date +%s)
171+
ELAPSED=$((CURRENT_TIME - START_TIME))
172+
173+
if [ $ELAPSED -ge $MAX_WAIT ]; then
174+
echo "[$(date)] WARNING: Drain timeout after ${ELAPSED}s"
175+
echo "[$(date)] Exiting to allow Kubernetes to terminate pod"
176+
break
177+
fi
178+
179+
# Call drain_status API
180+
STATUS=$(curl -s "http://localhost:${PORT}/node/drain_status" \
181+
-H "Authorization: Basic ${AUTH_HEADER}")
182+
183+
if [ $? -ne 0 ]; then
184+
echo "[$(date)] ERROR: Failed to get drain status"
185+
sleep $POLL_INTERVAL
186+
continue
187+
fi
188+
189+
# Parse JSON response (without jq dependency)
190+
READY=$(echo "$STATUS" | grep -o '"readyForShutdown":[^,}]*' | cut -d: -f2 | tr -d ' ')
191+
PENDING=$(echo "$STATUS" | grep -o '"pendingParquetFiles":[^,}]*' | cut -d: -f2 | tr -d ' ')
192+
IS_DRAINING=$(echo "$STATUS" | grep -o '"isDraining":[^,}]*' | cut -d: -f2 | tr -d ' ')
193+
MEMORY_FLUSHED=$(echo "$STATUS" | grep -o '"memoryFlushed":[^,}]*' | cut -d: -f2 | tr -d ' ')
194+
195+
echo "[$(date)] [${ELAPSED}s] Status:"
196+
echo " - isDraining: $IS_DRAINING"
197+
echo " - memoryFlushed: $MEMORY_FLUSHED"
198+
echo " - pendingParquetFiles: $PENDING"
199+
echo " - readyForShutdown: $READY"
200+
201+
# Check if ready for shutdown
202+
if [ "$READY" = "true" ]; then
203+
echo ""
204+
echo "=========================================="
205+
echo "[$(date)] ✓ DRAIN COMPLETED in ${ELAPSED}s"
206+
echo "=========================================="
207+
echo "All parquet files uploaded to S3"
208+
echo "Pod is safe to terminate"
209+
break
210+
fi
211+
212+
sleep $POLL_INTERVAL
213+
done
214+
215+
echo "[$(date)] PreStop hook completed. Pod will now terminate."
167 216
{{- end }}
168 217
resources:
169 218
{{- toYaml .Values.resources.ingester | nindent 12 }}

charts/openobserve/templates/router-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.router.enabled }}
1+
{{- if and .Values.autoscaling.router.enabled .Values.enterprise.enabled }}
2 2
apiVersion: autoscaling/v2
3 3
kind: HorizontalPodAutoscaler
4 4
metadata:

0 commit comments

Comments
 (0)