Skip to content

Commit d62d3dc

Browse files
prabhatsharmaCopilotmmosarafO2
authored
add preStop hook for autoscaling down (#142)
* add preStop hook for autoscaling down * Update charts/openobserve/templates/ingester-statefulset.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update charts/openobserve/values.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * typo fix * updated autoscaling for enterprise * Updated release version from v0.16.1 to v0.16.2 --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: mmosarafO2 <mosraf@openobserve.ai>
1 parent bd7e550 commit d62d3dc

File tree

9 files changed

+106
-13
lines changed

9 files changed

+106
-13
lines changed

charts/openobserve-standalone/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.16.1
18+
version: 0.16.2
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to
2222
# follow Semantic Versioning. They should reflect the version the application is using.
2323
# It is recommended to use it with quotes.
24-
appVersion: "v0.16.1"
24+
appVersion: "v0.16.2"
2525

2626
dependencies:
2727
- name: minio

charts/openobserve-standalone/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ image:
66
repository: o2cr.ai/openobserve/openobserve
77
pullPolicy: IfNotPresent
88
# Overrides the image tag whose default is the chart appVersion.
9-
tag: "v0.16.1"
9+
tag: "v0.16.2"
1010
busybox:
1111
repository: busybox
1212
tag: 1.37.0

charts/openobserve/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.16.1
18+
version: 0.16.2
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to
2222
# follow Semantic Versioning. They should reflect the version the application is using.
2323
# It is recommended to use it with quotes.
24-
appVersion: "v0.16.1"
24+
appVersion: "v0.16.2"
2525

2626
dependencies:
2727
- name: etcd

charts/openobserve/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# OpenObserve helm chart
1+
# OpenObserve Helm Chart
22

33
## Amazon EKS
44

@@ -11,7 +11,7 @@ You must set a minimum of 2 values:
1111
1. IAM role for the serviceAccount to gain AWS IAM credentials to access s3
1212
- serviceAccount.annotations."eks.amazonaws.com/role-arn"
1313

14-
## Install
14+
## Installation
1515

1616
Install the Cloud Native PostgreSQL Operator. This is a prerequisite for the OpenObserve Helm chart. The chart sets up a PostgreSQL database cluster (1 primary + 1 replica) and uses it as the metadata store for OpenObserve.
1717
```shell

charts/openobserve/templates/compactor-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.compactor.enabled }}
1+
{{- if and .Values.autoscaling.compactor.enabled .Values.enterprise.enabled }}
22
apiVersion: autoscaling/v2
33
kind: HorizontalPodAutoscaler
44
metadata:

charts/openobserve/templates/ingester-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.ingester.enabled }}
1+
{{- if and .Values.autoscaling.ingester.enabled .Values.enterprise.enabled }}
22
apiVersion: autoscaling/v2
33
kind: HorizontalPodAutoscaler
44
metadata:

charts/openobserve/templates/ingester-statefulset.yaml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,99 @@ spec:
121121
successThreshold: {{ .Values.probes.ingester.config.readinessProbe.successThreshold | default 1 }}
122122
failureThreshold: {{ .Values.probes.ingester.config.readinessProbe.failureThreshold | default 3 }}
123123
{{- end }}
124+
{{- if .Values.autoscaling.ingester.enabled }}
125+
lifecycle:
126+
preStop:
127+
exec:
128+
command:
129+
- /bin/sh
130+
- -c
131+
- |
132+
echo "=========================================="
133+
echo "PreStop Hook Started: $(date)"
134+
echo "Pod: $HOSTNAME"
135+
echo "=========================================="
136+
137+
# Get credentials from environment
138+
USER_EMAIL="$ZO_ROOT_USER_EMAIL"
139+
USER_PASSWORD="$ZO_ROOT_USER_PASSWORD"
140+
AUTH_HEADER=$(echo -n "${USER_EMAIL}:${USER_PASSWORD}" | base64)
141+
PORT="${ZO_HTTP_PORT:-5080}"
142+
143+
# Step 1: Disable the node (triggers drain mode)
144+
echo "[$(date)] Step 1: Calling PUT /node/enable?value=false to disable node..."
145+
DISABLE_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" \
146+
-X PUT "http://localhost:${PORT}/node/enable?value=false" \
147+
-H "Authorization: Basic ${AUTH_HEADER}")
148+
149+
HTTP_CODE=$(echo "$DISABLE_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
150+
BODY=$(echo "$DISABLE_RESPONSE" | grep -v "HTTP_CODE:")
151+
152+
echo "[$(date)] Response (HTTP $HTTP_CODE): $BODY"
153+
154+
if [ "$HTTP_CODE" != "200" ]; then
155+
echo "[$(date)] ERROR: Failed to disable node"
156+
exit 1
157+
fi
158+
159+
echo "[$(date)] ✓ Node disabled - drain mode activated"
160+
echo ""
161+
162+
# Step 2: Poll drain status until ready for shutdown
163+
echo "[$(date)] Step 2: Monitoring drain status via GET /node/drain_status..."
164+
165+
START_TIME=$(date +%s)
166+
MAX_WAIT=1000 # seconds (~16.7 minutes); leave buffer for k8s
167+
POLL_INTERVAL=5
168+
169+
while true; do
170+
CURRENT_TIME=$(date +%s)
171+
ELAPSED=$((CURRENT_TIME - START_TIME))
172+
173+
if [ $ELAPSED -ge $MAX_WAIT ]; then
174+
echo "[$(date)] WARNING: Drain timeout after ${ELAPSED}s"
175+
echo "[$(date)] Exiting to allow Kubernetes to terminate pod"
176+
break
177+
fi
178+
179+
# Call drain_status API
180+
STATUS=$(curl -s "http://localhost:${PORT}/node/drain_status" \
181+
-H "Authorization: Basic ${AUTH_HEADER}")
182+
183+
if [ $? -ne 0 ]; then
184+
echo "[$(date)] ERROR: Failed to get drain status"
185+
sleep $POLL_INTERVAL
186+
continue
187+
fi
188+
189+
# Parse JSON response (without jq dependency)
190+
READY=$(echo "$STATUS" | grep -o '"readyForShutdown":[^,}]*' | cut -d: -f2 | tr -d ' ')
191+
PENDING=$(echo "$STATUS" | grep -o '"pendingParquetFiles":[^,}]*' | cut -d: -f2 | tr -d ' ')
192+
IS_DRAINING=$(echo "$STATUS" | grep -o '"isDraining":[^,}]*' | cut -d: -f2 | tr -d ' ')
193+
MEMORY_FLUSHED=$(echo "$STATUS" | grep -o '"memoryFlushed":[^,}]*' | cut -d: -f2 | tr -d ' ')
194+
195+
echo "[$(date)] [${ELAPSED}s] Status:"
196+
echo " - isDraining: $IS_DRAINING"
197+
echo " - memoryFlushed: $MEMORY_FLUSHED"
198+
echo " - pendingParquetFiles: $PENDING"
199+
echo " - readyForShutdown: $READY"
200+
201+
# Check if ready for shutdown
202+
if [ "$READY" = "true" ]; then
203+
echo ""
204+
echo "=========================================="
205+
echo "[$(date)] ✓ DRAIN COMPLETED in ${ELAPSED}s"
206+
echo "=========================================="
207+
echo "All parquet files uploaded to S3"
208+
echo "Pod is safe to terminate"
209+
break
210+
fi
211+
212+
sleep $POLL_INTERVAL
213+
done
214+
215+
echo "[$(date)] PreStop hook completed. Pod will now terminate."
216+
{{- end }}
124217
resources:
125218
{{- toYaml .Values.resources.ingester | nindent 12 }}
126219
envFrom:

charts/openobserve/templates/router-hpa.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if .Values.autoscaling.router.enabled }}
1+
{{- if and .Values.autoscaling.router.enabled .Values.enterprise.enabled }}
22
apiVersion: autoscaling/v2
33
kind: HorizontalPodAutoscaler
44
metadata:

charts/openobserve/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ image:
1010
enterprise:
1111
repository: o2cr.ai/openobserve/openobserve-enterprise
1212
# Overrides the image tag whose default is the chart appVersion.
13-
tag: "v0.16.1"
13+
tag: "v0.16.2"
1414
reportserver:
1515
repository: o2cr.ai/openobserve/report-server
1616
tag: "v0.11.0-70baf7a"
@@ -1028,14 +1028,14 @@ probes:
10281028
timeoutSeconds: 5
10291029
successThreshold: 1
10301030
failureThreshold: 3
1031-
terminationGracePeriodSeconds: 30
1031+
terminationGracePeriodSeconds: 1200 # 20 minutes for now: the preStop hook flushes data before shutdown, and flushing to S3 can take up to 10 minutes
10321032
livenessProbe:
10331033
initialDelaySeconds: 10
10341034
periodSeconds: 10
10351035
timeoutSeconds: 5
10361036
successThreshold: 1
10371037
failureThreshold: 3
1038-
terminationGracePeriodSeconds: 30
1038+
terminationGracePeriodSeconds: 1200 # 20 minutes for now: the preStop hook flushes data before shutdown, and flushing to S3 can take up to 10 minutes
10391039
querier:
10401040
enabled: false
10411041
config:

0 commit comments

Comments
 (0)