Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions charts/openobserve-standalone/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.16.1
version: 0.16.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v0.16.1"
appVersion: "v0.16.2"

dependencies:
- name: minio
Expand Down
2 changes: 1 addition & 1 deletion charts/openobserve-standalone/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ image:
repository: o2cr.ai/openobserve/openobserve
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "v0.16.1"
tag: "v0.16.2"
busybox:
repository: busybox
tag: 1.37.0
Expand Down
4 changes: 2 additions & 2 deletions charts/openobserve/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.16.1
version: 0.16.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v0.16.1"
appVersion: "v0.16.2"

dependencies:
- name: etcd
Expand Down
4 changes: 2 additions & 2 deletions charts/openobserve/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# OpenObserve helm chart
# OpenObserve Helm Chart

## Amazon EKS

Expand All @@ -11,7 +11,7 @@ You must set a minimum of 2 values:
1. IAM role for the serviceAccount to gain AWS IAM credentials to access s3
- serviceAccount.annotations."eks.amazonaws.com/role-arn"

## Install
## Installation

Install the Cloud Native PostgreSQL Operator. This is a prerequisite for the OpenObserve Helm chart. The chart sets up a PostgreSQL database cluster (1 primary + 1 replica) and uses it as the metadata store for OpenObserve.
```shell
Expand Down
2 changes: 1 addition & 1 deletion charts/openobserve/templates/compactor-hpa.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.autoscaling.compactor.enabled }}
{{- if and .Values.autoscaling.compactor.enabled .Values.enterprise.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
Expand Down
2 changes: 1 addition & 1 deletion charts/openobserve/templates/ingester-hpa.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.autoscaling.ingester.enabled }}
{{- if and .Values.autoscaling.ingester.enabled .Values.enterprise.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
Expand Down
93 changes: 93 additions & 0 deletions charts/openobserve/templates/ingester-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,99 @@ spec:
successThreshold: {{ .Values.probes.ingester.config.readinessProbe.successThreshold | default 1 }}
failureThreshold: {{ .Values.probes.ingester.config.readinessProbe.failureThreshold | default 3 }}
{{- end }}
{{- /*
  Ingester preStop hook, rendered only when ingester autoscaling is enabled.

  The hook script:
    1. calls PUT /node/enable?value=false on the local HTTP port to put the
       node into drain mode, and aborts with exit 1 if that does not return 200;
    2. polls GET /node/drain_status every POLL_INTERVAL seconds until the
       response reports readyForShutdown=true or MAX_WAIT seconds have elapsed.

  Credentials and port are read from ZO_ROOT_USER_EMAIL, ZO_ROOT_USER_PASSWORD
  and ZO_HTTP_PORT (default 5080) in the container environment.

  NOTE(review): the ingester HPA template is gated on both
  autoscaling.ingester.enabled and enterprise.enabled, while this hook is gated
  on autoscaling.ingester.enabled only. Confirm that is intended.
  NOTE(review): MAX_WAIT must stay below the pod's effective
  terminationGracePeriodSeconds, otherwise the kubelet kills the container
  before the drain loop finishes. Verify against the pod spec.
*/}}
{{- if .Values.autoscaling.ingester.enabled }}
          lifecycle:
            preStop:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    echo "=========================================="
                    echo "PreStop Hook Started: $(date)"
                    echo "Pod: $HOSTNAME"
                    echo "=========================================="

                    # Get credentials from environment
                    USER_EMAIL="$ZO_ROOT_USER_EMAIL"
                    USER_PASSWORD="$ZO_ROOT_USER_PASSWORD"
                    # printf instead of `echo -n` (not portable under /bin/sh), and strip
                    # newlines because some base64 implementations wrap long output,
                    # which would break the Authorization header.
                    AUTH_HEADER=$(printf '%s' "${USER_EMAIL}:${USER_PASSWORD}" | base64 | tr -d '\n')
                    PORT="${ZO_HTTP_PORT:-5080}"

                    # Step 1: Disable the node (triggers drain mode)
                    echo "[$(date)] Step 1: Calling PUT /node/enable?value=false to disable node..."
                    # Timeouts keep a hung request from blocking the hook indefinitely.
                    DISABLE_RESPONSE=$(curl -s --connect-timeout 5 --max-time 30 \
                      -w "\nHTTP_CODE:%{http_code}" \
                      -X PUT "http://localhost:${PORT}/node/enable?value=false" \
                      -H "Authorization: Basic ${AUTH_HEADER}")

                    # The status code is appended on its own line by -w; split it from the body.
                    HTTP_CODE=$(echo "$DISABLE_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
                    BODY=$(echo "$DISABLE_RESPONSE" | grep -v "HTTP_CODE:")

                    echo "[$(date)] Response (HTTP $HTTP_CODE): $BODY"

                    if [ "$HTTP_CODE" != "200" ]; then
                      echo "[$(date)] ERROR: Failed to disable node"
                      exit 1
                    fi

                    echo "[$(date)] ✓ Node disabled - drain mode activated"
                    echo ""

                    # Step 2: Poll drain status until ready for shutdown
                    echo "[$(date)] Step 2: Monitoring drain status via GET /node/drain_status..."

                    START_TIME=$(date +%s)
                    MAX_WAIT=1000  # ~16 minutes (leave buffer for k8s)
                    POLL_INTERVAL=5

                    while true; do
                      CURRENT_TIME=$(date +%s)
                      ELAPSED=$((CURRENT_TIME - START_TIME))

                      if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
                        echo "[$(date)] WARNING: Drain timeout after ${ELAPSED}s"
                        echo "[$(date)] Exiting to allow Kubernetes to terminate pod"
                        break
                      fi

                      # Call drain_status API; --max-time bounds each poll so the
                      # MAX_WAIT check above is always reached again.
                      if ! STATUS=$(curl -s --connect-timeout 5 --max-time 10 \
                        "http://localhost:${PORT}/node/drain_status" \
                        -H "Authorization: Basic ${AUTH_HEADER}"); then
                        echo "[$(date)] ERROR: Failed to get drain status"
                        sleep "$POLL_INTERVAL"
                        continue
                      fi

                      # Parse JSON response (without jq dependency)
                      READY=$(echo "$STATUS" | grep -o '"readyForShutdown":[^,}]*' | cut -d: -f2 | tr -d ' ')
                      PENDING=$(echo "$STATUS" | grep -o '"pendingParquetFiles":[^,}]*' | cut -d: -f2 | tr -d ' ')
                      IS_DRAINING=$(echo "$STATUS" | grep -o '"isDraining":[^,}]*' | cut -d: -f2 | tr -d ' ')
                      MEMORY_FLUSHED=$(echo "$STATUS" | grep -o '"memoryFlushed":[^,}]*' | cut -d: -f2 | tr -d ' ')

                      echo "[$(date)] [${ELAPSED}s] Status:"
                      echo " - isDraining: $IS_DRAINING"
                      echo " - memoryFlushed: $MEMORY_FLUSHED"
                      echo " - pendingParquetFiles: $PENDING"
                      echo " - readyForShutdown: $READY"

                      # Check if ready for shutdown
                      if [ "$READY" = "true" ]; then
                        echo ""
                        echo "=========================================="
                        echo "[$(date)] ✓ DRAIN COMPLETED in ${ELAPSED}s"
                        echo "=========================================="
                        echo "All parquet files uploaded to S3"
                        echo "Pod is safe to terminate"
                        break
                      fi

                      sleep "$POLL_INTERVAL"
                    done

                    echo "[$(date)] PreStop hook completed. Pod will now terminate."
{{- end }}
resources:
{{- toYaml .Values.resources.ingester | nindent 12 }}
envFrom:
Expand Down
2 changes: 1 addition & 1 deletion charts/openobserve/templates/router-hpa.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.autoscaling.router.enabled }}
{{- if and .Values.autoscaling.router.enabled .Values.enterprise.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
Expand Down
6 changes: 3 additions & 3 deletions charts/openobserve/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ image:
enterprise:
repository: o2cr.ai/openobserve/openobserve-enterprise
# Overrides the image tag whose default is the chart appVersion.
tag: "v0.16.1"
tag: "v0.16.2"
reportserver:
repository: o2cr.ai/openobserve/report-server
tag: "v0.11.0-70baf7a"
Expand Down Expand Up @@ -1028,14 +1028,14 @@ probes:
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
terminationGracePeriodSeconds: 30
terminationGracePeriodSeconds: 1200 # 20 minutes for now, since we are using pre-stop hook to flush data and it takes up to 10 minutes to flush data to s3
livenessProbe:
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
terminationGracePeriodSeconds: 30
terminationGracePeriodSeconds: 1200 # 20 minutes for now, since we are using pre-stop hook to flush data and it takes up to 10 minutes to flush data to s3
querier:
enabled: false
config:
Expand Down