Skip to content

Commit 183e8f4

Browse files
yuluo-yx and rootfs authored
feat: add helm support deploy support (#532)
* feat: add helm support deploy support Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix: fix ns error Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix: fix helm Signed-off-by: jishiwen.jsw <jishiwen.jsw@digital-engine.com> * feat: add ci Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix: delete ci kind config Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * feat: update init container download model time Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix ci Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * add more log Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix ci Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix ci Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix: adjust timeout Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix: adjust the readiness and health probe Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> * fix Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> --------- Signed-off-by: yuluo-yx <yuluo08290126@gmail.com> Signed-off-by: jishiwen.jsw <jishiwen.jsw@digital-engine.com> Signed-off-by: shown <yuluo08290126@gmail.com> Co-authored-by: Huamin Chen <rootfs@users.noreply.github.com>
1 parent 5b3c65d commit 183e8f4

24 files changed

+3179
-1
lines changed

.github/workflows/helm-ci.yml

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
# Lints, renders, and install-tests the Helm chart under deploy/helm.
name: Helm Chart CI

# Runs on pushes and pull requests targeting main that touch the chart or this
# workflow file, and can also be started manually.
on:
  push:
    branches:
      - main
    paths:
      - 'deploy/helm/**'
      - '.github/workflows/helm-ci.yml'
  pull_request:
    branches:
      - main
    paths:
      - 'deploy/helm/**'
      - '.github/workflows/helm-ci.yml'
  workflow_dispatch:

# Least privilege: the jobs only check out the repository, so the workflow
# token is restricted to read-only access to repository contents.
permissions:
  contents: read

# Tool versions and chart location shared by every job.
env:
  HELM_VERSION: v3.14.0
  KIND_VERSION: v0.22.0
  KUBECTL_VERSION: v1.29.0
  CHART_PATH: deploy/helm/semantic-router
23+
24+
jobs:
25+
26+
# Static checks: run `helm lint` on the chart with the default, dev, and prod
# value sets.
lint-chart:
  name: Lint Helm Chart
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Helm
      uses: azure/setup-helm@v4
      with:
        version: ${{ env.HELM_VERSION }}

    # CHART_PATH is defined in the workflow-level env and is therefore
    # available to the shell as an environment variable.
    - name: Run Helm lint
      run: |
        echo "::group::Helm Lint"
        helm lint "${CHART_PATH}"
        echo "::endgroup::"

    - name: Run Helm lint with dev values
      run: |
        echo "::group::Helm Lint (Dev Values)"
        helm lint "${CHART_PATH}" -f "${CHART_PATH}/values-dev.yaml"
        echo "::endgroup::"

    - name: Run Helm lint with prod values
      run: |
        echo "::group::Helm Lint (Prod Values)"
        helm lint "${CHART_PATH}" -f "${CHART_PATH}/values-prod.yaml"
        echo "::endgroup::"
56+
57+
# Template validation: render the chart with each value set, lint the default
# rendering, and check that the expected resource kinds are present.
template-chart:
  name: Validate Helm Templates
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Helm
      uses: azure/setup-helm@v4
      with:
        version: ${{ env.HELM_VERSION }}

    - name: Template with default values
      run: |
        echo "::group::Template with Default Values"
        helm template test-release ${{ env.CHART_PATH }} \
          --namespace test-namespace > /tmp/default-template.yaml
        echo "Templates generated successfully"
        echo "::endgroup::"

    - name: Template with dev values
      run: |
        echo "::group::Template with Dev Values"
        helm template test-release ${{ env.CHART_PATH }} \
          -f ${{ env.CHART_PATH }}/values-dev.yaml \
          --namespace test-namespace > /tmp/dev-template.yaml
        echo "Dev templates generated successfully"
        echo "::endgroup::"

    - name: Template with prod values
      run: |
        echo "::group::Template with Prod Values"
        helm template test-release ${{ env.CHART_PATH }} \
          -f ${{ env.CHART_PATH }}/values-prod.yaml \
          --namespace test-namespace > /tmp/prod-template.yaml
        echo "Prod templates generated successfully"
        echo "::endgroup::"

    - name: Validate generated YAML
      run: |
        echo "::group::Validate YAML Syntax"
        # Check if yamllint is available, install if needed
        if ! command -v yamllint &> /dev/null; then
          echo "Installing yamllint..."
          pip install yamllint
        fi

        # Validate generated templates (ignore some Helm template warnings).
        # Findings are reported but deliberately do not fail the step.
        yamllint -d "{extends: default, rules: {line-length: {max: 120}, indentation: {spaces: 2}}}" \
          /tmp/default-template.yaml || echo "Some yamllint warnings are expected for Helm templates"
        echo "::endgroup::"

    - name: Verify required resources
      run: |
        echo "::group::Verify Required Resources"
        required_resources=(
          "Namespace"
          "ServiceAccount"
          "PersistentVolumeClaim"
          "ConfigMap"
          "Deployment"
          "Service"
        )

        for resource in "${required_resources[@]}"; do
          # Match the whole top-level `kind:` line. A plain substring match
          # would let "kind: ServiceAccount" satisfy the "Service" check.
          if grep -qE "^kind: ${resource}[[:space:]]*\$" /tmp/default-template.yaml; then
            echo "✓ Found resource: $resource"
          else
            echo "✗ Missing resource: $resource"
            exit 1
          fi
        done
        echo "All required resources found"
        echo "::endgroup::"

    # Keep the three rendered manifests for a week so they can be inspected.
    - name: Upload templates as artifacts
      uses: actions/upload-artifact@v4
      with:
        name: helm-templates
        path: /tmp/*-template.yaml
        retention-days: 7
139+
140+
# CI test: install, upgrade, and roll back the chart in a Kind cluster, once
# per Kubernetes version in the matrix. Runs only after lint and template
# validation have passed.
install-chart:
  name: Install Chart in Kind
  runs-on: ubuntu-latest
  needs: [lint-chart, template-chart]
  strategy:
    matrix:
      k8s-version:
        - v1.27.11
        - v1.28.7
        - v1.29.2
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Helm
      uses: azure/setup-helm@v4
      with:
        version: ${{ env.HELM_VERSION }}

    - name: Set up kubectl
      uses: azure/setup-kubectl@v4
      with:
        version: ${{ env.KUBECTL_VERSION }}

    - name: Create Kind cluster
      uses: helm/kind-action@v1.10.0
      with:
        version: ${{ env.KIND_VERSION }}
        node_image: kindest/node:${{ matrix.k8s-version }}
        cluster_name: helm-test-cluster
        wait: 120s

    - name: Verify Kind cluster
      run: |
        echo "::group::Cluster Info"
        kubectl cluster-info
        kubectl get nodes
        kubectl version
        echo "::endgroup::"

    - name: Create namespace
      run: |
        echo "::group::Create Namespace"
        kubectl create namespace vllm-semantic-router-system || echo "Namespace already exists"
        # Fails the step if the namespace is still missing after the create.
        kubectl get namespace vllm-semantic-router-system
        echo "::endgroup::"

    - name: Install Helm chart with dev values (CI minimal config)
      run: |
        echo "::group::Install Chart"
        # CI environment: Download only essential model to avoid OOM
        # Only download all-MiniLM-L12-v2 (smallest model ~120MB)
        # The model list is overridden here exactly as in the upgrade step so
        # that install and upgrade really use the same minimal configuration.
        helm install semantic-router ${{ env.CHART_PATH }} \
          -f ${{ env.CHART_PATH }}/values-dev.yaml \
          --set initContainer.resources.limits.memory=2Gi \
          --set initContainer.resources.requests.memory=1Gi \
          --set-json 'initContainer.models=[{"name":"all-MiniLM-L12-v2","repo":"sentence-transformers/all-MiniLM-L12-v2"}]' \
          --namespace vllm-semantic-router-system \
          --wait \
          --timeout 10m \
          --debug
        echo "::endgroup::"

    # For CI debugging: show the output of the model-downloader init container.
    - name: Check init container logs
      if: always()
      run: |
        echo "::group::Init Container Logs"
        # Wait a bit for init container to start
        sleep 5
        # List all matching pod names. Indexing items[0] would make kubectl
        # exit non-zero when no pod exists, aborting this always-run step
        # before the "No pod found yet" branch could be reached.
        POD_NAMES=$(kubectl get pods -n vllm-semantic-router-system \
          -l app.kubernetes.io/name=semantic-router \
          -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || true)
        # Names are space-separated; keep only the first one.
        POD_NAME="${POD_NAMES%% *}"
        if [ -n "$POD_NAME" ]; then
          echo "Checking init container logs for pod: $POD_NAME"
          kubectl logs -n vllm-semantic-router-system "$POD_NAME" -c model-downloader --tail=100 || echo "Init container may have already completed or not started yet"
        else
          echo "No pod found yet"
        fi
        echo "::endgroup::"

    - name: Verify installation
      run: |
        echo "::group::Helm Status"
        helm status semantic-router -n vllm-semantic-router-system
        echo "::endgroup::"

        echo "::group::Check Resources"
        kubectl get all -n vllm-semantic-router-system
        echo "::endgroup::"

        echo "::group::Check PVC"
        kubectl get pvc -n vllm-semantic-router-system
        echo "::endgroup::"

        echo "::group::Check ConfigMap"
        kubectl get configmap -n vllm-semantic-router-system
        echo "::endgroup::"

    - name: Wait for deployment to be ready
      run: |
        echo "::group::Wait for Deployment"
        kubectl wait --for=condition=Available deployment/semantic-router \
          -n vllm-semantic-router-system \
          --timeout=600s || {
          # Close the outer group before opening the diagnostic groups so
          # that log groups are never nested.
          echo "::endgroup::"
          echo "Deployment failed to become ready"
          # Diagnostics are best-effort; the step fails via `exit 1` below.
          echo "::group::Pod Status"
          kubectl get pods -n vllm-semantic-router-system || true
          echo "::endgroup::"
          echo "::group::Pod Describe"
          kubectl describe pods -n vllm-semantic-router-system || true
          echo "::endgroup::"
          echo "::group::Pod Logs"
          kubectl logs -n vllm-semantic-router-system -l app.kubernetes.io/name=semantic-router --all-containers=true --tail=100 || true
          echo "::endgroup::"
          exit 1
        }
        echo "::endgroup::"

    - name: Check pod status
      run: |
        echo "::group::Pod Details"
        kubectl get pods -n vllm-semantic-router-system -o wide
        echo "::endgroup::"

        echo "::group::Pod Events"
        kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp'
        echo "::endgroup::"

    - name: Test service endpoints
      run: |
        echo "::group::Service Endpoints"
        kubectl get svc -n vllm-semantic-router-system
        kubectl get endpoints -n vllm-semantic-router-system
        echo "::endgroup::"

    - name: Test upgrade
      run: |
        echo "::group::Upgrade Chart"
        # Use same minimal config for upgrade test
        helm upgrade semantic-router ${{ env.CHART_PATH }} \
          -f ${{ env.CHART_PATH }}/values-dev.yaml \
          --set initContainer.resources.limits.memory=2Gi \
          --set initContainer.resources.requests.memory=1Gi \
          --set-json 'initContainer.models=[{"name":"all-MiniLM-L12-v2","repo":"sentence-transformers/all-MiniLM-L12-v2"}]' \
          --namespace vllm-semantic-router-system \
          --wait \
          --timeout 10m
        echo "::endgroup::"

        echo "::group::Verify Upgrade"
        helm status semantic-router -n vllm-semantic-router-system
        kubectl get pods -n vllm-semantic-router-system
        echo "::endgroup::"

    - name: Test rollback
      run: |
        echo "::group::Rollback Chart"
        # Same timeout as install/upgrade instead of Helm's shorter default.
        helm rollback semantic-router -n vllm-semantic-router-system --wait --timeout 10m
        echo "::endgroup::"

        echo "::group::Verify Rollback"
        helm history semantic-router -n vllm-semantic-router-system
        echo "::endgroup::"

    # For CI debugging. Placed after the upgrade and rollback tests so that a
    # failure in any earlier step, including those two, is diagnosed.
    - name: Collect logs on failure
      if: failure()
      run: |
        echo "::group::Helm Release Info"
        helm list -n vllm-semantic-router-system
        helm get values semantic-router -n vllm-semantic-router-system --all
        echo "::endgroup::"

        echo "::group::All Resources"
        kubectl get all -n vllm-semantic-router-system -o wide
        echo "::endgroup::"

        echo "::group::Pod Logs"
        for pod in $(kubectl get pods -n vllm-semantic-router-system -o name); do
          echo "Logs for $pod:"
          kubectl logs -n vllm-semantic-router-system "$pod" --all-containers=true --tail=200 || true
          echo "---"
        done
        echo "::endgroup::"

        echo "::group::Events"
        kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp'
        echo "::endgroup::"

    # Best-effort cleanup; runs even when earlier steps failed.
    - name: Uninstall chart
      if: always()
      run: |
        echo "::group::Uninstall Chart"
        helm uninstall semantic-router -n vllm-semantic-router-system || true
        kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
        echo "::endgroup::"
336+
337+
# Validation script test: run deploy/helm/validate-chart.sh with Helm and
# yamllint installed on the runner.
validation-script:
  name: Run Validation Script
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Helm
      uses: azure/setup-helm@v4
      with:
        version: ${{ env.HELM_VERSION }}

    # NOTE(review): presumably required by the validation script — confirm.
    - name: Install yamllint
      run: |
        pip install yamllint

    - name: Run validation script
      run: |
        # Ensure the script is executable, then invoke it directly.
        script=deploy/helm/validate-chart.sh
        chmod +x "$script"
        "./$script"
357+
358+
# Aggregate gate for the whole workflow. It always runs and fails explicitly
# unless every needed job succeeded. With `if: success()` this job would be
# skipped after an upstream failure, and a skipped job is reported as
# successful to required status checks.
ci-success:
  name: CI Success
  runs-on: ubuntu-latest
  needs: [lint-chart, template-chart, install-chart, validation-script]
  if: always()
  steps:
    # `needs.*.result` holds the result of every job listed in `needs`.
    - name: Fail if any required job did not succeed
      if: >-
        contains(needs.*.result, 'failure') ||
        contains(needs.*.result, 'cancelled') ||
        contains(needs.*.result, 'skipped')
      run: |
        echo "✗ One or more Helm CI jobs did not succeed"
        exit 1

    # Skipped automatically when the step above has failed.
    - name: Success summary
      run: |
        echo "✓ Lint checks passed"
        echo "✓ Template validation passed"
        echo "✓ Chart installation tests passed"
        echo "✓ Validation script passed"

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ _run:
1616
-f tools/make/pre-commit.mk \
1717
-f tools/make/docker.mk \
1818
-f tools/make/kube.mk \
19+
-f tools/make/helm.mk \
1920
-f tools/make/observability.mk \
2021
-f tools/make/openshift.mk \
2122
$(MAKECMDGOALS)

0 commit comments

Comments
 (0)