Skip to content

Commit 4e77e8c

Browse files
committed
add serviceMonitor resource for helm
Signed-off-by: calvin chen <wen.chen@dynamia.ai>
1 parent 2ccf412 commit 4e77e8c

File tree

4 files changed

+74
-1
lines changed

4 files changed

+74
-1
lines changed

charts/hami/templates/_commons.tpl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,17 @@ imagePullSecrets:
4646
- name: {{ . }}
4747
{{- end }}
4848
{{- end }}
49-
{{- end -}}
49+
{{- end -}}
50+
51+
{{/*
Render a value that may itself contain template expressions.

Expects a dict with two keys:
  value   - the value to render; a string is passed to `tpl` as-is, anything
            else is serialized with `toYaml` first.
  context - the context the embedded template is evaluated against.

Usage:
{{ include "common.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $ ) }}
*/}}
{{- define "common.tplvalues.render" -}}
{{- $source := .value -}}
{{- if not (typeIs "string" $source) -}}
{{- /* tpl only accepts strings, so serialize structured values first. */ -}}
{{- $source = toYaml $source -}}
{{- end -}}
{{- tpl $source .context -}}
{{- end -}}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{{- /*
ServiceMonitor (monitoring.coreos.com/v1) for the HAMi device plugin.
Rendered only when .Values.devicePlugin.serviceMonitor.enabled is truthy.
*/ -}}
{{- /*
Fall back to an empty dict so that a missing serviceMonitor values block
(e.g. `helm upgrade --reuse-values` from a chart version without it) simply
disables the resource instead of failing with a nil-pointer template error.
*/ -}}
{{- $monitor := default (dict) .Values.devicePlugin.serviceMonitor -}}
{{- if $monitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "hami-vgpu.device-plugin" . }}
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    {{- include "hami-vgpu.labels" . | nindent 4 }}
    {{- /* Extra labels may contain template expressions; render them against the root context. */}}
    {{- with $monitor.labels }}
    {{- include "common.tplvalues.render" ( dict "value" . "context" $ ) | nindent 4 }}
    {{- end }}
spec:
  endpoints:
    {{- /* NOTE(review): the port name must match a named port on the selected Service - confirm against the Service template. */}}
    - path: /metrics
      port: monitorport
      {{- /* Quoted so the value always reaches the API as a string; omitted entirely when unset. */}}
      {{- with $monitor.interval }}
      interval: {{ . | quote }}
      {{- end }}
  jobLabel: app
  namespaceSelector:
    matchNames:
      - {{ include "hami-vgpu.namespace" . }}
  selector:
    matchLabels:
      app.kubernetes.io/component: hami-device-plugin
{{- end }}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{{- /*
ServiceMonitor (monitoring.coreos.com/v1) for the HAMi scheduler.
Rendered only when .Values.scheduler.serviceMonitor.enabled is truthy.
*/ -}}
{{- /*
Fall back to an empty dict so that a missing serviceMonitor values block
(e.g. `helm upgrade --reuse-values` from a chart version without it) simply
disables the resource instead of failing with a nil-pointer template error.
*/ -}}
{{- $monitor := default (dict) .Values.scheduler.serviceMonitor -}}
{{- if $monitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    {{- include "hami-vgpu.labels" . | nindent 4 }}
    {{- /* Extra labels may contain template expressions; render them against the root context. */}}
    {{- with $monitor.labels }}
    {{- include "common.tplvalues.render" ( dict "value" . "context" $ ) | nindent 4 }}
    {{- end }}
spec:
  endpoints:
    {{- /* NOTE(review): the port name must match a named port on the selected Service - confirm against the Service template. */}}
    - path: /metrics
      port: monitor
      {{- /* Quoted so the value always reaches the API as a string; omitted entirely when unset. */}}
      {{- with $monitor.interval }}
      interval: {{ . | quote }}
      {{- end }}
  jobLabel: app
  namespaceSelector:
    matchNames:
      - {{ include "hami-vgpu.namespace" . }}
  selector:
    matchLabels:
      app.kubernetes.io/component: hami-scheduler
{{- end }}

charts/hami/values.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ scheduler:
232232
monitorTargetPort: 9395
233233
labels: {}
234234
annotations: {}
235+
# enable serviceMonitor resource for hami scheduler
236+
serviceMonitor:
237+
enabled: false
238+
interval: "15s"
239+
labels:
240+
release: prometheus
235241

236242
devicePlugin:
237243
## @param image.registry devicePlugin image registry
@@ -296,6 +302,12 @@ devicePlugin:
296302
httpPort: 31992
297303
labels: {}
298304
annotations: {}
305+
# enable serviceMonitor resource for device plugin
306+
serviceMonitor:
307+
enabled: false
308+
interval: "15s"
309+
labels:
310+
release: prometheus
299311

300312
pluginPath: /var/lib/kubelet/device-plugins
301313
libPath: /usr/local/vgpu

0 commit comments

Comments
 (0)