Skip to content

Commit 3a410a1

Browse files
committed
add serviceMonitor resource for helm
Signed-off-by: calvin chen <wen.chen@dynamia.ai>
1 parent 2ccf412 commit 3a410a1

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
{{- /*
ServiceMonitor (Prometheus Operator CRD, monitoring.coreos.com/v1) for the
HAMi device plugin. Rendered only when
.Values.devicePlugin.serviceMonitor.enabled is true.

Templated scalars are piped through `quote` so that a namespace or name that
looks like a number/boolean/null (e.g. "123", "true", "on") is still rendered
as a YAML string.
*/ -}}
{{- if .Values.devicePlugin.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "hami-vgpu.device-plugin" . | quote }}
  namespace: {{ include "hami-vgpu.namespace" . | quote }}
  labels:
    {{- include "hami-vgpu.labels" . | nindent 4 }}
    # NOTE(review): hard-coded; presumably has to match the
    # serviceMonitorSelector of the target Prometheus instance - confirm,
    # and consider exposing it through values.
    release: prometheus
spec:
  endpoints:
    - interval: 15s
      path: /metrics
      port: monitorport
      metricRelabelings:
        # Copy the exporter-provided pod name into `exported_pod`.
        - sourceLabels:
            - podname
          targetLabel: exported_pod
          action: replace
        # Copy the exporter-provided pod namespace into `exported_namespace`.
        - sourceLabels:
            - podnamespace
          targetLabel: exported_namespace
          action: replace
        # Keep everything before the last two dash-separated suffixes of the
        # pod name as `deployment_name` (presumably the ReplicaSet and pod
        # hashes of a Deployment-managed pod - TODO confirm).
        - sourceLabels:
            - podname
          regex: '^(.+)-[a-z0-9]{9,}-[a-z0-9]+$'
          targetLabel: deployment_name
          replacement: '$1'
          action: replace
        # Build `deployment` as "<podnamespace>/<deployment_name>".
        - sourceLabels:
            - podnamespace
            - deployment_name
          separator: /
          targetLabel: deployment
          action: replace
      relabelings:
        # Attach the node name discovered for the endpoint as `node`.
        - sourceLabels:
            - __meta_kubernetes_endpoint_node_name
          targetLabel: node
          action: replace
  jobLabel: app
  namespaceSelector:
    matchNames:
      - {{ include "hami-vgpu.namespace" . | quote }}
  selector:
    matchLabels:
      app.kubernetes.io/component: hami-device-plugin
{{- end }}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
{{- /*
ServiceMonitor (Prometheus Operator CRD, monitoring.coreos.com/v1) for the
HAMi scheduler. Rendered only when
.Values.scheduler.serviceMonitor.enabled is true.

Templated scalars are piped through `quote` so that a namespace or name that
looks like a number/boolean/null (e.g. "123", "true", "on") is still rendered
as a YAML string.
*/ -}}
{{- if .Values.scheduler.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "hami-vgpu.scheduler" . | quote }}
  namespace: {{ include "hami-vgpu.namespace" . | quote }}
  labels:
    {{- include "hami-vgpu.labels" . | nindent 4 }}
    # NOTE(review): hard-coded; presumably has to match the
    # serviceMonitorSelector of the target Prometheus instance - confirm,
    # and consider exposing it through values.
    release: prometheus
spec:
  endpoints:
    - interval: 15s
      path: /metrics
      port: monitor
      metricRelabelings:
        # Copy the exporter-provided pod name into `exported_pod`.
        - sourceLabels:
            - podname
          targetLabel: exported_pod
          action: replace
        # Copy the exporter-provided pod namespace into `exported_namespace`.
        - sourceLabels:
            - podnamespace
          targetLabel: exported_namespace
          action: replace
        # Copy the exporter-provided `nodeid` label into `node`.
        - sourceLabels:
            - nodeid
          targetLabel: node
          action: replace
  jobLabel: app
  namespaceSelector:
    matchNames:
      - {{ include "hami-vgpu.namespace" . | quote }}
  selector:
    matchLabels:
      app.kubernetes.io/component: hami-scheduler
{{- end }}

charts/hami/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,9 @@ scheduler:
232232
monitorTargetPort: 9395
233233
labels: {}
234234
annotations: {}
235+
# Create a ServiceMonitor (monitoring.coreos.com/v1, Prometheus Operator CRD) for the HAMi scheduler
236+
serviceMonitor:
237+
enabled: false
235238

236239
devicePlugin:
237240
## @param image.registry devicePlugin image registry
@@ -296,6 +299,9 @@ devicePlugin:
296299
httpPort: 31992
297300
labels: {}
298301
annotations: {}
302+
# Create a ServiceMonitor (monitoring.coreos.com/v1, Prometheus Operator CRD) for the HAMi device plugin
303+
serviceMonitor:
304+
enabled: false
299305

300306
pluginPath: /var/lib/kubelet/device-plugins
301307
libPath: /usr/local/vgpu

0 commit comments

Comments
 (0)